c - 需要展开循环宏

标签 c m4

考虑以下代码:

/*
 * Shared state defined in another translation unit (hence `extern`).
 * accept_connections() below uses conn_fds, fl_conn_indexes,
 * fl_conn_indexes_top, conn_statuses, nc_http_list and conn_addresses;
 * the remaining declarations are not referenced by it.
 */
extern int conn_fds[MAX_CLIENTS];                       /* per-slot descriptor; written with the result of accept() */
extern int fl_conn_indexes[MAX_CLIENTS];                /* read just below fl_conn_indexes_top; presumably a free list of slot indexes -- confirm */
extern int fl_req_bufs[MAX_CLIENTS];                    /* not used by accept_connections() */
extern struct epoll_event estab_events[MAX_THREADS];    /* not used by accept_connections() */
extern req_buf_t req_bufs[MAX_REQ_BUFS];                /* not used by accept_connections() */
extern int fl_req_bufs_top;                             /* not used by accept_connections() */
extern int conn_statuses[MAX_CLIENTS];                  /* per-slot state; set to CONN_STATUS_READING after accept() */
extern int fl_conn_indexes_top;                         /* decreased by num_conns in accept_connections() */
extern  tcpl_nc_t nc_http_list;                         /* its sock_fd member is the socket passed to accept() */
extern struct sockaddr_in conn_addresses[MAX_CLIENTS];  /* per-slot peer address; filled in by accept() */

/*
 * Accepts a batch of connections on nc_http_list.sock_fd using three
 * hand-unrolled passes. Each pass is entered through a computed goto
 * (GCC "labels as values" extension: &&label / goto *ptr) selected by
 * num_conns, and then falls through to the end of the pass:
 *   pass a: copy slot indexes from fl_conn_indexes[] into conn_indexes[];
 *   pass b: accept() one connection per slot, storing fd and peer address;
 *   pass c: mark each of those slots CONN_STATUS_READING.
 * Returns early, without modifying anything, when
 * fl_conn_indexes_top - num_conns <= 0.
 *
 * num_conns: index into labels1/labels2/labels3, i.e. the entry label of
 *            each pass.
 *
 * NOTE(review): entering at label K falls through K, K-1, ..., 0, so K + 1
 * statements run per pass (label 0 always runs), while fl_conn_indexes_top
 * is lowered by only num_conns. Confirm whether num_conns is meant to be a
 * count or a highest index.
 * NOTE(review): each label table lists 16 entries and is indexed by
 * num_conns with no upper-bound check in this function.
 * NOTE(review): the values returned by accept() are stored unchecked.
 */
void accept_connections(unsigned int num_conns) {
    /* fds and conn_idx are declared but never used in this function. */
    int fds[MAX_THREADS];
    int conn_indexes[MAX_THREADS];
    int conn_idx=0;
    int new_bottom;
    /* NOTE(review): no element of slenghts is assigned before its address is
       passed to accept(); POSIX expects *address_len to hold the size of the
       address buffer on input. */
    socklen_t slenghts[MAX_THREADS];
    /* Jump tables: entry K holds the address of label aK / bK / cK. */
    void *labels1[MAX_THREADS] = {&&a0,&&a1,&&a2,&&a3,&&a4,&&a5,&&a6,&&a7,&&a8,&&a9,&&a10,&&a11,&&a12,&&a13,&&a14,&&a15};
    void *labels2[MAX_THREADS] = {&&b0,&&b1,&&b2,&&b3,&&b4,&&b5,&&b6,&&b7,&&b8,&&b9,&&b10,&&b11,&&b12,&&b13,&&b14,&&b15};
    void *labels3[MAX_THREADS] = {&&c0,&&c1,&&c2,&&c3,&&c4,&&c5,&&c6,&&c7,&&c8,&&c9,&&c10,&&c11,&&c12,&&c13,&&c14,&&c15};

    /* int - unsigned int is evaluated in unsigned arithmetic and then
       converted back to int on assignment. */
    new_bottom=fl_conn_indexes_top-num_conns;
    if (new_bottom<=0) return;

    /* Pass a: copy slot indexes; label aK pairs with offset 16 - K from the
       top of both arrays. */
    goto *labels1[num_conns];
 a15:
    conn_indexes[MAX_THREADS- 1]=fl_conn_indexes[fl_conn_indexes_top- 1];
 a14:
    conn_indexes[MAX_THREADS- 2]=fl_conn_indexes[fl_conn_indexes_top- 2];
 a13:
    conn_indexes[MAX_THREADS- 3]=fl_conn_indexes[fl_conn_indexes_top- 3];
 a12:
    conn_indexes[MAX_THREADS- 4]=fl_conn_indexes[fl_conn_indexes_top- 4];
 a11:
    conn_indexes[MAX_THREADS- 5]=fl_conn_indexes[fl_conn_indexes_top- 5];
 a10:
    conn_indexes[MAX_THREADS- 6]=fl_conn_indexes[fl_conn_indexes_top- 6];
 a9:
    conn_indexes[MAX_THREADS- 7]=fl_conn_indexes[fl_conn_indexes_top- 7];
 a8:
    conn_indexes[MAX_THREADS- 8]=fl_conn_indexes[fl_conn_indexes_top- 8];
 a7:
    conn_indexes[MAX_THREADS- 9]=fl_conn_indexes[fl_conn_indexes_top- 9];
 a6:
    conn_indexes[MAX_THREADS-10]=fl_conn_indexes[fl_conn_indexes_top-10];
 a5:
    conn_indexes[MAX_THREADS-11]=fl_conn_indexes[fl_conn_indexes_top-11];
 a4:
    conn_indexes[MAX_THREADS-12]=fl_conn_indexes[fl_conn_indexes_top-12];
 a3:
    conn_indexes[MAX_THREADS-13]=fl_conn_indexes[fl_conn_indexes_top-13];
 a2:
    conn_indexes[MAX_THREADS-14]=fl_conn_indexes[fl_conn_indexes_top-14];
 a1:
    conn_indexes[MAX_THREADS-15]=fl_conn_indexes[fl_conn_indexes_top-15];
 a0:
    conn_indexes[MAX_THREADS-16]=fl_conn_indexes[fl_conn_indexes_top-16];

    /* The top is lowered only after pass a has read relative to the old top. */
    fl_conn_indexes_top=fl_conn_indexes_top-num_conns;
    /* Pass b: one accept() per slot copied in pass a. */
    goto *labels2[num_conns];
 b15:
    conn_fds[conn_indexes[MAX_THREADS- 1]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 1]],&slenghts[MAX_THREADS- 1]);
 b14:
    conn_fds[conn_indexes[MAX_THREADS- 2]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 2]],&slenghts[MAX_THREADS- 2]);
 b13:
    conn_fds[conn_indexes[MAX_THREADS- 3]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 3]],&slenghts[MAX_THREADS- 3]);
 b12:
    conn_fds[conn_indexes[MAX_THREADS- 4]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 4]],&slenghts[MAX_THREADS- 4]);
 b11:
    conn_fds[conn_indexes[MAX_THREADS- 5]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 5]],&slenghts[MAX_THREADS- 5]);
 b10:
    conn_fds[conn_indexes[MAX_THREADS- 6]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 6]],&slenghts[MAX_THREADS- 6]);
 b9:
    conn_fds[conn_indexes[MAX_THREADS- 7]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 7]],&slenghts[MAX_THREADS- 7]);
 b8:
    conn_fds[conn_indexes[MAX_THREADS- 8]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 8]],&slenghts[MAX_THREADS- 8]);
 b7:
    conn_fds[conn_indexes[MAX_THREADS- 9]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 9]],&slenghts[MAX_THREADS- 9]);
 b6:
    conn_fds[conn_indexes[MAX_THREADS-10]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-10]],&slenghts[MAX_THREADS-10]);
 b5:
    conn_fds[conn_indexes[MAX_THREADS-11]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-11]],&slenghts[MAX_THREADS-11]);
 b4:
    conn_fds[conn_indexes[MAX_THREADS-12]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-12]],&slenghts[MAX_THREADS-12]);
 b3:
    conn_fds[conn_indexes[MAX_THREADS-13]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-13]],&slenghts[MAX_THREADS-13]);
 b2:
    conn_fds[conn_indexes[MAX_THREADS-14]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-14]],&slenghts[MAX_THREADS-14]);
 b1:
    conn_fds[conn_indexes[MAX_THREADS-15]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-15]],&slenghts[MAX_THREADS-15]);
 b0:
    conn_fds[conn_indexes[MAX_THREADS-16]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-16]],&slenghts[MAX_THREADS-16]);

    /* Pass c: flag every slot handled above as CONN_STATUS_READING. */
    goto *labels3[num_conns];
 c15:
    conn_statuses[conn_indexes[MAX_THREADS- 1]]=CONN_STATUS_READING;
 c14:
    conn_statuses[conn_indexes[MAX_THREADS- 2]]=CONN_STATUS_READING;
 c13:
    conn_statuses[conn_indexes[MAX_THREADS- 3]]=CONN_STATUS_READING;
 c12:
    conn_statuses[conn_indexes[MAX_THREADS- 4]]=CONN_STATUS_READING;
 c11:
    conn_statuses[conn_indexes[MAX_THREADS- 5]]=CONN_STATUS_READING;
 c10:
    conn_statuses[conn_indexes[MAX_THREADS- 6]]=CONN_STATUS_READING;
 c9:
    conn_statuses[conn_indexes[MAX_THREADS- 7]]=CONN_STATUS_READING;
 c8:
    conn_statuses[conn_indexes[MAX_THREADS- 8]]=CONN_STATUS_READING;
 c7:
    conn_statuses[conn_indexes[MAX_THREADS- 9]]=CONN_STATUS_READING;
 c6:
    conn_statuses[conn_indexes[MAX_THREADS-10]]=CONN_STATUS_READING;
 c5:
    conn_statuses[conn_indexes[MAX_THREADS-11]]=CONN_STATUS_READING;
 c4:
    conn_statuses[conn_indexes[MAX_THREADS-12]]=CONN_STATUS_READING;
 c3:
    conn_statuses[conn_indexes[MAX_THREADS-13]]=CONN_STATUS_READING;
 c2:
    conn_statuses[conn_indexes[MAX_THREADS-14]]=CONN_STATUS_READING;
 c1:
    conn_statuses[conn_indexes[MAX_THREADS-15]]=CONN_STATUS_READING;
 c0:
    conn_statuses[conn_indexes[MAX_THREADS-16]]=CONN_STATUS_READING;

}

我在这里所做的是并行处理 MAX_THREADS 个连接。为了速度,循环被有意地手动展开了,但这样一来代码变得很长。我想用宏来缩短它,大致像这样:

/*
 * Shared state defined in another translation unit (hence `extern`).
 * accept_connections() below uses conn_fds, fl_conn_indexes,
 * fl_conn_indexes_top, conn_statuses, nc_http_list and conn_addresses;
 * the remaining declarations are not referenced by it.
 */
extern int conn_fds[MAX_CLIENTS];                       /* per-slot descriptor; written with the result of accept() */
extern int fl_conn_indexes[MAX_CLIENTS];                /* read just below fl_conn_indexes_top; presumably a free list of slot indexes -- confirm */
extern int fl_req_bufs[MAX_CLIENTS];                    /* not used by accept_connections() */
extern struct epoll_event estab_events[MAX_THREADS];    /* not used by accept_connections() */
extern req_buf_t req_bufs[MAX_REQ_BUFS];                /* not used by accept_connections() */
extern int fl_req_bufs_top;                             /* not used by accept_connections() */
extern int conn_statuses[MAX_CLIENTS];                  /* per-slot state; set to CONN_STATUS_READING after accept() */
extern int fl_conn_indexes_top;                         /* decreased by num_conns in accept_connections() */
extern  tcpl_nc_t nc_http_list;                         /* its sock_fd member is the socket passed to accept() */
extern struct sockaddr_in conn_addresses[MAX_CLIENTS];  /* per-slot peer address; filled in by accept() */

/*
 * Sketch of the shorthand the author would like to write instead of the
 * hand-unrolled accept_connections(). LOOP(first, last) { ... } and
 * {ITERATOR} are NOT C or C-preprocessor syntax; they stand for whatever
 * macro facility ends up generating one copy of the body per iteration,
 * with {ITERATOR} replaced by the current value (first..last inclusive).
 *
 * Each generated pass is entered through a computed goto (GCC "labels as
 * values" extension) selected by num_conns and falls through to its end:
 *   pass a: copy slot indexes from fl_conn_indexes[] into conn_indexes[];
 *   pass b: accept() one connection per slot;
 *   pass c: mark each slot CONN_STATUS_READING.
 *
 * NOTE(review): with {ITERATOR} running 1..MAX_THREADS the generated labels
 * would be a1..a16 / b1..b16 / c1..c16, whereas the jump tables list
 * a0..a15 etc.; the real macro has to emit the label suffix as
 * (MAX_THREADS - {ITERATOR}) to match the hand-written version.
 */
void accept_connections(unsigned int num_conns) {
    /* fds and conn_idx are declared but never used in this function. */
    int fds[MAX_THREADS];
    int conn_indexes[MAX_THREADS];
    int conn_idx=0;
    int new_bottom;
    socklen_t slenghts[MAX_THREADS];
    /* Jump tables: entry K holds the address of label aK / bK / cK. */
    void *labels1[MAX_THREADS] = {&&a0,&&a1,&&a2,&&a3,&&a4,&&a5,&&a6,&&a7,&&a8,&&a9,&&a10,&&a11,&&a12,&&a13,&&a14,&&a15};
    void *labels2[MAX_THREADS] = {&&b0,&&b1,&&b2,&&b3,&&b4,&&b5,&&b6,&&b7,&&b8,&&b9,&&b10,&&b11,&&b12,&&b13,&&b14,&&b15};
    void *labels3[MAX_THREADS] = {&&c0,&&c1,&&c2,&&c3,&&c4,&&c5,&&c6,&&c7,&&c8,&&c9,&&c10,&&c11,&&c12,&&c13,&&c14,&&c15};

    new_bottom=fl_conn_indexes_top-num_conns;
    if (new_bottom<=0) return;

    /* Pass a: copy slot indexes relative to the current top. */
    goto *labels1[num_conns];

LOOP(1,MAX_THREADS) {
 a{ITERATOR}:
    conn_indexes[MAX_THREADS- {ITERATOR}]=fl_conn_indexes[fl_conn_indexes_top- {ITERATOR}];
}
    fl_conn_indexes_top=fl_conn_indexes_top-num_conns;
    /* Pass b: one accept() per slot; all three indexes use the same offset. */
    goto *labels2[num_conns];
LOOP(1,MAX_THREADS) {
 b{ITERATOR}:
    conn_fds[conn_indexes[MAX_THREADS- {ITERATOR}]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- {ITERATOR}]],&slenghts[MAX_THREADS- {ITERATOR}]);
}
    /* Pass c: flag every slot handled above as CONN_STATUS_READING. */
    goto *labels3[num_conns];
LOOP(1,MAX_THREADS) {
 c{ITERATOR}:
    conn_statuses[conn_indexes[MAX_THREADS- {ITERATOR}]]=CONN_STATUS_READING;
}

}

在 LOOP() 中,{ITERATOR} 从 1 递增到 MAX_THREADS。标签是必需的,因为连接数可能小于 MAX_THREADS,此时我必须跳过未使用的变量,否则会出现段错误。

用 GCC 的预处理器能轻松实现吗?如果不能,也许我可以使用 m4,但该如何将 m4 与 GCC 集成,才能方便地进行开发?因为我会在许多其他源文件中频繁编辑这些宏,所以需要写起来足够方便。

非常感谢您的帮助

最佳答案

如果您确实需要 m4 中的答案:

dnl usage: unroll(var, initial, max, code)
dnl e.g. unroll(`i', 0, 10, ``total += arr[i]'')
dnl The above goes from 0 to 9, it does not include the final 10.
dnl
dnl How it works: every step (re)defines var to the current value. While
dnl current < max, code is emitted (and rescanned, so var inside it expands
dnl to the current value) and the macro recurses with the value incremented
dnl by one. Once the bound is reached var is undefined, so a definition that
dnl var had before the call does not survive it.
dnl initial and max are substituted into eval and incr, so they must expand
dnl to integer expressions. code has to be quoted so that it is expanded on
dnl each iteration rather than once when the arguments are collected.
define(`unroll',
       `define(`$1', `$2')ifelse(eval($2 < $3),
                                 1,
                                 `$4`'unroll(`$1',incr($2),$3,`$4')',
                                 `undefine(`$1')')')dnl

dnl m4 template for accept_connections(): the three fall-through passes of
dnl the hand-written version are generated with unroll() instead of being
dnl spelled out. Run this text through m4 first and compile the output with
dnl GCC (the output relies on the labels-as-values extension).
dnl In every pass count runs from 1 to MAX_NUM_CONNS and the emitted label
dnl suffix is MAX_NUM_CONNS - count, so labels appear in descending order
dnl (15 down to 0) and each one pairs with array offset MAX_THREADS - count.
dnl NOTE(review): the three label tables below hard-code 16, while the
dnl passes use MAX_NUM_CONNS, which is only defined further down; the two
dnl values have to be kept in sync by hand.
void accept_connections(unsigned int num_conns) {
    int fds[MAX_THREADS];
    int conn_indexes[MAX_THREADS];
    int conn_idx=0;
    int new_bottom;
    socklen_t slenghts[MAX_THREADS];
    void *labels1[MAX_THREADS] = {unroll(`i', 0, 16, `&&a`'i,')};
    void *labels2[MAX_THREADS] = {unroll(`i', 0, 16, `&&b`'i,')};
    void *labels3[MAX_THREADS] = {unroll(`i', 0, 16, `&&c`'i,')};

    new_bottom=fl_conn_indexes_top-num_conns;
    if (new_bottom<=0) return;

    define(`MAX_NUM_CONNS', 16)
    goto *labels1[num_conns];
dnl Pass a: copy slot indexes relative to the current top.
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
 a`'eval(MAX_NUM_CONNS() - count):
    conn_indexes[MAX_THREADS - count]=fl_conn_indexes[fl_conn_indexes_top - count];
')dnl

    fl_conn_indexes_top=fl_conn_indexes_top-num_conns;
    goto *labels2[num_conns];
dnl Pass b: one accept() per slot. The helper macro defined inside the loop
dnl body expands to the array offset for the current count.
dnl NOTE(review): that helper reuses the name of an m4 builtin; defining it
dnl replaces the builtin and the undefine after the loop removes it for the
dnl rest of the input. Confirm nothing later relies on the builtin.
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
define(`index', `MAX_THREADS - count')dnl
 b`'eval(MAX_NUM_CONNS() - count):
    conn_fds[conn_indexes[index]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[index]],&slenghts[index]);
')dnl
undefine(`index')

    goto *labels3[num_conns];
dnl Pass c: flag every slot handled above as CONN_STATUS_READING.
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
 c`'eval(MAX_NUM_CONNS() - count):
    conn_statuses[conn_indexes[MAX_THREADS - count]]=CONN_STATUS_READING;
')dnl

}

请特别注意代码中的宏:define、unroll、eval、MAX_NUM_CONNS、count、index 和 i。还要注意 m4 特有的古怪引号写法(以 ` 开头、以 ' 结尾)。

我可以对此提出一个改进建议(无论您是否使用 m4)。您在这里使用 goto 的场景,其实可以用一个更为人熟知的控制结构来实现:switch(因为在没有 break 的情况下,贯穿(fallthrough)是默认行为):

    define(`MAX_NUM_CONNS', 16)
    switch (num_conns) {
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
    case eval(MAX_NUM_CONNS() - count):
        conn_indexes[MAX_THREADS - count]=fl_conn_indexes[fl_conn_indexes_top - count];
')dnl
    }

    fl_conn_indexes_top=fl_conn_indexes_top-num_conns;
    switch (num_conns) {
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
define(`index', `MAX_THREADS - count')dnl
    case eval(MAX_NUM_CONNS() - count):
        conn_fds[conn_indexes[index]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[index]],&slenghts[index]);
')dnl
    }
undefine(`index')

    switch (num_conns) {
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
    case eval(MAX_NUM_CONNS() - count):
        conn_statuses[conn_indexes[MAX_THREADS - count]]=CONN_STATUS_READING;
')dnl
    }

如果改用 switch,就可以删除标签数组。

最后,也许可以把它们合并成一个 switch:

    define(`MAX_NUM_CONNS', 16)
    switch (num_conns) {
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
define(`index', `MAX_THREADS - count')dnl
    case eval(MAX_NUM_CONNS() - count):
        conn_indexes[index]=fl_conn_indexes[fl_conn_indexes_top - count];
        conn_fds[conn_indexes[index]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[index]],&slenghts[index]);
        conn_statuses[conn_indexes[index]]=CONN_STATUS_READING;
')dnl
    }
undefine(`index')
    fl_conn_indexes_top=fl_conn_indexes_top-num_conns;

请注意,我已将对 fl_conn_indexes_top 的修改移到了整个展开循环完成之后——我不确定这样做是否正确,因为这取决于该变量在何时被读取或写入。这一点需要您自行判断。


正如我在评论中提到的,这也可以通过编译器标志来完成。GCC 支持 -funroll-loops 选项,它会在编译器层面为您完成循环展开。GCC 还支持 -fprofile-use,它(在提供了由 -fprofile-generate 生成的性能剖析数据时)会展开那些经剖析证实确实造成性能问题的循环。


最后:

我认为这都是过早的优化。既然每次迭代都要调用 accept,我很难相信循环的比较检查会成为瓶颈!

关于c - 需要展开循环宏,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34824118/

相关文章:

c++ - 错误输出包括最后几行代码

c++ - Flex和Bison生成C++头文件时如何使用m4?

makefile - 在 Makefile.am 中使用 pkg-config 变量的规范方法

bash - 从 m4 中的 esyscmd 中删除尾随换行符

c - 为什么 Frama-C 的这个使用 scanf() 的程序的依赖图看起来像这样?

c - 在 C 中传递和返回数组

c - 从用户空间访问内核内存(task_struct)

c - 如何将未终止的字符数组输出到标准输出

macros - m4 扩展里面的宏

c - cuda程序内存泄漏