c - 这个神秘的***检测到缓冲区溢出的来源是什么***: terminated error

标签 c c-ares

我有一个程序,它从一个文件中读取域名列表。它执行异步 DNS,然后使用异步 epoll 循环下载每个域的登录页面。

该程序在数千次迭代中运行良好,然后因 *** buffer溢出检测到***:终止 错误而崩溃。这是回溯:

Program received signal SIGABRT, Aborted.
__pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351415616) at pthread_kill.c:44
44  pthread_kill.c: No such file or directory.
(gdb) bt
#0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351415616) at pthread_kill.c:44
#1  __pthread_kill_internal (signo=6, threadid=140737351415616) at pthread_kill.c:80
#2  __GI___pthread_kill (threadid=140737351415616, signo=signo@entry=6) at pthread_kill.c:91
#3  0x00007ffff7db0476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4  0x00007ffff7d967b7 in __GI_abort () at abort.c:79
#5  0x00007ffff7df75e6 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7f48ef4 "*** %s ***: terminated\n") at ../sysdeps/posix/libc_fatal.c:155
#6  0x00007ffff7ea322a in __GI___fortify_fail (msg=msg@entry=0x7ffff7f48e9a "buffer overflow detected") at fortify_fail.c:26
#7  0x00007ffff7ea1b46 in __GI___chk_fail () at chk_fail.c:28
#8  0x00007ffff7ea316b in __fdelt_chk (d=<optimised out>) at fdelt_chk.c:25
#9  0x00007ffff7f97362 in ares_fds () from /lib/x86_64-linux-gnu/libcares.so.2
#10 0x000055555555682d in wait_ares (channel=0x555556bb32a0) at epoll_recv_with_async_dns.c:80
#11 0x000055555555773c in main (argc=2, argv=0x7fffffffe0a8) at epoll_recv_with_async_dns.c:303

正如您所看到的,回溯指向对 ares_fds 的调用。有问题的代码行是:

nfds = ares_fds(channel, &read_fds, &write_fds);

我看不出该行代码中如何存在缓冲区溢出。我可以做些什么来进一步调试并找到并解决问题。对于那些感兴趣的人,最小的复制器如下:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/socket.h>
#include <resolv.h>
#include <sys/epoll.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <time.h>
#include <ares.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>

#define MAXWAITING 1000 /* Max. number of parallel DNS queries */
#define MAXTRIES      3 /* Max. number of tries per domain */
#define DNSTIMEOUT    3000 /* Max. number of ms for first try */
#define SERVERS    "1.0.0.1,8.8.8.8" /* DNS server to use (Cloudflare & Google) */
#define MAXDOMAINS 8192
#define MAX_CONNECTIONS 8192
#define TIMEOUT 10000
int epfd;
int sockfd[MAX_CONNECTIONS];
struct epoll_event event[MAX_CONNECTIONS];
struct sockaddr_in dest[MAX_CONNECTIONS];
char resolved[MAXDOMAINS][254];
char ips[MAXDOMAINS][128];
int current = 0, active = 0, next = 0;
char servers[MAX_CONNECTIONS][128];
char domains[MAX_CONNECTIONS][254];
int i, num_ready, connections = 0, done = 0, total_bytes = 0, total_domains = 0, iterations = 0, count = 0;
static int nwaiting;

static void state_cb(void *data, int s, int read, int write)
{
    //printf("Change state fd %d read:%d write:%d\n", s, read, write);
}

static void callback(void *arg, int status, int timeouts, struct hostent *host)
{
    nwaiting--;

    if(!host || status != ARES_SUCCESS){
        //fprintf(stderr, "Failed to lookup %s\n", ares_strerror(status));
        return;
    }

    char ip[INET6_ADDRSTRLEN];

    if (host->h_addr_list[0] != NULL){
        inet_ntop(host->h_addrtype, host->h_addr_list[0], ip, sizeof(ip));
        strcpy(resolved[current], host->h_name);
        strcpy(ips[current], ip);
        if (current < MAXDOMAINS - 1) current++; else current = 0;
        active++;
        printf("active %d\r", active);
    }
}

static void wait_ares(ares_channel channel)
{
    struct timeval *tvp, tv;
    fd_set read_fds, write_fds;
    int nfds = 0;

    FD_ZERO(&read_fds);
    FD_ZERO(&write_fds);

    nfds = ares_fds(channel, &read_fds, &write_fds);
    
    if (nfds > 0) {
    tvp = ares_timeout(channel, NULL, &tv);
        select(nfds, &read_fds, &write_fds, NULL, tvp);
        ares_process(channel, &read_fds, &write_fds);
    }     
}
                
int main(int argc, char *argv[]) {
        
    sigaction(SIGPIPE, &(struct sigaction){SIG_IGN}, NULL);
    FILE * fp;
    char domain[128];
    size_t len = 0;
    ssize_t read;
    ares_channel channel;
    int status, dns_done = 0;
    int optmask;
    
    status = ares_library_init(ARES_LIB_INIT_ALL);
    if (status != ARES_SUCCESS) {
        printf("ares_library_init: %s\n", ares_strerror(status));
        return 1;
    }

    struct ares_options options = {
        .timeout = DNSTIMEOUT,     /* set first query timeout */
        .tries = MAXTRIES       /* set max. number of tries */
    };
    optmask = ARES_OPT_TIMEOUTMS | ARES_OPT_TRIES;

    status = ares_init_options(&channel, &options, optmask);
    if (status != ARES_SUCCESS) {
        printf("ares_init_options: %s\n", ares_strerror(status));
        return 1;
    }

    status = ares_set_servers_csv(channel, SERVERS);
    if (status != ARES_SUCCESS) {
        printf("ares_set_servers_csv: %s\n", ares_strerror(status));
        return 1;
    }
    
    fp = fopen(argv[1], "r");
    if (!fp)
        exit(EXIT_FAILURE);

    do{
        if (nwaiting >= MAXWAITING || dns_done) {
            do {
                wait_ares(channel);
                
            } while (nwaiting > MAXWAITING);
        }
        if (!dns_done) {
            if (fscanf(fp, "%128s", domain) == 1) {
                ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
                nwaiting++;
            } else {
                dns_done = 1;
            }
        }
    } while (active < MAX_CONNECTIONS);
    
    /*---Open sockets for streaming---*/
    for (i = 0; i < MAX_CONNECTIONS; i++)
    { 
        if ( (sockfd[i] = socket(AF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0)) < 0 ) {
            perror("Socket");
            exit(errno);
        }
        count++;
    }

    while (1)
    {
        /*---Do async DNS---*/
        while (/*active < MAXDOMAINS &&*/ nwaiting > 0) {
            //printf("active = %d MAXDOMAINS = %d nwaiting = %d MAXWAITING = %d\n", active, MAXDOMAINS, nwaiting, MAXWAITING);
            if (nwaiting >= MAXWAITING || dns_done) {
                do {
                    wait_ares(channel);
                } while (nwaiting > MAXWAITING);
            }
            if (!dns_done) {
                if (fscanf(fp, "%127s", domain) == 1) {
                    ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
                    nwaiting++;
                } else {
                    dns_done = 1;
                }
            }
        } //while (active < MAXDOMAINS);
        
        if (done && count == 0) break;
    }
    ares_destroy(channel);
    ares_library_cleanup();
    fclose(fp);
    printf("\nFinished without errors\n");
    return 0;
}

如果我注释掉创建套接字的部分,则不会发生中止:

 /*---Open sockets for streaming---*/
    for (i = 0; i < MAX_CONNECTIONS; i++)
    { 
        if ( (sockfd[i] = socket(AF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0)) < 0 ) {
            perror("Socket");
            exit(errno);
        }
        count++;
    }

所以无论问题是什么,它都与我有许多套接字文件描述符这一事实有关。有什么想法吗?

进一步编辑:

进一步调试似乎表明问题与打开的套接字数量有关。如果我将创建的套接字数量减少到 1017,则不再发生中止。而如果我创建 1018 个套接字,程序就会中止。

最佳答案

看起来这可能是根本原因:

https://c-ares.org/mail/c-ares-archive-2017-08/0002.shtml

>>> The stack trace is shown as above.
>>>
>>> /(gdb) bt/
>>> /#0 0x00007f959c01ac37 in __GI_raise (sig=sig_at_entry=6) at
>>> ../nptl/sysdeps/unix/sysv/linux/raise.c:56/
>>> /#1 0x00007f959c01e028 in __GI_abort () at abort.c:89/
>>> /#2 0x00007f959c0572a4 in __libc_message
>>> (do_abort=do_abort_at_entry=2, fmt=fmt_at_entry=0x7f959c166d70 "*** %s
>>> ***: %s terminated\n")/
>>> / at ../sysdeps/posix/libc_fatal.c:175/
>>> /#3 0x00007f959c0f283c in __GI___fortify_fail (msg=<optimized out>,
>>> msg_at_entry=0x7f959c166d07 "buffer overflow detected") at
>>> fortify_fail.c:38/
>>> /#4 0x00007f959c0f1710 in __GI___chk_fail () at chk_fail.c:28/
>>> /#5 0x00007f959c0f2787 in __fdelt_chk (d=<optimized out>) at
>>> fdelt_chk.c:25/
>>> /#6 0x00007f959c6b69ad in ares_fds () from
>>> /usr/local/multiplier/system/libs/libcares.so.2/
>>> /#7 0x000000000040b448 in rec_c_ares_execute () at
>>> /home/necs/dev/apat/source/recorder/recdns.c:157/
>>> /#8 0x00000000004052f2 in rec_main_thread (data=0x0) at
>>> /home/necs/dev/apat/source/recorder/rec.c:772/
>>> /#9 0x0000000000403de1 in main (argc=7, argv=0x7fff58cde398) at
>>> /home/necs/dev/apat/source/recorder/main.c:129/
>> ...

You are either crossing FD_SETSIZE limit, or have negative number of fds. Glibc checks this internally and causes crash if check will fail: https://github.com/lattera/glibc/blob/master/debug/fdelt_chk.c

Daniel Received on 2017-08-01

由于我不确定您所在的平台,因此除了跟踪先前 nfds(立即失败之前返回值)。

关于c - 这个神秘的***检测到缓冲区溢出的来源是什么***: terminated error,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/70570379/

相关文章:

c - 如何将指针指向的 textf 的内容复制到 char 数组(即指针 -> char 数组)

php - 如何启用 curl 的 AsynchDNS?

C-ares 获取 ns 记录

c - 如何根据使用的选项更改大小写

c - 为什么这段代码不计算负数的立方根?

c++ - 编码时如何获取我的自定义函数执行地址?

c++ - 多边形轮廓上的边并不总是正确的