我有一个程序,它从一个文件中读取域名列表。它执行异步 DNS,然后使用异步 epoll 循环下载每个域的登录页面。
该程序在数千次迭代中运行良好,然后因 *** buffer溢出检测到***:终止
错误而崩溃。这是回溯:
Program received signal SIGABRT, Aborted.
__pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351415616) at pthread_kill.c:44
44 pthread_kill.c: No such file or directory.
(gdb) bt
#0 __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351415616) at pthread_kill.c:44
#1 __pthread_kill_internal (signo=6, threadid=140737351415616) at pthread_kill.c:80
#2 __GI___pthread_kill (threadid=140737351415616, signo=signo@entry=6) at pthread_kill.c:91
#3 0x00007ffff7db0476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4 0x00007ffff7d967b7 in __GI_abort () at abort.c:79
#5 0x00007ffff7df75e6 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7f48ef4 "*** %s ***: terminated\n") at ../sysdeps/posix/libc_fatal.c:155
#6 0x00007ffff7ea322a in __GI___fortify_fail (msg=msg@entry=0x7ffff7f48e9a "buffer overflow detected") at fortify_fail.c:26
#7 0x00007ffff7ea1b46 in __GI___chk_fail () at chk_fail.c:28
#8 0x00007ffff7ea316b in __fdelt_chk (d=<optimised out>) at fdelt_chk.c:25
#9 0x00007ffff7f97362 in ares_fds () from /lib/x86_64-linux-gnu/libcares.so.2
#10 0x000055555555682d in wait_ares (channel=0x555556bb32a0) at epoll_recv_with_async_dns.c:80
#11 0x000055555555773c in main (argc=2, argv=0x7fffffffe0a8) at epoll_recv_with_async_dns.c:303
正如您所看到的,回溯指向对 ares_fds
的调用。有问题的代码行是:
nfds = ares_fds(channel, &read_fds, &write_fds);
我看不出该行代码中如何存在缓冲区溢出。我可以做些什么来进一步调试并找到并解决问题。对于那些感兴趣的人,最小的复制器如下:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/socket.h>
#include <resolv.h>
#include <sys/epoll.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <time.h>
#include <ares.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#define MAXWAITING 1000 /* Max. number of parallel DNS queries */
#define MAXTRIES 3 /* Max. number of tries per domain */
#define DNSTIMEOUT 3000 /* Max. number of ms for first try */
#define SERVERS "1.0.0.1,8.8.8.8" /* DNS server to use (Cloudflare & Google) */
#define MAXDOMAINS 8192
#define MAX_CONNECTIONS 8192
#define TIMEOUT 10000
int epfd;
int sockfd[MAX_CONNECTIONS];
struct epoll_event event[MAX_CONNECTIONS];
struct sockaddr_in dest[MAX_CONNECTIONS];
char resolved[MAXDOMAINS][254];
char ips[MAXDOMAINS][128];
int current = 0, active = 0, next = 0;
char servers[MAX_CONNECTIONS][128];
char domains[MAX_CONNECTIONS][254];
int i, num_ready, connections = 0, done = 0, total_bytes = 0, total_domains = 0, iterations = 0, count = 0;
static int nwaiting;
static void state_cb(void *data, int s, int read, int write)
{
//printf("Change state fd %d read:%d write:%d\n", s, read, write);
}
static void callback(void *arg, int status, int timeouts, struct hostent *host)
{
nwaiting--;
if(!host || status != ARES_SUCCESS){
//fprintf(stderr, "Failed to lookup %s\n", ares_strerror(status));
return;
}
char ip[INET6_ADDRSTRLEN];
if (host->h_addr_list[0] != NULL){
inet_ntop(host->h_addrtype, host->h_addr_list[0], ip, sizeof(ip));
strcpy(resolved[current], host->h_name);
strcpy(ips[current], ip);
if (current < MAXDOMAINS - 1) current++; else current = 0;
active++;
printf("active %d\r", active);
}
}
static void wait_ares(ares_channel channel)
{
struct timeval *tvp, tv;
fd_set read_fds, write_fds;
int nfds = 0;
FD_ZERO(&read_fds);
FD_ZERO(&write_fds);
nfds = ares_fds(channel, &read_fds, &write_fds);
if (nfds > 0) {
tvp = ares_timeout(channel, NULL, &tv);
select(nfds, &read_fds, &write_fds, NULL, tvp);
ares_process(channel, &read_fds, &write_fds);
}
}
int main(int argc, char *argv[]) {
sigaction(SIGPIPE, &(struct sigaction){SIG_IGN}, NULL);
FILE * fp;
char domain[128];
size_t len = 0;
ssize_t read;
ares_channel channel;
int status, dns_done = 0;
int optmask;
status = ares_library_init(ARES_LIB_INIT_ALL);
if (status != ARES_SUCCESS) {
printf("ares_library_init: %s\n", ares_strerror(status));
return 1;
}
struct ares_options options = {
.timeout = DNSTIMEOUT, /* set first query timeout */
.tries = MAXTRIES /* set max. number of tries */
};
optmask = ARES_OPT_TIMEOUTMS | ARES_OPT_TRIES;
status = ares_init_options(&channel, &options, optmask);
if (status != ARES_SUCCESS) {
printf("ares_init_options: %s\n", ares_strerror(status));
return 1;
}
status = ares_set_servers_csv(channel, SERVERS);
if (status != ARES_SUCCESS) {
printf("ares_set_servers_csv: %s\n", ares_strerror(status));
return 1;
}
fp = fopen(argv[1], "r");
if (!fp)
exit(EXIT_FAILURE);
do{
if (nwaiting >= MAXWAITING || dns_done) {
do {
wait_ares(channel);
} while (nwaiting > MAXWAITING);
}
if (!dns_done) {
if (fscanf(fp, "%128s", domain) == 1) {
ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
nwaiting++;
} else {
dns_done = 1;
}
}
} while (active < MAX_CONNECTIONS);
/*---Open sockets for streaming---*/
for (i = 0; i < MAX_CONNECTIONS; i++)
{
if ( (sockfd[i] = socket(AF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0)) < 0 ) {
perror("Socket");
exit(errno);
}
count++;
}
while (1)
{
/*---Do async DNS---*/
while (/*active < MAXDOMAINS &&*/ nwaiting > 0) {
//printf("active = %d MAXDOMAINS = %d nwaiting = %d MAXWAITING = %d\n", active, MAXDOMAINS, nwaiting, MAXWAITING);
if (nwaiting >= MAXWAITING || dns_done) {
do {
wait_ares(channel);
} while (nwaiting > MAXWAITING);
}
if (!dns_done) {
if (fscanf(fp, "%127s", domain) == 1) {
ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
nwaiting++;
} else {
dns_done = 1;
}
}
} //while (active < MAXDOMAINS);
if (done && count == 0) break;
}
ares_destroy(channel);
ares_library_cleanup();
fclose(fp);
printf("\nFinished without errors\n");
return 0;
}
如果我注释掉创建套接字的部分,则不会发生中止:
/*---Open sockets for streaming---*/
for (i = 0; i < MAX_CONNECTIONS; i++)
{
if ( (sockfd[i] = socket(AF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0)) < 0 ) {
perror("Socket");
exit(errno);
}
count++;
}
所以无论问题是什么,它都与我有许多套接字文件描述符这一事实有关。有什么想法吗?
进一步编辑:
进一步调试似乎表明问题与打开的套接字数量有关。如果我将创建的套接字数量减少到 1017,则不再发生中止。而如果我创建 1018 个套接字,程序就会中止。
最佳答案
看起来这可能是根本原因:
https://c-ares.org/mail/c-ares-archive-2017-08/0002.shtml
>>> The stack trace is shown as above. >>> >>> /(gdb) bt/ >>> /#0 0x00007f959c01ac37 in __GI_raise (sig=sig_at_entry=6) at >>> ../nptl/sysdeps/unix/sysv/linux/raise.c:56/ >>> /#1 0x00007f959c01e028 in __GI_abort () at abort.c:89/ >>> /#2 0x00007f959c0572a4 in __libc_message >>> (do_abort=do_abort_at_entry=2, fmt=fmt_at_entry=0x7f959c166d70 "*** %s >>> ***: %s terminated\n")/ >>> / at ../sysdeps/posix/libc_fatal.c:175/ >>> /#3 0x00007f959c0f283c in __GI___fortify_fail (msg=<optimized out>, >>> msg_at_entry=0x7f959c166d07 "buffer overflow detected") at >>> fortify_fail.c:38/ >>> /#4 0x00007f959c0f1710 in __GI___chk_fail () at chk_fail.c:28/ >>> /#5 0x00007f959c0f2787 in __fdelt_chk (d=<optimized out>) at >>> fdelt_chk.c:25/ >>> /#6 0x00007f959c6b69ad in ares_fds () from >>> /usr/local/multiplier/system/libs/libcares.so.2/ >>> /#7 0x000000000040b448 in rec_c_ares_execute () at >>> /home/necs/dev/apat/source/recorder/recdns.c:157/ >>> /#8 0x00000000004052f2 in rec_main_thread (data=0x0) at >>> /home/necs/dev/apat/source/recorder/rec.c:772/ >>> /#9 0x0000000000403de1 in main (argc=7, argv=0x7fff58cde398) at >>> /home/necs/dev/apat/source/recorder/main.c:129/ >> ...
block 引用>You are either crossing FD_SETSIZE limit, or have negative number of fds. Glibc checks this internally and causes crash if check will fail: https://github.com/lattera/glibc/blob/master/debug/fdelt_chk.c
Daniel Received on 2017-08-01
由于我不确定您所在的平台,因此除了跟踪先前 nfds(立即在失败之前返回值)。
关于c - 这个神秘的***检测到缓冲区溢出的来源是什么***: terminated error,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/70570379/