我是 C 语言和 libcurl 的初学者,正在编写一个程序,从某个网站获取 1000 个数据值。该网站会先返回一个作业编号(job number),然后重定向到另一个页面以获取结果。由于我的代码将近 500 行,这里只给出程序的大致流程,以及我认为有问题的一小段代码:
/*
 * Abridged fetch loop from the question: for each of 1000 rows, request
 * `url` with redirects enabled, read the effective (post-redirect) URL,
 * copy up to 18 characters starting at offset 28 of it into `job`, then
 * request the effective URL again and collect the body into `s` through
 * writefunc. The "------" line stands for code the author left out, and
 * the loop's closing brace is not part of this fragment.
 */
for(row=0;row<1000;row++)
{
------
url = "http://example.com";
/* NOTE(review): this runs on every iteration; no matching
 * curl_global_cleanup() is visible in this fragment. */
curl_global_init(CURL_GLOBAL_ALL);
curlHandle = curl_easy_init();
if(curlHandle)
{
/* NOTE(review): 1800 and 1 are passed as int here, while the VERBOSE
 * call below passes 1L; libcurl documents these options as taking a
 * long -- confirm and consider 1800L / 1L. */
curl_easy_setopt(curlHandle, CURLOPT_TIMEOUT, 1800);
curl_easy_setopt(curlHandle, CURLOPT_ERRORBUFFER, curlErrStr);
curl_easy_setopt(curlHandle, CURLOPT_FOLLOWLOCATION, 1);
curl_easy_setopt(curlHandle, CURLOPT_URL, url);
curl_easy_setopt(curlHandle, CURLOPT_LOW_SPEED_LIMIT, dl_lowspeed_bytes);
curl_easy_setopt(curlHandle, CURLOPT_LOW_SPEED_TIME, dl_lowspeed_time);
curl_easy_setopt(curlHandle, CURLOPT_VERBOSE, 1L);
/* NOTE(review): as shown, url was just assigned a string literal, so
 * this free() is applied to memory that did not come from malloc --
 * undefined behaviour unless the elided code reassigns url. */
free(url);
curlErr = curl_easy_perform(curlHandle);
if(curlErr != CURLE_OK)
{
/* NOTE(review): curl_easy_cleanup() below sits inside the else branch,
 * so on this failure path the handle is not released in the visible code. */
fprintf(stderr, "curl_easy_perform() failed: %s\n",curl_easy_strerror(curlErr));
}
else
{
curlErr = curl_easy_getinfo(curlHandle, CURLINFO_EFFECTIVE_URL, &url_new);
if((CURLE_OK == curlErr) && url_new)
{
/* Copies at most 18 characters starting at url_new + 28, then strips
 * the newline that the format string itself appended.
 * NOTE(review): assumes the effective URL is at least 28 characters
 * long and that job can hold 20 bytes -- neither is shown here. */
sprintf(job,"%.*s\n", 18, url_new + 28);
if((ptr1 = strchr(job, '\n')) != NULL)
*ptr1 = '\0';
init_string(&s);
curl_easy_setopt(curlHandle, CURLOPT_TIMEOUT, 1800 );
curl_easy_setopt(curlHandle, CURLOPT_URL, url_new);
curl_easy_setopt(curlHandle, CURLOPT_WRITEFUNCTION, writefunc);
curl_easy_setopt(curlHandle, CURLOPT_WRITEDATA, &s);
curlErr1 = curl_easy_perform(curlHandle);
/* NOTE(review): curlErr1 is not checked before s is printed; if this
 * transfer fails before any data arrives, s.ptr is still the empty
 * string set up by init_string, so a length of 0 is printed.
 * strlen() returns size_t, for which the matching specifier is %zu. */
printf("###### %lu\t%s\n",strlen(s.ptr),s.ptr);
free(s.ptr);
}
curl_easy_cleanup(curlHandle);
}
}
相关的结构体和函数如下:
/* Growable, NUL-terminated byte buffer used to accumulate a response body. */
struct string {
    char   *ptr;   /* heap storage; holds len bytes followed by a '\0' */
    size_t  len;   /* number of bytes stored, terminator not counted   */
};
/*
 * Set *a up as an empty string: length 0 and a one-byte heap buffer that
 * contains only the terminating '\0'.
 *
 * If the allocation fails, a message is written to stderr and the process
 * exits with EXIT_FAILURE. The caller is responsible for free()ing a->ptr.
 */
void init_string(struct string *a)
{
    a->len = 0;

    /* len is 0 at this point, so this reserves room for the terminator only. */
    a->ptr = malloc(a->len + 1);
    if (!a->ptr) {
        fprintf(stderr, "malloc() failed\n");
        exit(EXIT_FAILURE);
    }

    *a->ptr = '\0';
}
/*
 * Append size * nmemb bytes from ptr to the string *a, growing its buffer
 * with realloc and keeping it NUL-terminated.
 *
 * Returns the number of bytes appended (size * nmemb). If realloc fails,
 * a message is written to stderr and the process exits with EXIT_FAILURE.
 */
size_t writefunc(void *ptr, size_t size, size_t nmemb, struct string *a)
{
    const size_t chunk = size * nmemb;
    const size_t total = a->len + chunk;

    /* One extra byte so the buffer can always be used as a C string. */
    a->ptr = realloc(a->ptr, total + 1);
    if (!a->ptr) {
        fprintf(stderr, "realloc() failed\n");
        exit(EXIT_FAILURE);
    }

    memcpy(a->ptr + a->len, ptr, chunk);
    a->len = total;
    a->ptr[a->len] = '\0';

    return chunk;
}
程序没有报告任何错误。但在这 1000 条数据中,约有 50% 因 curl_easy_perform() 失败(超时)而无法获取;另有约 20% 的输出行显示 strlen(s.ptr) 为 0、s.ptr 为空。其余的都能正确获取。
对于输出长度为 0 的那些情况,详细模式(CURLOPT_VERBOSE)给出的信息如下:
- 与主机 www.example.com 的连接 #0 保持不变
- getaddrinfo(3) 失败:80
- 无法解析主机“”
- 关闭连接#1
- 无法解析主机名 0
请指出程序中可能存在的错误。
最佳答案
Here is how I would fetch data using cURL
static CURL *curl = NULL;
CURL *initCURL(void)
{
curl_global_init(CURL_GLOBAL_DEFAULT);
curl = curl_easy_init();
if(curl)
{
// now set all the desired options
curl_easy_setopt(curl, CURLOPT_URL, "http://example.com");
/* example.com is redirected, so we tell libcurl to follow redirection */
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
// etc
}
else
{ // else cURL object creation failed
// display appropriate error message
}
}
void endCurl(void)
{
// and then when all done with the cURL object,
// cleanup
curl_easy_cleanup(curl);
}
/*
 * Run one transfer on the given easy handle.
 *
 * Returns the CURLcode from curl_easy_perform(); when it is not CURLE_OK,
 * the corresponding error text is also written to stderr.
 */
CURLcode execCurl( CURL *curl )
{
    // One request per call; invoke again for each fetch
    const CURLcode status = curl_easy_perform(curl);

    if(status == CURLE_OK)
    {
        return status;
    }

    // Report the failure, then pass the code on to the caller
    fprintf(stderr, "curl_easy_perform() failed: %s\n",
            curl_easy_strerror(status));
    return status;
}
Note:
I have had this same problem with the cURL timeout occurring.
The best recovery method I found is:
when a timeout occurs, retry the communication, requesting the same data
关于c - 在 C 中的 for 循环中使用多个 curl ,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23746885/