c - 当http header错误报告content-length时该怎么办

标签 c sockets https

我试图通过 https 下载网页,首先使用 HEAD 请求下载 header ,然后解析以获得内容长度,然后使用内容长度加上 header 的一些空间来为缓冲区分配内存来存储GET 请求的结果。看来 stackoverflow.com 提供的 Content-Length 太小,因此我的代码出现段错误。

我尝试过查看堆栈溢出过去的问题,看看如何动态分配内存来处理错误报告其内容长度的页面,但无法找到任何合适的答案。

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <openssl/bio.h>
#include <openssl/ssl.h>
#include <openssl/err.h>

#define MAX_HEADER_SIZE 8192

/**
 * Main SSL demonstration code entry point
 */
int main() {
    char* host_and_port = "stackoverflow.com:443"; 
    char* head_request = "HEAD / HTTP/1.1\r\nHost: stackoverflow.com\r\n\r\n"; 
    char* get_request = "GET / HTTP/1.1\r\nHost: stackoverflow.com\r\n\r\n"; 
    char* store_path = "mycert.pem"; 
    char *header_token, *line_token, content_length_line[1024];
    char *cmp = "\r\n";
    char *html;
    char *get;
    int content_length;
    size_t i = 0;
    char buffer[MAX_HEADER_SIZE];
    buffer[0] = 0;

    BIO* bio;
    SSL_CTX* ctx = NULL;
    SSL* ssl = NULL;

    /* initilise the OpenSSL library */
    SSL_load_error_strings();
    SSL_library_init();
    ERR_load_BIO_strings();
    OpenSSL_add_all_algorithms();

    bio = NULL;
    int r = 0;

    /* Set up the SSL pointers */
    ctx = SSL_CTX_new(TLS_client_method());
    ssl = NULL;
    r = SSL_CTX_load_verify_locations(ctx, store_path, NULL);

    if (r == 0) {
        fprintf(stdout,"Unable to load the trust store from %s.\n", store_path);
        fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
        fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
        ERR_print_errors_fp(stdout);
    }

    /* Setting up the BIO SSL object */
    bio = BIO_new_ssl_connect(ctx);
    BIO_get_ssl(bio, &ssl);
    if (!(ssl)) {
        printf("Unable to allocate SSL pointer.\n");
        fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
        fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
        ERR_print_errors_fp(stdout); 
        bio = NULL;           
    }
    SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);

    /* Attempt to connect */
    BIO_set_conn_hostname(bio, host_and_port);

    /* Verify the connection opened and perform the handshake */
    if (BIO_do_connect(bio) < 1) {
        fprintf(stdout, "Unable to connect BIO.%s\n", host_and_port);
        fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
        fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
        ERR_print_errors_fp(stdout);
        bio = NULL;
    }

    if (SSL_get_verify_result(ssl) != X509_V_OK) {
        printf("Unable to verify connection result.\n");
        fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
        fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
        ERR_print_errors_fp(stdout);            
    }

    if (bio == NULL)
        return (EXIT_FAILURE);

    r = -1;

    while (r < 0) {

        r = BIO_write(bio, head_request, strlen(head_request));
        if (r <= 0) {
            if (!BIO_should_retry(bio)) {
                printf("BIO_read should retry test failed.\n");
                fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                ERR_print_errors_fp(stdout);            
                continue;
            }
            /* It would be prudent to check the reason for the retry and handle
            * it appropriately here */
        }
    }

    r = -1;

    while (r < 0) {
        r = BIO_read(bio, buffer, MAX_HEADER_SIZE);
        if (r == 0) {
            printf("Reached the end of the data stream.\n");
            fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
            fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
            ERR_print_errors_fp(stdout);
            continue;
        } else if (r < 0) {
            if (!BIO_should_retry(bio)) {
                printf("BIO_read should retry test failed.\n");
                fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                ERR_print_errors_fp(stdout);
                continue;
            }

            /* It would be prudent to check the reason for the retry and handle
            * it appropriately here */
        }
    };
    printf("%s\r\n", buffer);

    header_token = strtok(buffer, cmp);

    while (header_token != NULL)
    {
        //printf ("header_token: %s\n\n", header_token);
        if (strncmp(header_token, "Content-Length:", strlen("Content-Length:")) == 0 
        || strncmp(header_token, "content-length:", strlen("content-length:")) == 0)
        {
            //printf ("header_token %s is equal to Content-Length:\n", header_token);
            strcpy(content_length_line, header_token);
        }
        header_token = strtok(NULL, cmp);
    }

    if (strlen(content_length_line) > 0) 
    {
        line_token = strtok(content_length_line, " ");
        line_token = strtok(NULL, " ");
        content_length = atoi(line_token);
        printf ("Content-Length = %d\n", content_length);
    }

    //char get[content_length + MAX_HEADER_SIZE];
    get = malloc((content_length + MAX_HEADER_SIZE)*sizeof(char));
    if (get == NULL) {
        fprintf(stdout, "Out of memory\n");
        return (EXIT_FAILURE);
    }

    r = -1;

    while (r < 0) {

        r = BIO_write(bio, get_request, strlen(get_request));
        if (r <= 0) {
            if (!BIO_should_retry(bio)) {
                printf("BIO_read should retry test failed.\n");
                fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                ERR_print_errors_fp(stdout);            
                continue;
            }
            /* It would be prudent to check the reason for the retry and handle
            * it appropriately here */
        }
    }

    r = -1;

    while (r) {
        while (r < 0) {
            r = BIO_read(bio, buffer, 4096);
            if (r == 0) {
                printf("Reached the end of the data stream.\n");
                fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                ERR_print_errors_fp(stdout);
                continue;
            } else if (r < 0) {
                if (!BIO_should_retry(bio)) {
                    printf("BIO_read should retry test failed.\n");
                    fprintf(stdout, "Error: %s\n", ERR_reason_error_string(ERR_get_error()));
                    fprintf(stdout, "%s\n", ERR_error_string(ERR_get_error(), NULL));
                    ERR_print_errors_fp(stdout);
                    continue;
                }

                /* It would be prudent to check the reason for the retry and handle
                * it appropriately here */
            }
        };
        printf("Received %d bytes\n",r);
        printf("Received total of %ld bytes of %d\n", i+r, content_length);
        memcpy(get+i, buffer, r);
        i += r;
    }
    printf("%s\r\n", buffer);

    /* clean up the SSL context resources for the encrypted link */
    SSL_CTX_free(ctx);

    free(get);  

    return (EXIT_SUCCESS);
}

我通常希望能够打印出完整的网页,但由于内容长度错误,我得到以下输出和段错误。

Received 1752 bytes
Received total of 248784 bytes of 105585

Program received signal SIGSEGV, Segmentation fault.
__memmove_sse2_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:404
404     ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: No such file or directory.

我应该如何处理内容长度不正确的页面?

最佳答案

HEAD 请求的响应中的 Content-length 无关紧要。只有包含实际正文的响应中的 Content-length 是相关的(即对 GETPOST...的响应)。而这个Content-length应该用来读取HTTP body,即先读取HTTP header,确定长度,然后按照指定读取body。即使可以读取更多数据,它们也不属于响应正文。

除此之外,您正在执行 HTTP/1.1 请求。这意味着服务器可能使用 Transfer-Encoding: chunked 在这种情况下 Content-length 的值也无关紧要。相反,分块编码优先,您需要根据每个给定 block 的长度读取正文的所有 block 。

关于c - 当http header错误报告content-length时该怎么办,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57258387/

相关文章:

go - 如何在 golang 中使用自签名证书设置 https 服务器

apache - 将 SSL 证书从 CPanel 迁移到 DigitalOcean Apache 服务器

c - 在 C 程序中获取段错误

python - 对需要另一个扩展的 Python 进行 C 扩展

javascript - 通过服务器将视频从一个客户端流式传输到另一个客户端

c - 使用 fork 和 dup 的简单 http 通用服务器

java - 在大型 Java 应用程序中,有没有办法确保只使用 TLSv1.2 或更高版本?

c - 源代码中初始化结构体时出错

c - 如何在Microsoft Visual Studio 2010中链接libm库

ruby - 如何使用 Unix 套接字对通信 Rust 和 Ruby 进程