# Issue the GET through a Session object rather than the module-level helper.
# NOTE(review): the URL below is an empty string -- presumably elided from the
# snippet; a real endpoint has to be filled in before this can run.
session = requests.Session()
params = dict(fd=1, count=1024, auth='auth_token')
r = session.get('', params=params)

# b'\xb3_\\l\xe2\xbf/:\x07'


服务器 API 文档说:

You can push multiple requests over single connection without waiting for answer, to improve performance. The server will process the requests in the order they are received and you are guaranteed to receive answers in the same order. It is important however to send all requests with "Connection: keep-alive", otherwise the API server will close the connection without processing the pending requests.

他们说的是在一个线程中连续发送多个请求,而无需等待响应。我想这被称为 HTTP 管道化(HTTP pipelining)。

如何使用 Python Requests 库执行此操作?

类似的回答建议使用并行调用,但这不是我要问的。它还说:“requests 会池化连接,保持 TCP 连接打开”。我该如何实现这一点?



您可以在不使用线程的情况下并行获取多个页面。下面的代码利用了 HTTP 管道化:它通过重置 HTTPSConnection 的状态(一个私有变量!)来欺骗它提前发送下一个请求。

from http.client import HTTPSConnection, _CS_IDLE
from urllib.parse import urlparse, urlunparse

def pipeline(host, pages, max_out_bound=4, debuglevel=0):
    """Fetch several pages from one HTTPS host over a pipelined connection.

    Requests are written back-to-back on a single ``HTTPSConnection`` without
    waiting for the previous answer.  ``http.client`` does not support this,
    so the connection's private state is reset to ``_CS_IDLE`` before every
    request to make it accept the next one early.

    Args:
        host: Host name to connect to; also sent as the ``Host`` header.
        pages: List of request paths.  Falsy entries (``''``/``None``) are
            never requested.  The list is updated IN PLACE with the new path
            when the server redirects to another path on the same host.
        max_out_bound: Maximum number of requests that may be awaiting a
            response at any time.
        debuglevel: Values greater than 0 print progress messages.

    Returns:
        A list parallel to ``pages``.  Each entry is one of:
        * the response body (``bytes``) for a 2xx answer;
        * ``(status, reason)`` for any other final status, or for a redirect
          without a usable ``Location`` header;
        * ``(status, reason, location)`` for a redirect to a different host;
        * ``None`` if the page was never requested.

    Notes:
        A ``Set-Cookie`` header on a 2xx answer is copied verbatim into the
        ``Cookie`` header of all later requests.  Same-host redirects are
        followed without a hop limit.
        NOTE(review): every response object wraps the socket in its own
        buffered reader, so bytes of the following response could be consumed
        early when the server sends answers back-to-back -- treat this as a
        demonstration of the technique, not production code.
    """
    from collections import deque  # local import keeps the block self-contained

    page_count = len(pages)
    conn = HTTPSConnection(host)
    finished = [False] * page_count
    content = [None] * page_count
    headers = {'Host': host, 'Content-Length': 0, 'Connection': 'Keep-Alive'}
    # (index, response) pairs in the order the requests were written.  The
    # server answers in that same order, so responses must be read from the
    # front of this queue -- not in page-index order, which goes wrong as soon
    # as a redirected page is re-sent behind requests that are still pending.
    in_flight = deque()

    try:
        while not all(finished):
            # Send as many pending requests as the window allows.
            waiting = {idx for idx, _ in in_flight}
            for i, page in enumerate(pages):
                if len(in_flight) >= max_out_bound:
                    break
                if page and not finished[i] and i not in waiting:
                    if debuglevel > 0:
                        print('Sending request for %r...' % (page,))
                    conn._HTTPConnection__state = _CS_IDLE  # private variable!
                    conn.request("GET", page, None, headers)
                    in_flight.append(
                        (i, conn.response_class(conn.sock, method=conn._method)))

            if not in_flight:
                # Nothing could be sent (only falsy paths are left, or the
                # window is <= 0); bail out instead of spinning forever.
                break

            # Read the oldest outstanding response.
            i, resp = in_flight.popleft()
            if debuglevel > 0:
                print('Retrieving %r...' % (pages[i],))
            resp.begin()  # parse status line and headers
            if debuglevel > 0:
                print('    %d %s' % (resp.status, resp.reason))

            body_read = False
            if 200 <= resp.status < 300:
                # Ok
                content[i] = resp.read()
                body_read = True
                cookie = resp.getheader('Set-Cookie')
                if cookie is not None:
                    headers['Cookie'] = cookie
                finished[i] = True
            elif 300 <= resp.status < 400:
                # Redirect
                loc = resp.getheader('Location')
                parsed = loc and urlparse(loc)
                if not parsed:
                    # Missing or empty location header
                    content[i] = (resp.status, resp.reason)
                    finished[i] = True
                elif parsed.netloc != '' and parsed.netloc != host:
                    # Redirect to another host: cannot follow on this connection
                    content[i] = (resp.status, resp.reason, loc)
                    finished[i] = True
                else:
                    # Same host: retry with the new path on the next send pass.
                    path = urlunparse(
                        parsed._replace(scheme='', netloc='', fragment=''))
                    if debuglevel > 0:
                        print('  Updated %r to %r' % (pages[i], path))
                    pages[i] = path
            else:
                # Failed (4xx/5xx), or an unexpected 1xx status; either way
                # the page is done so it cannot be retried endlessly.
                content[i] = (resp.status, resp.reason)
                finished[i] = True

            if resp.will_close:
                # The server is closing the connection: everything already
                # sent behind this response is lost and has to be re-sent.
                resp.close()
                conn.close()
                if debuglevel > 0:
                    print('  Connection closed')
                while in_flight:
                    j, stale = in_flight.popleft()
                    if debuglevel > 0:
                        print('  Discarding out-bound request for %r' % (pages[j],))
                    stale.close()
            elif not body_read:
                # Drain the unused body so the next response starts at a
                # clean boundary on the shared socket.
                resp.read()
    finally:
        # Release the socket and any response readers even if a request or
        # read raised part-way through.
        for _, stale in in_flight:
            stale.close()
        conn.close()
    return content

if __name__ == '__main__':
    # NOTE(review): the host was an empty string, which cannot be connected
    # to; 'en.wikipedia.org' is inferred from the /wiki/... paths -- confirm.
    domain = 'en.wikipedia.org'
    pages = ['/wiki/HTTP_pipelining', '/wiki/HTTP', '/wiki/HTTP_persistent_connection']
    data = pipeline(domain, pages, max_out_bound=3, debuglevel=1)
    # pipeline() rewrites `pages` in place on same-host redirects, so each
    # heading shows the path that was finally requested.
    for path, page in zip(pages, data):
        print('==== Page %r ====' % (path,))
        # Successful results are bytes (show only the first 512); anything
        # else is a small status tuple or None and is printed whole.
        print(page[:512] if isinstance(page, bytes) else page)

