python - 如何使用 BeautifulSoup 修复 Python 中的 'Connection aborted.' 错误

标签 python beautifulsoup disconnect

几个星期以来,我每天都运行这段代码,从未出现任何错误。今天早上,它正确执行了 100 多次 for 循环迭代,然后出现了连接问题。此后我每次尝试运行,它都会执行 5 到 130 次迭代,但总是在完成之前报出连接错误。

我仍然会收到 200 状态码。我看到一些帖子提到了 Python 中的“内存泄漏”问题,但我不确定如何判断这是否就是这里的问题。同样奇怪的是,直到今天它一直运行良好。

我还有针对同一网站其他页面的类似代码,它们仍然一直能够正常运行。

这是代码:

import requests
from bs4 import BeautifulSoup

# Scrape the member count from each timebank's page on community.timebanks.org
# and collect one dict per timebank in `updates`.
# NOTE(review): `timebanks`, `total_timebanks` and `headers` are not defined in
# this snippet — presumably set up in an earlier cell; verify before running.
updates = []

print(f'Getting {total_timebanks} timebank details... ')
for timebank in range(len(timebanks)):
    # `timebanks['slug'][timebank]` is looked up by column name first, then by
    # position — presumably a pandas DataFrame with a default integer index;
    # TODO confirm.
    url = f"http://community.timebanks.org/{timebanks['slug'][timebank]}"
    # This is the call that raises ConnectionError in the traceback below.
    # No `timeout=` is passed and the exception is not caught, so a single
    # dropped connection aborts the whole loop.
    res = requests.get(url, headers=headers)
    soup = BeautifulSoup(res.content, 'lxml')

    update = {}
    # The member count is kept as a string with surrounding whitespace and
    # thousands separators removed. If the div is missing, find() returns None
    # and the `.span` access raises AttributeError.
    update['members'] = soup.find('div', {'class': 'views-field-field-num-users-value'}).span.text.strip().replace(',', '')

    updates.append(update)

    # Pause one second between requests.
    # NOTE(review): `time` is not imported in this snippet — presumably
    # imported elsewhere in the session.
    time.sleep(1)

这是完整的错误消息:

---------------------------------------------------------------------------
RemoteDisconnected                        Traceback (most recent call last)
/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    383                     # otherwise it looks like a programming error was the cause.
--> 384                     six.raise_from(e, None)
    385         except (SocketTimeout, BaseSSLError, SocketError) as e:

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    379                 try:
--> 380                     httplib_response = conn.getresponse()
    381                 except Exception as e:

/anaconda3/envs/DSI-6/lib/python3.6/http/client.py in getresponse(self)
   1330             try:
-> 1331                 response.begin()
   1332             except ConnectionError:

/anaconda3/envs/DSI-6/lib/python3.6/http/client.py in begin(self)
    296         while True:
--> 297             version, status, reason = self._read_status()
    298             if status != CONTINUE:

/anaconda3/envs/DSI-6/lib/python3.6/http/client.py in _read_status(self)
    265             # sending a valid response.
--> 266             raise RemoteDisconnected("Remote end closed connection without"
    267                                      " response")

RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred:

ProtocolError                             Traceback (most recent call last)
/anaconda3/envs/DSI-6/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    448                     retries=self.max_retries,
--> 449                     timeout=timeout
    450                 )

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    637             retries = retries.increment(method, url, error=e, _pool=self,
--> 638                                         _stacktrace=sys.exc_info()[2])
    639             retries.sleep()

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    366             if read is False or not self._is_method_retryable(method):
--> 367                 raise six.reraise(type(error), error, _stacktrace)
    368             elif read is not None:

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
    684         if value.__traceback__ is not tb:
--> 685             raise value.with_traceback(tb)
    686         raise value

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    383                     # otherwise it looks like a programming error was the cause.
--> 384                     six.raise_from(e, None)
    385         except (SocketTimeout, BaseSSLError, SocketError) as e:

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    379                 try:
--> 380                     httplib_response = conn.getresponse()
    381                 except Exception as e:

/anaconda3/envs/DSI-6/lib/python3.6/http/client.py in getresponse(self)
   1330             try:
-> 1331                 response.begin()
   1332             except ConnectionError:

/anaconda3/envs/DSI-6/lib/python3.6/http/client.py in begin(self)
    296         while True:
--> 297             version, status, reason = self._read_status()
    298             if status != CONTINUE:

/anaconda3/envs/DSI-6/lib/python3.6/http/client.py in _read_status(self)
    265             # sending a valid response.
--> 266             raise RemoteDisconnected("Remote end closed connection without"
    267                                      " response")

ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
<ipython-input-17-31257fee2c23> in <module>
      5 for timebank in range(len(timebanks)):
      6     url = f"http://community.timebanks.org/{timebanks['slug'][timebank]}"
----> 7     res = requests.get(url, headers=headers)
      8     soup = BeautifulSoup(res.content, 'lxml')
      9 

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/requests/api.py in get(url, params, **kwargs)
     73 
     74     kwargs.setdefault('allow_redirects', True)
---> 75     return request('get', url, params=params, **kwargs)
     76 
     77 

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/requests/api.py in request(method, url, **kwargs)
     58     # cases, and look like a memory leak in others.
     59     with sessions.Session() as session:
---> 60         return session.request(method=method, url=url, **kwargs)
     61 
     62 

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    531         }
    532         send_kwargs.update(settings)
--> 533         resp = self.send(prep, **send_kwargs)
    534 
    535         return resp

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
    644 
    645         # Send the request
--> 646         r = adapter.send(request, **kwargs)
    647 
    648         # Total elapsed time of the request (approximately)

/anaconda3/envs/DSI-6/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    496 
    497         except (ProtocolError, socket.error) as err:
--> 498             raise ConnectionError(err, request=request)
    499 
    500         except MaxRetryError as e:

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))

最佳答案

问题似乎已经自行解决。在没有对代码进行任何更改的情况下,今天早上它恢复了正常运行。

我不太清楚为什么昨天出现连接错误,但这似乎是网站的问题,而不是代码的问题。

感谢您的回复!供参考,我还尝试过将 sleep 计时器增加到 30,但这并没有解决昨天的问题。

关于python - 如何使用 BeautifulSoup 修复 Python 中的 'Connection aborted.' 错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/54698994/

相关文章:

python - 有没有办法创建带有参数的符号链接(symbolic link)? (Linux)

python - 如何在 Python 和 LXML 中解析 XML?

c# - 如何断开匿名事件?

python - 检查 BeautifulSoup 3 中的元素类型

python - 如何在 BeautifulSoup 中添加 'href contains' 条件

Swift:断开 AVAudioUnit 与播放 AVAudioEngine 的连接

java - 断开与 weblogic JMS 的连接

python - Boto3 S3 client.put_bucket_logging() 坏了?

Python:如何在嵌套循环中用下一个替换 tqdm 进度条?

python - 如何检查python pandas中列的dtype