python - selenium.common.exceptions : session deleted because of page crash from tab crashed

我有一个在 ubuntu(EC2 实例，t2.small)上运行的简单 python web-scraper，到目前为止它只打印出一个 url 列表:

from bs4 import BeautifulSoup
import requests
import string
import json
import geocoder
import mapbox
import selenium
from selenium import webdriver

from selenium import webdriver
from bs4 import BeautifulSoup as bs
import datetime
from datetime import datetime as dt
import re
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import ElementNotVisibleException
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException

from selenium.webdriver.common.by import By

# Configure headless Chrome and start the WebDriver session used by the rest
# of the script.
chrome_options = Options()

# Command-line switches passed to the Chrome binary, added in this order.
for chrome_flag in (
    "--no-sandbox",
    "--window-size=1420,1080",
    "--headless",
    "--disable-dev-shm-usage",
    "--disable-gpu",
    "--disable-notifications",
    "--remote-debugging-port=9222",
):
    chrome_options.add_argument(chrome_flag)

chrome_options.add_experimental_option("useAutomationExtension", False)

# Explicit locations of the browser binary and of the chromedriver executable.
chrome_options.binary_location = "/usr/bin/google-chrome-stable"
chrome_driver_binary = "/usr/bin/chromedriver"

driver = webdriver.Chrome(
    executable_path=chrome_driver_binary,
    chrome_options=chrome_options,
)

# Listing URL for San Francisco; the page number is appended to the end.
base_url = 'https://www.bandsintown.com/?place_id=ChIJIQBpAG2ahYAR_6128GcTUEo&page='

events = []
eventContainerBucket = []

# CSS selector for the event anchors inside the listing container.
event_link_selector = (
    'div[class^=_3buUBPWBhUz9KBQqgXm-gf] '
    'a[class^=_3UX9sLQPbNUbfbaigy35li]'
)

# Visit listing pages 1 and 2 and collect the href of every event link found.
for page_number in range(1, 3):
    print(page_number)
    pageURL = base_url + str(page_number)
    driver.get(pageURL)
    print(pageURL)
    event_list = driver.find_elements_by_css_selector(event_link_selector)
    events += [link.get_attribute("href") for link in event_list]

print("total events: ", len(events))

# User-Agent header sent with the plain HTTP GET requests below.
headers = {'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"}

# Upper bound, in seconds, on how long a single GET may wait for the server.
# Without a timeout, requests can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT_SECONDS = 30

# Open every collected event URL in the browser, then re-fetch the resolved
# URL with requests and print the response.
item = {}
allEvents = []
try:
    for event in events:
        driver.get(event)
        # The browser may have been redirected; use the URL it ended up on.
        currentUrl = driver.current_url
        print(currentUrl)
        try:
            currentRequest = requests.get(
                currentUrl,
                headers=headers,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        except requests.exceptions.RequestException as e:
            # Report the failure and move on to the next event. The message
            # is printed before `continue`; placed after it, it never ran.
            print(e)
            print("continuing!")
            continue
        print(currentRequest)
finally:
    # Always shut down Chrome and chromedriver, even when driver.get() raises
    # (e.g. "session deleted because of page crash"), so that orphaned browser
    # processes do not keep holding memory on the host.
    driver.quit()

但是，该脚本成功运行了大约 3 个 url，然后崩溃并出现以下错误:

Traceback (most recent call last):
  File "BandsintownWebScraper.py", line 117, in <module>
    driver.get(event)
  File "/home/ubuntu/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
    self.execute(Command.GET, {'url': url})
  File "/home/ubuntu/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/home/ubuntu/.local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: session deleted because of page crash
from tab crashed
  (Session info: headless chrome=91.0.4472.114)

在终端中运行 df -h 的输出如下:

Filesystem      Size  Used Avail Use% Mounted on
udev            979M     0  979M   0% /dev
tmpfs           199M  768K  198M   1% /run
/dev/xvda1      7.7G  7.7G   60M 100% /
tmpfs           993M     0  993M   0% /dev/shm
tmpfs           5.0M     0  5.0M   0% /run/lock
tmpfs           993M     0  993M   0% /sys/fs/cgroup
/dev/loop0      100M  100M     0 100% /snap/core/11316
/dev/loop1       56M   56M     0 100% /snap/core18/2066
/dev/loop2       56M   56M     0 100% /snap/core18/2074
/dev/loop3      100M  100M     0 100% /snap/core/11187
/dev/loop4       29M   29M     0 100% /snap/amazon-ssm-agent/2012
/dev/loop5       34M   34M     0 100% /snap/amazon-ssm-agent/3552
tmpfs           199M     0  199M   0% /run/user/1000

根据我查到的资料，这可能是可用内存不足造成的？我该如何解决这个问题？具体来说，我是否需要在 Ubuntu 上分配更多内存？如果需要，该怎么做？

最佳答案

https://www.answertopia.com/ubuntu/adding-and-managing-ubuntu-swap-space/
添加一些交换空间(swap)。如果这样做有效，说明问题与内存有关；如果无效，那么很可能是 Python 方面的问题——不过乍一看你的代码没有任何问题，而且它在我的电脑(16 GB 内存)上可以正常运行到结束。

关于python - selenium.common.exceptions : session deleted because of page crash from tab crashed，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/68260448/

python - selenium.common.exceptions : session deleted because of page crash from tab crashed

上一篇：go - 当我尝试运行 main.go 时，我得到 cgo : exec gcc: exec: "gcc": executable file not found in $PATH

下一篇：Python scrapy+selenium scraper docker build 报错(错误码100)