我正在尝试用 Selenium 自动抓取一个网站,但脚本一旦进入没有目标元素的页面就不再继续运行,例如这个页面:https://www.marks4sure.com/9A0-127-exam.html 。我希望的行为是:如果页面上不存在任何详细信息,脚本应当返回上一页,然后继续处理下一项。
感谢您的帮助。 这是我的代码:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# Crawl the marks4sure.com exam list: for up to 70 list pages and 56 entries
# per page, open each "more details" link, read the exam code, exam name and
# question count from the detail page, then save everything to data.csv.
option = Options()
option.add_argument("--disable-infobars")
option.add_argument("start-maximized")
option.add_argument("--disable-extensions")
option.add_experimental_option("excludeSwitches", ['enable-automation'])
# Pass the argument 1 to allow and 2 to block
# option.add_experimental_option("prefs", {
# "profile.default_content_setting_values.notifications": 1
# })
driver = webdriver.Chrome(chrome_options=option, executable_path='C:\\Users\\Awais\\Desktop\\web crawling\\chromedriver.exe')
# Every element lookup that fails below waits out this timeout before raising.
driver.implicitly_wait(100)
url = "https://www.marks4sure.com/allexams.html"
driver.get(url)

# Parallel lists: index k of each list describes the same exam.
links = []
exam_code = []
exam_name = []
total_q = []

for x in range(70):
    for i in range(1, 57):
        more_details = driver.find_element_by_xpath(f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
        links.append(more_details.get_attribute('href'))
        more_details.click()

        # NOTE: a detail page without these elements is not detected up
        # front; each lookup simply falls through to its handler once the
        # implicit wait has expired.
        # Each handler stores an 'N/A' placeholder so that the four lists
        # stay the same length and zip() pairs values from the same exam.
        # `except Exception` (not a bare `except`) keeps Ctrl-C working.
        try:
            code = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[2]/div[2]')
            exam_code.append(code.text)
        except Exception:
            print('N/A')
            exam_code.append('N/A')
        try:
            name = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[3]/div[2]/a')
            exam_name.append(name.text)
        except Exception:
            print('N/A')
            exam_name.append('N/A')
        try:
            question = driver.find_element_by_xpath('//*[@id="content"]/div/div[1]/div[2]/div[4]/div[2]/strong')
            total_q.append(question.text)
        except Exception:
            print('N/A')
            total_q.append('N/A')

        # Return to the list page before opening the next entry.
        driver.back()

    # Advance to the next list page.
    next_page = driver.find_element_by_xpath('//*[@id="yw0"]/li[13]')
    next_page.click()

# exam_code (not exam_name twice) feeds the "Exam Code" column.
all_info = list(zip(links, exam_code, exam_name, total_q))
print(all_info)
df = pd.DataFrame(all_info, columns=["Links", "Exam Code", "Exam Name", "Total Question"])
df.to_csv("data.csv", encoding='utf-8')
driver.close()
最佳答案
您的代码没有先检查"更多详细信息"(more_details)链接元素是否存在。下面的版本在找不到该链接时跳过该条目,并在详情页显示错误提示(alert alert-danger)时返回列表页继续处理下一项:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time
# Crawl the marks4sure.com exam list: for up to 70 list pages and 56 entries
# per page, open each "more details" link, skip detail pages that show an
# error banner, and record link, exam code, exam name and question count.
option = Options()
option.add_argument("--disable-infobars")
option.add_argument("start-maximized")
option.add_argument("--disable-extensions")
option.add_experimental_option("excludeSwitches", ['enable-automation'])
# Pass the argument 1 to allow and 2 to block
# option.add_experimental_option("prefs", {
# "profile.default_content_setting_values.notifications": 1
# })
driver = webdriver.Chrome(options=option, executable_path='C:\\Users\\Awais\\Desktop\\web crawling\\chromedriver.exe')
# Short implicit wait so that a missing element fails fast instead of
# stalling the crawl on every page that lacks it.
driver.implicitly_wait(0.5)
url = "https://www.marks4sure.com/allexams.html"

# Parallel lists: index k of each list describes the same exam.
links = []
exam_code = []
exam_name = []
total_q = []


def _text_or_na(xpath):
    """Return the text of the element at *xpath*, or 'N/A' if it is missing."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except NoSuchElementException:
        print('N/A')
        return 'N/A'


def _is_error_page():
    """Return True if the current page shows the 'alert alert-danger' banner."""
    try:
        banner = driver.find_element(By.XPATH, '/html/body/div[4]/div')
    except NoSuchElementException:
        return False
    return banner.get_attribute('class') == 'alert alert-danger'


try:
    driver.get(url)
    for x in range(70):
        for i in range(1, 57):
            try:
                more_details = driver.find_element(By.XPATH, f'//*[@id="content"]/div/div[2]/div[{i}]/div/h5/a')
                href = more_details.get_attribute('href')
                more_details.click()
            except NoSuchElementException:
                # No entry at this position on the list page.
                continue

            if _is_error_page():
                # Nothing to scrape here: go back without recording the
                # link, so the result lists stay aligned.
                driver.back()
                continue

            # Append one value to every list per scraped page; missing
            # fields become 'N/A' so rows never shift against each other.
            links.append(href)
            exam_code.append(_text_or_na('//*[@id="content"]/div/div[1]/div[2]/div[2]/div[2]'))
            exam_name.append(_text_or_na('//*[@id="content"]/div/div[1]/div[2]/div[3]/div[2]/a'))
            total_q.append(_text_or_na('//*[@id="content"]/div/div[1]/div[2]/div[4]/div[2]/strong'))
            driver.back()

        try:
            driver.find_element(By.XPATH, '//*[@id="yw0"]/li[13]').click()
        except NoSuchElementException:
            # Next-page control not found; reload the current list page.
            driver.refresh()
finally:
    # quit() ends the browser session even if the crawl raised.
    driver.quit()

# exam_code (not exam_name twice) feeds the "Exam Code" column.
all_info = list(zip(links, exam_code, exam_name, total_q))
print(all_info)
df = pd.DataFrame(all_info, columns=["Links", "Exam Code", "Exam Name", "Total Question"])
# Persist the result; without this the DataFrame is built and then discarded.
df.to_csv("data.csv", encoding='utf-8')
关于python - 如果未找到元素,脚本将不起作用,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/60542130/