我的项目包括为一家机构制作一个具有竞争力的酒店价格观察表。这是我想要自动化的痛苦操作,代码正确提取了酒店的名称和我想要提取的价格,但它仅适用于第一家酒店,我不知道问题出在哪里。我为您提供代码和输出,如果你们中的任何人可以帮助我并提前感谢您。
注意:代码 2 工作正常,但是当我添加更多操作时,问题出现
代码 1
#!/usr/bin/env python
# coding: utf-8
import time
from time import sleep
import ast
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome("C:\\Users\\marketing2\\Documents\\chromedriver.exe")
driver.get('https://tn.tunisiebooking.com/')
# params to select
params = {
'destination': 'Tozeur',
'date_from': '11/09/2021',
'date_to': '12/09/2021',
'bedroom': '1'
}
# select destination
destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des'))))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch'))))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('checkin').value ='{params['date_from']}';"
script += f"document.getElementById('checkout').value ='{params['date_to']}';"
script += f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('arrivee').value ='{params['date_to']}';"
driver.execute_script(script)
# submit form
btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="boutonr"]')))
btn_rechercher.click()
urls = []
hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]")))
for hotel in hotels:
link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
urls.append(link)
for url in urls:
driver.get(url)
def existsElement(xpath):
try:
driver.find_element_by_id(xpath);
except NoSuchElementException:
return "false"
else:
return "true"
if (existsElement('result_par_arrangement')=="false"):
btn_t = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div')))
btn_t.click()
sleep(10)
else :
pass
try:
name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text
arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
opt = arropt.find_element_by_tag_name("b").text
num = len(arropt.find_elements_by_tag_name("option"))
optiondata = {}
achats = {}
marges= {}
selection = Select(driver.find_element_by_id("arrangement"))
for i in range(num):
try:
selection = Select(driver.find_element_by_id("arrangement"))
selection.select_by_index(i)
time.sleep(2)
arr = driver.find_element_by_xpath("//select[@id='arrangement']/option[@selected='selected']").text
prize = driver.find_element_by_id("prix_total").text
optiondata[arr] = (int(prize))
btn_passe = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="resultat"]/div/form/div/div[2]/div[1]/div[2]/div[2]/div')))
btn_passe.click()
# params to select
params = {
'civilite_acheteur': 'Mlle',
'prenom_acheteur': 'test',
'nom_acheteur': 'test',
'e_mail_acheteur': 'test@gmail.com',
'portable_acheteur': '22222222',
'ville_acheteur': 'Test',
}
# select civilite
civilite_acheteur = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, 'civilite_acheteur'))))
civilite_acheteur.select_by_value(params['civilite_acheteur'])
# saisir prenom
script = f"document.getElementsByName('prenom_acheteur')[0].value ='{params['prenom_acheteur']}';"
script += f"document.getElementsByName('nom_acheteur')[0].value ='{params['nom_acheteur']}';"
script += f"document.getElementsByName('e_mail_acheteur')[0].value ='{params['e_mail_acheteur']}';"
script += f"document.getElementsByName('portable_acheteur')[0].value ='{params['portable_acheteur']}';"
script += f"document.getElementsByName('ville_acheteur')[0].value ='{params['ville_acheteur']}';"
driver.execute_script(script)
# submit form
btn_agence = driver.find_element_by_id('titre_Nabeul')
btn_agence.click()
btn_continuez = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'boutonr')))
btn_continuez.click()
achat = int(driver.find_element_by_xpath('/html/body/header/div[2]/div[1]/div[1]/div[4]/div[2]/div[2]').text.replace(' TND', ''))
achats[arr]=achat
marge =int(((float(prize) - float(achat)) / float(achat)) * 100);
marges[arr]=marge
optiondata[arr]=prize,achat,marge
driver.get(url)
btn_display = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div')))
btn_display.click()
sleep(10)
except StaleElementReferenceException:
pass
except NoSuchElementException:
pass
s="- {} | {} : {}".format(name, opt, optiondata)
print(s)
ds = []
for l in s.splitlines():
d = l.split('-')
if len(d) > 1:
df = pd.DataFrame(ast.literal_eval(d[1].strip()))
ds.append(df)
for df in ds:
df.reset_index(drop=True, inplace=True)
df = pd.concat(ds, axis= 1)
cols = df.columns
cols = [((col.split('.')[0], col)) for col in df.columns]
df.columns=pd.MultiIndex.from_tuples(cols)
print(df.T)
#print("{} : {} - {}".format(name, opt, optiondata))
代码 2 from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import StaleElementReferenceException,NoSuchElementException
urls = []
hotels = driver.find_elements_by_xpath("//div[starts-with(@id,'produit_affair')]")
for hotel in hotels:
link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
urls.append(link)
for url in urls:
driver.get(url)
try:
name = driver.find_element_by_xpath("//div[@class='bloc_titre_hotels']/h2").text
arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
opt = arropt.find_element_by_tag_name("b").text
num = len(arropt.find_elements_by_tag_name("option"))
optiondata = {}
selection = Select(driver.find_element_by_id("arrangement"))
for i in range(num):
try:
selection = Select(driver.find_element_by_id("arrangement"))
selection.select_by_index(i)
time.sleep(2)
arr = driver.find_element_by_xpath("//select[@id='arrangement']/option[@selected='selected']").text
prize = driver.find_element_by_id("prix_total").text
optiondata[arr]=prize
except StaleElementReferenceException:
pass
except NoSuchElementException:
pass
print("{} : {} - {} - {}".format(name,opt,num,optiondata))
最佳答案
boutonr
页面上不存在了。 time.sleep()
尽量减少,因为这会浪费您的代码执行时间。使用 WebDriverWait(...)
相反 我不会说法语,所以我无法理解您在代码中所追求的内容,但是下面这个最小化的示例应该可以帮助您理解原理。
#!/usr/bin/env python
# coding: utf-8
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome("C:\chromedriver.exe")
driver.get('https://tn.tunisiebooking.com/')
# params to select
params = { 'destination': 'Nabeul',
'date_from': '25/08/2021',
'date_to': '26/08/2021',
'bedroom': '1' }
# select destination
destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des'))))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch'))))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('checkin').value ='{params['date_from']}';"
script += f"document.getElementById('checkout').value ='{params['date_to']}';"
script += f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('arrivee').value ='{params['date_to']}';"
driver.execute_script(script)
# submit form
btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@onclick="return submit_hotel_recherche()"]')))
btn_rechercher.click()
urls = []
hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]")))
for hotel in hotels:
link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
urls.append(link)
for url in urls:
driver.get(url)
try:
name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text
arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
opt = arropt.find_element_by_tag_name("b").text
num = len(arropt.find_elements_by_tag_name("option"))
optiondata = {}
achats = {}
marges= {}
for i in range(num):
try:
selection = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'arrangement')))).select_by_index(i)
time.sleep(0.5)
arr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@id='arrangement']/option[@selected='selected']"))).text
prize = driver.find_element_by_id("prix_total").text
optiondata[arr] = int(prize)
except StaleElementReferenceException:
pass
print("{} : {} - {}".format(name, opt, optiondata))
except NoSuchElementException:
pass
driver.quit()
结果:Byzance Nabeul : Chambre Double - {'All Inclusive soft': 93, 'Demi Pension': 38, 'Petit Dejeuner': 28, 'Pension Complete': 78}
Palmyra Club Nabeul Nabeul : Double Standard - {'All Inclusive soft': 92}
以下代码转到付款页面并提取那里的所有信息:#!/usr/bin/env python
# coding: utf-8
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome("/usr/local/bin/chromedriver")
driver.get('https://tn.tunisiebooking.com/')
# params to select
params = {
'destination': 'Nabeul',
'date_from': '29/08/2021',
'date_to': '30/08/2021',
'bedroom': '1'
}
# select destination
destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des'))))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch'))))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('checkin').value ='{params['date_from']}';"
script += f"document.getElementById('checkout').value ='{params['date_to']}';"
script += f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('arrivee').value ='{params['date_to']}';"
driver.execute_script(script)
# submit form
btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@onclick="return submit_hotel_recherche()"]')))
btn_rechercher.click()
urls = []
hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]")))
for hotel in hotels:
link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
urls.append(link)
for url in urls:
driver.get(url)
try:
name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text
arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
opt = arropt.find_element_by_tag_name("b").text
num = len(arropt.find_elements_by_tag_name("option"))
optiondata = {}
achats = {}
marges= {}
try:
selection = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'arrangement'))))
time.sleep(0.5)
arr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@id='arrangement']/option[@selected='selected']"))).text
prize = driver.find_element_by_id("prix_total").text
optiondata[arr] = (int(prize))
btn_passe = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'resa')))
btn_passe.click()
tot = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'montant_total_apres_code')))
total = int(tot.text.replace(' €', ''))
# params to select
params = {
'civilite_acheteur': 'Mlle',
'prenom_acheteur': 'test',
'nom_acheteur': 'test',
'e_mail_acheteur': 'test@gmail.com',
'portable_acheteur': '22222222',
'ville_acheteur': 'Test',
}
# select civilite
civilite_acheteur = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, 'civilite_acheteur'))))
civilite_acheteur.select_by_value(params['civilite_acheteur'])
# saisir prenom
script = f"document.getElementsByName('prenom_acheteur')[0].value ='{params['prenom_acheteur']}';"
script += f"document.getElementsByName('nom_acheteur')[0].value ='{params['nom_acheteur']}';"
script += f"document.getElementsByName('e_mail_acheteur')[0].value ='{params['e_mail_acheteur']}';"
script += f"document.getElementsByName('portable_acheteur')[0].value ='{params['portable_acheteur']}';"
script += f"document.getElementsByName('ville_acheteur')[0].value ='{params['ville_acheteur']}';"
driver.execute_script(script)
# submit form
btn_agence = driver.find_element_by_class_name('continuez_resa')
btn_agence.click()
achat1 = int(driver.find_element_by_id('montant_a_payer').text.replace(' €', ''))
achat = int(driver.find_element_by_id('montant_restant').text.replace(' €', ''))
achat3 = float(driver.find_element_by_xpath('//div[@class="ligne_interne_total"]/div[3]/div[@class="prix_total1 text_shadow"]').text.replace(' TND', ''))
achats[arr]=achat
marge =int(((float(prize) - float(achat)) / float(achat)) * 100);
marges[arr]=marge
optiondata[arr]=prize,total,achat1,achat,achat3,marge
except StaleElementReferenceException:
pass
print("{} : {} - {}".format(name, opt, optiondata))
except NoSuchElementException:
pass
driver.quit()
输出:Byzance Nabeul : Chambre Double - {'Petit Dejeuner': (36, 41, 12, 29, 4.0, 24)}
在哪里:36 = Prix Total
41 = Montant Total
12 = Montant de l'acompte
29 = Vous payerez le reste à votre arrivée à l'hôtel
4.0 = Total taxe de séjour à payer sur place à l'hôtel est
24 = Marges
酒店页面:关于python - 为什么我的 Python 代码为我的列表中的所有元素提取相同的数据?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68817652/