python - 从Google Play商店应用网站中提取评论

标签 python html selenium google-play screen-scraping

import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import pandas as pd

class FindByXpathCss():
    # Declaring variables
    Reviews = []  # List to store final set of reviews
    reviewText = []  # List to store reviews extracted from XPath
    reviewFullText = []

    # Chromedriver path
    driver = webdriver.Chrome(executable_path=r"F:\Chrome-webdriver\chromedriver.exe")
    driver.maximize_window()

    baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"

    driver.get(baseUrl)
    # driver.execute_script("scrollBy(0,300);")
    # Scrolling down
    for i in range(20):
        driver.find_element_by_xpath('//*[@id="yDmH0d"]').send_keys(Keys.ARROW_DOWN, i)
        time.sleep(0.5)

    # To click on Show more button
    #btnShowMore = driver.find_element_by_xpath('//*[@id="fcxH9b"]/div[4]/c-wiz/div/div[2]''/div/div[1]/div/div/div[1]/div[2]/div[2]/div/span/span').click()
    # Scrolling to top
    for j in range(10):
        driver.find_element_by_xpath('//*[@id="yDmH0d"]').send_keys(Keys.ARROW_UP, j)

    #for i in range(10):
    review_btn = driver.find_elements_by_xpath("//button[contains(@class,'')][contains(text(),'Full Review')]")
    single_review_btn = driver.find_element_by_xpath("//button[contains(@class,'')][contains(text(),'Full Review')]")
    #time.sleep(1)

带有2个标签的div html标签,其中一个的jsname为“fbQN7e”,该标签用于保存较大的评论,并且那些评论将具有称为“完整评论”的按钮。同一div html标签中的另一个跨度是“bN97Pc”,该位置用于容纳较小的评论,而该评论的末尾将没有“完整评论”按钮。我无法获得两种类型的跨度的评论。在这里,我尝试将reviewFullText列表直接写入数据框,但仅获取元素数据类型,而不获取文本。我不知道为什么会这样。
    for btn in review_btn:
        btn.click()
        reviewFullText = driver.find_elements_by_css_selector("span[jsname='fbQN7e']")

    #if(single_review_btn.is_enabled()==False):
        #reviewText = driver.find_elements_by_css_selector("span[jsname=\"bN97Pc\"]")
    ##else:
        #pass

    # Iterating each reviews and appending into list Reviews
    for txtreview in reviewText:
        reviewFullText.append(txtreview.text)

    print(len(reviewFullText))

        # Writing the list values into csv file
    df = pd.DataFrame(reviewFullText)
        #df = pd.DataFrame({'Reviews': 'Reviews'}) #'Sentiment': 'null'})
    df.to_csv('Reviews.csv', index=True, encoding='utf-8')

    driver.close()

最佳答案

我已修改您的解决方案以从页面中检索所有评论。

import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
class FindByXpathCss():
        driver = webdriver.Chrome(executable_path=r"C:\New folder\chromedriver.exe")
        driver.maximize_window()
        baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
        driver.get(baseUrl)

        scrolls = 3
        while True:
            scrolls -= 1
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
            time.sleep(3)
            if scrolls < 0:
                break

        buttonClick = WebDriverWait(driver, 30).until(
            EC.visibility_of_all_elements_located((By.XPATH, "//button[contains(@class,'')][contains(text(),'Full Review')]")))
        for element in buttonClick:
            driver.execute_script("arguments[0].click();", element)

        reviewText = WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, "//*[@class='UD7Dzf']")))


        for textreview in reviewText:
            print textreview.text

        reviewText = WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, "//*[@class='UD7Dzf']")))

        # reviewText = driver.find_elements_by_xpath("//*[@class='UD7Dzf']")
        for textreview in reviewText:
            print textreview.text

输出:

enter image description here

关于python - 从Google Play商店应用网站中提取评论,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/61101495/

相关文章:

python - 如何在 python 中执行双向 SSL 身份验证?

python - 在 Odoo 8 上设置树字体

html - 如何在 html 中将一个 div 分成由边框分隔的 3 个部分

python - 尝试使用 selenium 和 python 登录网页时出错

javascript - Selenium JavascriptExecutor 计算

python - 如何解析通过IPFS Python API接收到的数据

python - 无法在heroku上恢复postgres转储

HTML/CSS网页结构

jquery - 使用 jQuery 使元素不可选择

javascript - Seleneium 异常 - Arguments[0].click 不是 Selenium Python 中使用 execute_script() 的函数