我正在抓取下面这个 URL:
https://www.youtube.com/trendsdashboard#loc0=ind
上面的 URL 可以正常抓取,但同一页面上还有其他链接,例如:
https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared
抓取它时返回的结果为空。下面这两个链接也是同样的情况:
https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared&gen0=male
https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared&gen0=female
def getVideoTrend(self):
    """Open the "shared" YouTube Trends dashboard in Firefox and collect its videos.

    Every element with the CSS class ``video-item`` contributes one dict with
    the keys ``'url'`` and ``'title'``, read from the ``href`` and ``alt``
    attributes of the ``a`` element found inside it.  The collected list is
    printed and returned.

    NOTE(review): the elements are queried immediately after ``driver.get()``
    with no explicit wait, so the list may come back empty if the page fills
    in its items after the initial load -- confirm against the live page.

    Returns:
        list of dict: one ``{'url': ..., 'title': ...}`` entry per item found.

    Raises:
        AssertionError: if the page title does not contain "YouTube Trends".
    """
    binary = FirefoxBinary('/usr/bin/firefox')
    driver = webdriver.Firefox(firefox_binary=binary)
    try:
        driver.get("https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared")
        assert "YouTube Trends" in driver.title
        video_trend = []
        for s in driver.find_elements_by_class_name('video-item'):
            print(s)
            # Look the anchor up once and read both attributes from it.
            link = s.find_element_by_css_selector('a')
            video_trend.append({
                'url': link.get_attribute('href'),
                'title': link.get_attribute('alt'),
            })
        print(video_trend)
        return video_trend
    finally:
        # Always shut the browser down, even when the assert or a lookup fails,
        # so repeated calls do not leave Firefox processes behind.
        driver.quit()
最佳答案
问题可能是您没有等待元素加载到 DOM 中。
尝试做这样的事情:
先导入 Selenium 的 WebDriverWait:
from selenium.webdriver.support.ui import WebDriverWait
在获取元素之前等待元素加载。
# `driver` and `video_trend` are expected to exist already in the enclosing
# function; this fragment only replaces the element lookup and the loop.
#
# Wait up to 10 seconds for at least one 'video-item' element: until() keeps
# calling the lambda until it returns a truthy (i.e. non-empty) list.
elements = WebDriverWait(driver, 10).until(
    lambda driver: driver.find_elements_by_class_name('video-item'))
for s in elements:
    print(s.text)
    # Look the anchor up once and read both attributes from it.
    link = s.find_element_by_css_selector('a')
    video_trend.append({
        'url': link.get_attribute('href'),
        'title': link.get_attribute('alt'),
    })
print(video_trend)
下面这段完整的代码在我这里可以正常运行:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

# One Firefox session, created when the script is loaded and shared by every
# get_video_trend() call in this script.
driver = webdriver.Firefox()
class Test(object):
    """Namespace holding the YouTube Trends dashboard URLs used by this script."""

    # Dashboard URL whose fragment carries only loc0=ind.
    url1 = "https://www.youtube.com/trendsdashboard#loc0=ind"
    # Same fragment with feed=shared appended after '&'.
    url2 = "https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared"
    # Same as url2 with gen0=male appended.
    url3 = "https://www.youtube.com/trendsdashboard#loc0=ind&feed=shared&gen0=male"
def get_video_trend(url, timeout=10):
    """Load a YouTube Trends dashboard URL and collect its video entries.

    Uses the module-level ``driver``.  After navigating to ``url`` it waits
    for elements with the CSS class ``video-item`` to appear, prints each
    element's text, and builds one dict per element from the ``href`` and
    ``alt`` attributes of the ``a`` element found inside it.  The resulting
    list is printed and returned.

    Args:
        url: Address of the dashboard page to open.
        timeout: Maximum number of seconds to wait for the first
            ``video-item`` element.  Defaults to 10.

    Returns:
        list of dict: one ``{'url': ..., 'title': ...}`` entry per element.

    Raises:
        AssertionError: if the page title does not contain "YouTube Trends".
        Whatever ``WebDriverWait.until`` raises when no ``video-item`` element
        shows up within ``timeout`` seconds is propagated unchanged.
    """
    driver.get(url)
    assert "YouTube Trends" in driver.title

    # until() keeps calling the lambda until it returns a truthy value, so an
    # empty list (items not present yet) makes it keep waiting.
    items = WebDriverWait(driver, timeout).until(
        lambda d: d.find_elements_by_class_name('video-item'))

    video_trend = []
    for item in items:
        print(item.text)
        # Look the anchor up once and read both attributes from it.
        link = item.find_element_by_css_selector('a')
        video_trend.append({
            'url': link.get_attribute('href'),
            'title': link.get_attribute('alt'),
        })
    print(video_trend)
    return video_trend
if __name__ == '__main__':
    # Scrape the three dashboard variants in the same order as before.
    try:
        for url in (Test.url1, Test.url2, Test.url3):
            get_video_trend(url)
    finally:
        # Close the shared browser even if one of the pages fails, so the
        # script does not leave a Firefox process running.
        driver.quit()
关于python - selenium 不能在 url 中使用 &,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/30775350/