python - AttributeError : 'NoneType' object has no attribute 'text' , but text is on page

我一直在尝试几种不同的方法来使用此代码从页面获取价格值:

    import requests
headers = {  # browser-like request headers; note they are not passed to requests.get() below
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept_Encoding": "gzip, deflate, br",
    "Accept_Language": "en-GB,en-US;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
    "Upgrade_Insecure_Requests": "1",
    "User_Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
    }
import csv
from bs4 import BeautifulSoup


# open the output CSV file and write the header row
with open("/Users/eezar/Desktop/reverbsolid.csv","w",newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Guitar","Price"])
    # `pages` is assigned here but not used anywhere in this snippet
    pages=[]
    for n in range(1,5,1):
        url=("https://reverb.com/marketplace/electric-guitars/solid-body?page={}".format(n))
        # download listing page n (n runs from 1 to 4)
        r = requests.get(url)
        # parse the downloaded HTML
        soup = BeautifulSoup(r.text, "html.parser")
        [s.extract() for s in soup('sup')]  # remove every <sup> element from the parsed tree
        # find the <ul> container(s) carrying the product-tile classes
        products = soup.find_all('ul', class_ = 'tiles tiles--four-wide tiles--sidebar-width')
        # runs once per matched <ul>; the lookups below search the whole page (`soup`), not `title`
        for title in products:
            Guitar = soup.find('img', alt=True)
            Price = soup.find('span',{'class' : 'price-display'}).text.strip()
            # print the <img> lookup result and write it together with the price as one CSV row
            print(Guitar)
            writer.writerow ([Guitar,Price])

但是我得到这个错误:

File "reverbsolid.py", line 32, in <module>
    Price = soup.find('span',{'class' : 'price-display'}).text.strip()
AttributeError: 'NoneType' object has no attribute 'text'

我可以在页面代码的文本中看到该值:

> <span class="price-display"><!-- react-text: 1023 -->$450<!--
> /react-text --></span>

不知道接下来要尝试什么？

最佳答案

这是我用来实现相同结果的代码的略微修改版本:

import requests
import csv
from bs4 import BeautifulSoup


# Browser-like request headers. They are defined for reference only: the
# requests.get() call below does not send them.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept_Encoding": "gzip, deflate, br",
    "Accept_Language": "en-GB,en-US;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
    "Upgrade_Insecure_Requests": "1",
    "User_Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
}


# Listing URL with a placeholder for the page number.
listing_url = "https://reverb.com/marketplace/electric-guitars/solid-body?page={}"

# Create the CSV, write its header row, then scrape listing pages 1 to 4.
with open("reverbsolid.csv", "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Guitar", "Price"])
    pages = []
    for page_number in range(1, 5):
        # Download the page and parse its HTML.
        response = requests.get(listing_url.format(page_number))
        soup = BeautifulSoup(response.text, "html.parser")
        # The <ul> holding the tiles, then every <li> tile inside it.
        tile_list = soup.find('ul', class_='tiles tiles--four-wide-max')
        for product in tile_list.find_all('li', class_='tiles__tile'):
            # Dump the raw tile markup so its contents can be inspected.
            print(product)
            Guitar = product.find('img', alt=True)
            # find() gives None when the tile has no such <span>; reading
            # .text on None is what raises the AttributeError.
            price_tag = product.find('span', {'class': 'price-display'})
            Price = price_tag.text.strip()
            # Echo the image lookup result and store one CSV row per tile.
            print(Guitar)
            writer.writerow([Guitar, Price])

结果是:

AttributeError: 'NoneType' object has no attribute 'text'

如果我们研究其中一种产品:

<li class="tiles__tile">
  <div class="grid-card grid-card--placeholder">
    <div class="grid-card__inner">
      <div class="grid-card__main">
    <div class="grid-card__image"></div>
    <div class="grid-card__main__text">
      <div class="grid-card__title"></div>
    </div>
      </div>
      <div class="grid-card__footer">
    <div class="grid-card__footer__pricing">
      <div class="grid-card__price">
      </div>
    </div>
      </div>
    </div>
  </div>
</li>

我们可以看到其中既没有标题也没有价格。原因是此页面使用 JavaScript 填充 DOM，而 requests 不会为您运行 JavaScript，因此您得到的只是空白的产品占位符(注意 class 中的 grid-card--placeholder)。如果您需要渲染之后的页面内容，最简单的方法是使用 selenium(https://selenium-python.readthedocs.io/) 之类的工具，它会为您运行一个完整的浏览器。

但是，在这种情况下，有一种更简单的方法来获取所需的信息。所有原始商品数据都包含在 name 属性为 apollo-state 的 meta 标记中。所以:

import csv
import json
import requests
from bs4 import BeautifulSoup


# Browser-like request headers, sent with every page request below.
# HTTP header names are hyphenated ("User-Agent", not "User_Agent"); with
# underscores the server does not see them as the standard headers.
# "br" is deliberately not advertised: requests only decodes Brotli responses
# when an optional brotli package is installed, and r.text would otherwise
# contain undecoded bytes.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
}


# write a CSV file
with open("reverbsolid.csv", "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Guitar", "Price"])
    # walk listing pages 1 to 4
    for n in range(1, 5):
        url = "https://reverb.com/marketplace/electric-guitars/solid-body?page={}".format(n)
        # download the page; the timeout keeps a stalled connection from
        # hanging the script, and raise_for_status() stops on HTTP errors
        # instead of parsing an error page
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        # parse the HTML
        soup = BeautifulSoup(r.text, "html.parser")
        # the raw item data is embedded as JSON in <meta name="apollo-state" content="...">
        container = soup.find('meta', {'name': 'apollo-state'})
        if container is None or not container.get('content'):
            # find() returns None when the tag is absent; skip the page with a
            # clear message rather than failing on the subscript
            print("page {}: no apollo-state meta tag found, skipping".format(n))
            continue
        container = json.loads(container['content'])
        # parse products here

解析后得到的 container 字典中应该包含您需要的价格。

注意:就这个问题而言，我个人会选择 selenium 方案，因为它虽然执行速度较慢，但更直观。

关于python - AttributeError : 'NoneType' object has no attribute 'text' , but text is on page，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/49211426/

python - AttributeError : 'NoneType' object has no attribute 'text' , but text is on page

上一篇：php - 尝试运行Behat测试时出错。 usr/bin/env参数无效

下一篇：powershell - Powershell数据库连接错误检测和错误处理