在 Scrapy 递归回调中使用 xlsxwriter 时无法创建输出文件，有人能帮忙看一下吗？
import scrapy
import xlsxwriter
class QuotesSpider(scrapy.Spider):
    """Crawl hotel listings from hotelgg.com and write them into an Excel file.

    The workbook is created once in ``start_requests`` and closed in
    ``closed()``.  XlsxWriter only writes the .xlsx file to disk when
    ``Workbook.close()`` is called -- omitting that call is why the file
    appeared to never be created when ``parse`` recursed through pages.
    """

    name = "quotes"

    def start_requests(self):
        start_urls = [
            'https://www.hotelgg.com/venue/mittitlt/',
        ]
        # Create the workbook once, before any request is scheduled
        # (never per-callback, or each page would clobber the file).
        filename = 'hotel-list.xlsx'
        wb = xlsxwriter.Workbook(filename)
        self.wb = wb
        if start_urls[0] == 'https://www.hotelgg.com/venue/mittitlt/':
            self.ws = wb.add_worksheet("nanshan")
        # Row counter shared across paginated callbacks, so a later page
        # does not restart at row 0 and overwrite earlier rows.
        self.row = 0
        # Send the initial crawl request.
        yield scrapy.Request(url=start_urls[0], callback=self.parse)

    def parse(self, response):
        ws = self.ws
        i = self.row
        # Parse one listing page.
        for quote in response.css('ul.hotel_list div.info'):
            item = {
                'name': quote.css('h3.title a::text').extract_first(),
                'region': quote.css('span.region::text').extract_first(),
                'street': quote.css('span.street::text').extract_first(),
                'space': quote.css('span.meetingroom_space_range::text').extract(),
            }
            # Write the row to Excel.  write_string() raises on None, so
            # substitute a placeholder for missing fields.
            ws.write_string(i, 0, item['name'] or '-')
            ws.write_string(i, 1, item['region'] or '-')
            ws.write_string(i, 2, item['street'] or '-')
            if item['space']:
                # NOTE(review): indexes element [1] of the extracted list --
                # presumably the upper bound of the space range; this would
                # raise IndexError if only one value is extracted. Confirm
                # against the page markup.
                ws.write_string(i, 3, item['space'][1])
            else:
                ws.write_string(i, 3, '0')
            i += 1
        self.row = i
        # Follow the "next page" link for recursive crawling.
        next_page = response.css('div.pager a:last-child::attr(href)').extract_first()
        self.log(next_page)
        if next_page is not None:
            next_page = response.urljoin(next_page)
            # Request the next page with the same callback.
            yield scrapy.Request(next_page, callback=self.parse)

    def closed(self, reason):
        """Called by Scrapy when the spider finishes.

        Closing the workbook here flushes it to disk; without this call
        XlsxWriter never creates the output file.
        """
        self.wb.close()
最佳答案
尝试这样的事情:
def process_item(self, item, spider):
    """Pipeline hook: sanitize empty fields, then write the item to the
    worksheet matching its ``yield_type``.

    Returns the (possibly modified) item so downstream pipelines receive it,
    as the Scrapy item-pipeline contract requires.
    """
    # XlsxWriter's write_string() errors on None, so replace null/empty
    # values with a visible placeholder first.  The original compared with
    # ``value is ""`` -- identity comparison against a literal is unreliable
    # (and a SyntaxWarning in modern CPython); use equality instead.
    for key, value in item.items():
        if value is None or value == "":
            item[key] = "-"
    if item['yield_type'] == 'product':
        self.prod_row += 1
        self.products.write_string("A%s" % self.prod_row, item["breadcrumb"])
        self.products.write_string("B%s" % self.prod_row, item["last_category"])
        self.products.write_string("C%s" % self.prod_row, item["product_href"])
    if item['yield_type'] == 'profile':
        self.prof_row += 1
        self.profiles.write_string("A%s" % self.prof_row, item["profile_category"])
        self.profiles.write_string("B%s" % self.prof_row, item["company_name"])
        self.profiles.write_string("C%s" % self.prof_row, item["company_href"])
    # Hand the item on to any later pipeline stages.
    return item
将数据保存在工作簿的不同工作表中。
xlsxwriter 在发送 null 值时显示错误,因此请确保使用某些内容代替 null/空白:
if value is None or value == "":
item[key] = "-"
关于python - 在递归中使用 scrapy 回调时 xlsxwriter 无法创建文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52934939/