我想从一个超市网站抓取所有类别下全部商品的名称和价格。我找到的教程都只针对单个 const url,而我需要遍历所有商品页面。目前我写出了下面的代码:
const puppeteer = require('puppeteer');
/**
 * Opens `url` in a fresh headless browser, reads the product name and price
 * from the page via XPath, and logs them as `{ name, price }`.
 *
 * The browser is always closed, even when navigation or extraction fails,
 * so a failed scrape does not leave a browser process behind.
 *
 * @param {string} url - Product page to scrape.
 * @returns {Promise<void>} Resolves once the result is logged and the browser is closed.
 * @throws {Error} If either XPath matches no node on the page.
 */
async function scrapeProduct(url) {
  const NAME_XPATH = '//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/h1/div';
  const PRICE_XPATH = '//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div[1]/div[2]/p[1]/em[2]/strong/text()';

  // Returns the textContent of the first node matching `xpath`.
  // NOTE(review): `page.$x` is not available in recent Puppeteer majors
  // (reportedly removed in v22) — confirm against the installed version.
  const readText = async (page, xpath, label) => {
    const [node] = await page.$x(xpath);
    if (node === undefined) {
      // Without this guard the failure surfaces as an opaque
      // "Cannot read properties of undefined" TypeError.
      throw new Error(`Could not find product ${label} on ${url}`);
    }
    const handle = await node.getProperty('textContent');
    return handle.jsonValue();
  };

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);
    const name = await readText(page, NAME_XPATH, 'name');
    const price = await readText(page, PRICE_XPATH, 'price');
    console.log({name,price});
  } finally {
    await browser.close();
  }
}
// Entry point: scrape a single product page. The returned promise is handled
// explicitly so a failed scrape is logged and reflected in the exit code
// instead of surfacing as an unhandled rejection.
scrapeProduct('https://www.jumbo.com.ar/gaseosa-sprite-sin-azucar-lima-limon-1-25-lt/p').catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
但这段代码只能处理一个 URL。我使用的是 Node.js 和 Puppeteer,怎样才能遍历所有商品页面?
最佳答案
您可以尝试使用 for...of 循环,复用同一个浏览器实例和同一个页面,逐个访问各个 URL,这样抓取工具不会使服务器过载:
const puppeteer = require('puppeteer');
/**
 * Scrapes the product name and price for every URL in `urls`, one after
 * another, reusing a single browser instance and a single page, and logs
 * each result as `{ name, price }`.
 *
 * A failure on one URL is logged and the loop continues with the next URL.
 * The browser is closed in a `finally` block so it is released even when
 * something throws.
 */
(async function main() {
  const NAME_XPATH = '//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/h1/div';
  const PRICE_XPATH = '//*[@id="product-nonfood-page"]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div[1]/div[2]/p[1]/em[2]/strong/text()';

  // Returns the textContent of the first node matching `xpath` on `page`.
  // NOTE(review): `page.$x` is not available in recent Puppeteer majors
  // (reportedly removed in v22) — confirm against the installed version.
  const readText = async (page, xpath, label) => {
    const [node] = await page.$x(xpath);
    if (node === undefined) {
      // Without this guard the failure surfaces as an opaque
      // "Cannot read properties of undefined" TypeError.
      throw new Error(`Could not find product ${label}`);
    }
    const handle = await node.getProperty('textContent');
    return handle.jsonValue();
  };

  try {
    const browser = await puppeteer.launch();
    try {
      // Reuse the tab returned by browser.pages() instead of opening a new one.
      const [page] = await browser.pages();
      const urls = [
        'https://www.jumbo.com.ar/gaseosa-sprite-sin-azucar-lima-limon-1-25-lt/p',
        // ...
      ];
      for (const url of urls) {
        try {
          await page.goto(url);
          const name = await readText(page, NAME_XPATH, 'name');
          const price = await readText(page, PRICE_XPATH, 'price');
          console.log({name,price});
        } catch (err) {
          // Keep going: one broken product page should not abort the run.
          console.error(`Failed to scrape ${url}:`, err);
        }
      }
    } finally {
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
关于“html - 如何遍历超市网站并获取产品名称和价格?”,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/64295559/