这就是我想要抓取的内容。
我在“www.google.com/maps”中输入搜索查询,例如“芝加哥花店”。当 Google map 结果中列出芝加哥的所有花店时,我想将 [商店名称] 和 [商店链接,例如 href 链接] 作为数组进行控制台记录。
我在这个项目中使用 puppeteer.js。我的代码能够打开 Chromium、输入我的搜索查询并按回车键以获取商店列表。但是,我无法做到只在控制台输出商店名称和链接。这是我的代码。基本上,我认为问题在于我很难找到正确的 CSS 选择器。
如果您能提供帮助,我会很高兴。这是我的代码
const puppeteer = require('puppeteer');
const xlsx = require("xlsx");
// Get the data
/**
 * Navigates to a place page and reads the shop name, address and website
 * from three fixed CSS selectors.
 *
 * @param {string} url - Address passed to `page.goto`.
 * @param {object} page - Page object exposing `goto`, `waitForSelector` and `$eval`.
 * @returns {Promise<{shop: *, address: *, website: *}>} The `textContent` of the
 *   title and address nodes and the `innerText` of the website node.
 */
async function getPageData(url, page) {
  // Waits until `selector` is present, then evaluates `pick` on its first match.
  const readWhenReady = async (selector, pick) => {
    await page.waitForSelector(selector);
    return page.$eval(selector, pick);
  };

  await page.goto(url);

  // The three reads stay sequential: each one waits for its own selector first.
  const shop = await readWhenReady(
    ".x3AX1-LfntMc-header-title-title span",
    (node) => node.textContent
  );
  const address = await readWhenReady(
    ".QSFF4-text.gm2-body-2:nth-child(1)",
    (node) => node.textContent
  );
  const website = await readWhenReady(".HY5zDd", (node) => node.innerText);

  return { shop, address, website };
}
//Get Links
/**
 * Launches a browser, runs a fixed Google Maps search, scrolls the result
 * pane until its height stops changing, and collects every labelled anchor
 * found in the document.
 *
 * The browser opened here is closed before returning, whether the scrape
 * succeeds or fails; only plain `[label, href]` data leaves the function.
 *
 * @returns {Promise<Array<[string, string]>>} `[ariaLabel, href]` pairs for
 *   every `<a>` that has an `aria-label` other than "Clear search".
 */
async function getLinks() {
  const searchQuery = "flower shop chicago";
  // NOTE(review): the 4th match of this selector is assumed to wrap the
  // scrollable result list — verify against the current Maps markup.
  const paneSelector = "#pane > div > div > div > div > div";

  // Declared with `const`: a bare `browser = ...` assignment creates an
  // implicit global and throws in strict mode / ES modules.
  const browser = await puppeteer.launch({ headless: false });
  try {
    const [page] = await browser.pages();
    // Encode the query so spaces and reserved characters survive in the URL.
    await page.goto(
      `https://www.google.com/maps/?q=${encodeURIComponent(searchQuery)}`
    );
    await page.waitForNavigation({ waitUntil: "load" });

    // Scroll to the last known height, give the list time to grow, and stop
    // once the measured height no longer changes.
    let scrollHeight = 1000;
    while (true) {
      await page.waitForSelector(paneSelector);
      await page.evaluate(
        (selector, height) =>
          document
            .querySelectorAll(selector)[3]
            .querySelector("div")
            .scrollTo(0, height),
        paneSelector,
        scrollHeight
      );
      await page.waitForTimeout(200);
      const newScrollHeight = await page.evaluate(
        (selector) =>
          document.querySelectorAll(selector)[3].querySelector("div")
            .scrollHeight,
        paneSelector
      );
      if (newScrollHeight === scrollHeight) {
        break;
      }
      scrollHeight = newScrollHeight;
    }

    // `return await` keeps the evaluation inside the try block so the
    // browser is only closed after the results have been read.
    return await page.evaluate(() =>
      Array.from(document.querySelectorAll("a"), (el) => [
        el.getAttribute("aria-label"),
        el.href,
      ]).filter(([label]) => Boolean(label) && label !== "Clear search")
    );
  } finally {
    await browser.close();
  }
}
/**
 * Entry point: collects the search-result links, visits each one in a single
 * browser tab, and logs the scraped records as one array.
 *
 * @returns {Promise<void>} Resolves once every link has been scraped and the
 *   browser has been closed.
 */
async function main() {
  const allLinks = await getLinks();
  //console.log(allLinks);
  const browser = await puppeteer.launch({ headless: false });
  try {
    // `browser.pages()` resolves to an array of pages; use the first tab.
    const [page] = await browser.pages();
    const scrapedData = [];
    for (const link of allLinks) {
      // Entries may be `[label, href]` pairs or plain URL strings.
      const url = Array.isArray(link) ? link[1] : link;
      // Awaited so the array holds results rather than pending promises;
      // sequential because every visit navigates the same tab.
      scrapedData.push(await getPageData(url, page));
    }
    console.log(scrapedData);
  } finally {
    // Release the browser even when a page fails to load.
    await browser.close();
  }
}
// Report any scraping failure and exit non-zero instead of leaving the
// returned promise floating.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
最佳答案
我想这就是你所要求的,
我做了一些更改:
- 将 `searchQuery` 直接注入(inject)到网址中
- 查找所有 `<a>` 标签而不是特定的选择器,然后过滤这些标签并仅返回有效的标签。
- 使用 `page.waitForNavigation`,如果您需要允许 cookie,这会很有帮助
- 脚本将继续滚动,直到到达页面底部。
完整代码如下:
const puppeteer = require("puppeteer"); /// import puppeteer from "puppeteer";
const xlsx = require("xlsx");
// Get the data
/**
 * Opens a place page and extracts the shop name, address and website.
 *
 * The info rows are read from the page once; the address is the first row
 * containing "United States" and the website is the first row that looks
 * like a bare URL. A field that cannot be found is `undefined`.
 *
 * @param {string} url - Address passed to `page.goto`.
 * @param {object} page - Page object exposing `goto`, `waitForSelector`,
 *   `$eval` and `$$eval`.
 * @returns {Promise<{shop: (string|undefined), address: (string|undefined), website: (string|undefined)}>}
 *   Trimmed field values; the same object is also logged to the console.
 */
async function getPageData(url, page) {
  const titleSelector =
    "#pane > div > div > div > div > div > div > div > div > h1";
  const infoRowSelector =
    "#pane > div > div > div > div > div > div > button > div > div > div";
  // No `g` flag: these patterns are reused with `.test`, which is stateful
  // on global regexes.
  const addressPattern = /United States/;
  const websitePattern =
    /^((https?|ftp|smtp):\/\/)?(www.)?[a-z0-9]+(\.[a-z]{2,}){1,3}(#?\/?[a-zA-Z0-9#]+)*\/?(\?[a-zA-Z0-9-_]+=[a-zA-Z0-9-%]+&?)?$/;

  await page.goto(url);

  //Shop Name
  await page.waitForSelector(".x3AX1-LfntMc-header-title-title span");
  const shopName = await page.$eval(titleSelector, (name) => name?.textContent);

  // Wait for the address and website markers before reading the rows, so a
  // single snapshot contains both.
  await page.waitForSelector(".QSFF4-text.gm2-body-2:nth-child(1)");
  await page.waitForSelector(".HY5zDd");

  // One round trip that returns plain strings. DOM nodes are not returned:
  // they cannot be serialised back to Node, and a non-string value here
  // would make the `.trim()` calls below throw.
  const rowTexts = await page.$$eval(infoRowSelector, (divs) =>
    Array.from(divs).map((div) => div?.innerText)
  );
  const rows = rowTexts.filter((text) => typeof text === "string");

  //Shop Address
  const address = rows.find((text) => addressPattern.test(text));
  //Website
  const website = rows.find((text) => websitePattern.test(text));

  const returnObj = {
    shop: shopName?.trim(),
    address: address?.trim(),
    website: website?.trim(),
  };
  console.log(returnObj);
  return returnObj;
}
//Get Links
/**
 * Scrolls the result list of the current page until its height stops
 * changing, then returns the Google Maps links found in the document.
 *
 * @param {object} page - Page object exposing `waitForSelector`, `evaluate`,
 *   `$eval` and `waitForTimeout`.
 * @returns {Promise<string[]>} Unique hrefs, in document order, that contain
 *   `https://www.google.com/maps/` but do not carry it as a `=`-prefixed
 *   parameter value.
 */
async function getLinks(page) {
  // NOTE(review): presumably the scrollable result list — verify against
  // the current Maps markup.
  const divSelector = "#pane > div > div > div > div > div:nth-child(4) > div";

  // Scroll to the last known height, give the list time to grow, and stop
  // once the measured height no longer changes.
  let scrollHeight = 1000;
  while (true) {
    await page.waitForSelector(divSelector);
    await page.evaluate(
      (height, selector) => document.querySelector(selector).scrollTo(0, height),
      scrollHeight,
      divSelector
    );
    await page.waitForTimeout(300);
    const newScrollHeight = await page.$eval(
      divSelector,
      (div) => div.scrollHeight
    );
    if (newScrollHeight === scrollHeight) {
      break;
    }
    scrollHeight = newScrollHeight;
  }

  // Get results
  return page.evaluate(() => {
    // The patterns live inside the callback because it runs in the page
    // and cannot close over Node-side variables.
    const mapsLink = /https:\/\/www\.google\.com\/maps\//;
    const embeddedMapsLink = /=https:\/\/www\.google\.com\/maps\//;
    const hrefs = Array.from(document.querySelectorAll("a"), (el) => el.href)
      .filter((link) => mapsLink.test(link) && !embeddedMapsLink.test(link));
    // Drop repeated hrefs so each place is visited once.
    return [...new Set(hrefs)];
  });
}
/**
 * Entry point: searches Google Maps for `searchQuery`, walks the result
 * pages collecting place links, scrapes every unique link, and logs both the
 * link list and the scraped records.
 *
 * @param {string} [searchQuery="flower shop chicago"] - Free-text query; it
 *   is URL-encoded before being placed in the address.
 * @returns {Promise<void>} Resolves once scraping is done and the browser has
 *   been closed.
 */
async function main(searchQuery = "flower shop chicago") {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const [page] = await browser.pages();
    await page.goto(
      `https://www.google.com/maps/?q=${encodeURIComponent(searchQuery)}`
    );
    await page.waitForNavigation({ waitUntil: "domcontentloaded" });
    await page.waitForTimeout(4000);

    // True once the pane shows the "No results found" message.
    const hasNoResults = () =>
      page.$$eval(
        "#pane > div > div > div > div > div > div > div",
        (elements) =>
          Array.from(elements).some(
            (el) => el?.innerText === "No results found"
          )
      );

    // Clicks the "Next page" button; false when it is missing or disabled,
    // so the caller can stop instead of throwing or waiting for a
    // navigation that never comes.
    const clickNextPage = () =>
      page.$$eval("button", (elements) => {
        const next = Array.from(elements).find(
          (el) => el.getAttribute("aria-label") === " Next page "
        );
        if (!next || next.disabled) {
          return false;
        }
        next.click();
        return true;
      });

    // A Set keeps first-seen order and drops links repeated across pages.
    const uniqueLinks = new Set();
    while (!(await hasNoResults())) {
      for (const link of await getLinks(page)) {
        uniqueLinks.add(link);
      }
      if (!(await clickNextPage())) {
        break;
      }
      await page.waitForNavigation({ waitUntil: "load" });
    }
    const allLinks = [...uniqueLinks];
    console.log(allLinks);

    // Sequential on purpose: every visit navigates the same tab.
    const scrapedData = [];
    for (const link of allLinks) {
      scrapedData.push(await getPageData(link, page));
    }
    console.log(scrapedData);
  } finally {
    // Release the browser even when a page fails to load.
    await browser.close();
  }
}
// Report any scraping failure and exit non-zero instead of leaving the
// returned promise floating.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
输出如下:(输出是一个包含 200 个项目的数组,我无法在此处显示所有项目)
{
shop: "Donna's Garden Flower Shop - Chicago, IL",
address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
website: 'donnasgarden.com'
}
{
shop: 'Bunches (a flower shop)',
address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
website: 'buncheschicago.com'
}
{
shop: 'The Flower Shop of Chicago',
address: '2246 W Taylor St, Chicago, IL 60612, United States',
website: 'flowershopofchicago.com'
}
{
shop: "Kelly's Flower Shop",
address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
website: 'kellysflowershop.com'
}
{
shop: 'Chicago Florist - Send Flowers',
address: undefined,
website: 'samedayflowerdeliverychicago.com'
}
{
shop: 'Chicago Flower',
address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
website: 'chicagosmarcelflorist.com'
}
{
shop: "Steve's Flower Market",
address: '1039 W Grand Ave, Chicago, IL 60642, United States',
website: 'pos.floranext.com'
}
{
shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',
address: undefined,
website: 'bloomfloralshop.com'
}
{
shop: 'Ashland Addison Florist - Lakeview',
address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
website: 'ashaddflorist.com'
}
{
shop: "Goldie's Flower Shop",
address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
website: 'goldiesflowershop.com'
}
{
shop: 'Tea Rose Flower Shop',
address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
website: 'tearosechicago.com'
}
{
shop: 'Designs by Ming: Florist & Flower Delivery - Chicago IL Florist, Wedding Flowers Arrangement, Custom Design Flower Shop',
address: '230 E Ontario St #2401, Chicago, IL 60611, United States',
website: 'yellowpages.com'
}
{
shop: 'Crystal Flower Shop, Inc.',
address: '2815 S Kedzie Ave, Chicago, IL 60623, United States',
website: 'doordash.com'
}
{
shop: "Wall's Flower Shop, Inc.",
address: '5862 W Higgins Ave, Chicago, IL 60630, United States',
website: 'wallsflowershop.com'
}
{
shop: 'Fleur de Lis Florist',
address: '715 N Franklin St, Chicago, IL 60654, United States',
website: 'fleurdelischicago.com'
}
{
shop: 'Secret Garden Flower Shop',
address: '3910 W 71st St, Chicago, IL 60629, United States',
website: 'secretgardenflowershopil.com'
}
{
shop: 'Marguerite Gardens Florist',
address: '2444 W Chicago Ave, Chicago, IL 60622, United States',
website: 'flowerpowerchicgo.com'
}
{
shop: "Leo's Metropolitan Florist",
address: '407 E 71st St, Chicago, IL 60619, United States',
website: 'doordash.com'
}
{
shop: 'Bonnie Flower Shop Inc',
address: '3400 W Irving Park Rd, Chicago, IL 60618, United States',
website: 'doordash.com'
}
{
shop: 'Flora Chicago',
address: '2835 N Southport Ave, Chicago, IL 60657, United States',
website: 'florachicago.com'
}
{
shop: "Donna's Garden Flower Shop - Chicago, IL",
address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
website: 'donnasgarden.com'
}
{
shop: 'Bunches (a flower shop)',
address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
website: 'buncheschicago.com'
}
{
shop: 'The Flower Shop of Chicago',
address: '2246 W Taylor St, Chicago, IL 60612, United States',
website: 'flowershopofchicago.com'
}
{
shop: "Kelly's Flower Shop",
address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
website: 'kellysflowershop.com'
}
{
shop: 'Chicago Florist - Send Flowers',
address: undefined,
website: 'samedayflowerdeliverychicago.com'
}
{
shop: 'Chicago Flower',
address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
website: 'chicagosmarcelflorist.com'
}
{
shop: "Steve's Flower Market",
address: '1039 W Grand Ave, Chicago, IL 60642, United States',
website: 'pos.floranext.com'
}
{
shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',
address: undefined,
website: 'bloomfloralshop.com'
}
{
shop: 'Ashland Addison Florist - Lakeview',
address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
website: 'ashaddflorist.com'
}
{
shop: "Goldie's Flower Shop",
address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
website: 'goldiesflowershop.com'
}
{
shop: 'Tea Rose Flower Shop',
address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
website: 'tearosechicago.com'
}
{
shop: 'Designs by Ming: Florist & Flower Delivery - Chicago IL Florist, Wedding Flowers Arrangement, Custom Design Flower Shop',
address: '230 E Ontario St #2401, Chicago, IL 60611, United States',
website: 'yellowpages.com'
}
{
shop: 'Crystal Flower Shop, Inc.',
address: '2815 S Kedzie Ave, Chicago, IL 60623, United States',
website: 'doordash.com'
}
{
shop: "Wall's Flower Shop, Inc.",
address: '5862 W Higgins Ave, Chicago, IL 60630, United States',
website: 'wallsflowershop.com'
}
{
shop: 'Fleur de Lis Florist',
address: '715 N Franklin St, Chicago, IL 60654, United States',
website: 'fleurdelischicago.com'
}
{
shop: 'Secret Garden Flower Shop',
address: '3910 W 71st St, Chicago, IL 60629, United States',
website: 'secretgardenflowershopil.com'
}
{
shop: 'Marguerite Gardens Florist',
address: '2444 W Chicago Ave, Chicago, IL 60622, United States',
website: 'flowerpowerchicgo.com'
}
{
shop: "Leo's Metropolitan Florist",
address: '407 E 71st St, Chicago, IL 60619, United States',
website: 'doordash.com'
}
{
shop: 'Bonnie Flower Shop Inc',
address: '3400 W Irving Park Rd, Chicago, IL 60618, United States',
website: 'doordash.com'
}
{
shop: 'Flora Chicago',
address: '2835 N Southport Ave, Chicago, IL 60657, United States',
website: 'florachicago.com'
}
{
shop: "Donna's Garden Flower Shop - Chicago, IL",
address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
website: 'donnasgarden.com'
}
{
shop: 'Bunches (a flower shop)',
address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
website: 'buncheschicago.com'
}
{
shop: 'The Flower Shop of Chicago',
address: '2246 W Taylor St, Chicago, IL 60612, United States',
website: 'flowershopofchicago.com'
}
{
shop: "Kelly's Flower Shop",
address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
website: 'kellysflowershop.com'
}
{
shop: 'Chicago Florist - Send Flowers',
address: undefined,
website: 'samedayflowerdeliverychicago.com'
}
{
shop: 'Chicago Flower',
address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
website: 'chicagosmarcelflorist.com'
}
{
shop: "Steve's Flower Market",
address: '1039 W Grand Ave, Chicago, IL 60642, United States',
website: 'pos.floranext.com'
}
{
shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',
address: undefined,
website: 'bloomfloralshop.com'
}
{
shop: 'Ashland Addison Florist - Lakeview',
address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
website: 'ashaddflorist.com'
}
{
shop: "Goldie's Flower Shop",
address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
website: 'goldiesflowershop.com'
}
{
shop: 'Tea Rose Flower Shop',
address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
website: 'tearosechicago.com'
}
{
shop: 'Designs by Ming: Florist & Flower Delivery - Chicago IL Florist, Wedding Flowers Arrangement, Custom Design Flower Shop',
address: '230 E Ontario St #2401, Chicago, IL 60611, United States',
website: 'yellowpages.com'
}
{
shop: 'Crystal Flower Shop, Inc.',
address: '2815 S Kedzie Ave, Chicago, IL 60623, United States',
website: 'doordash.com'
}
{
shop: "Wall's Flower Shop, Inc.",
address: '5862 W Higgins Ave, Chicago, IL 60630, United States',
website: 'wallsflowershop.com'
}
{
shop: 'Fleur de Lis Florist',
address: '715 N Franklin St, Chicago, IL 60654, United States',
website: 'fleurdelischicago.com'
}
{
shop: 'Secret Garden Flower Shop',
address: '3910 W 71st St, Chicago, IL 60629, United States',
website: 'secretgardenflowershopil.com'
}
{
shop: 'Marguerite Gardens Florist',
address: '2444 W Chicago Ave, Chicago, IL 60622, United States',
website: 'flowerpowerchicgo.com'
}
{
shop: "Leo's Metropolitan Florist",
address: '407 E 71st St, Chicago, IL 60619, United States',
website: 'doordash.com'
}
{
shop: 'Bonnie Flower Shop Inc',
address: '3400 W Irving Park Rd, Chicago, IL 60618, United States',
website: 'doordash.com'
}
{
shop: 'Flora Chicago',
address: '2835 N Southport Ave, Chicago, IL 60657, United States',
website: 'florachicago.com'
}
{
shop: "Donna's Garden Flower Shop - Chicago, IL",
address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
website: 'donnasgarden.com'
}
{
shop: 'Bunches (a flower shop)',
address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
website: 'buncheschicago.com'
}
{
shop: 'The Flower Shop of Chicago',
address: '2246 W Taylor St, Chicago, IL 60612, United States',
website: 'flowershopofchicago.com'
}
{
shop: "Kelly's Flower Shop",
address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
website: 'kellysflowershop.com'
}
{
shop: 'Chicago Florist - Send Flowers',
address: undefined,
website: 'samedayflowerdeliverychicago.com'
}
{
shop: 'Chicago Flower',
address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
website: 'chicagosmarcelflorist.com'
}
{
shop: "Steve's Flower Market",
address: '1039 W Grand Ave, Chicago, IL 60642, United States',
website: 'pos.floranext.com'
}
{
shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',
address: undefined,
website: 'bloomfloralshop.com'
}
{
shop: 'Ashland Addison Florist - Lakeview',
address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
website: 'ashaddflorist.com'
}
{
shop: "Goldie's Flower Shop",
address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
website: 'goldiesflowershop.com'
}
{
shop: 'Tea Rose Flower Shop',
address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
website: 'tearosechicago.com'
}
{
shop: 'Designs by Ming: Florist & Flower Delivery - Chicago IL Florist, Wedding Flowers Arrangement, Custom Design Flower Shop',
address: '230 E Ontario St #2401, Chicago, IL 60611, United States',
website: 'yellowpages.com'
}
{
shop: 'Crystal Flower Shop, Inc.',
address: '2815 S Kedzie Ave, Chicago, IL 60623, United States',
website: 'doordash.com'
}
{
shop: "Wall's Flower Shop, Inc.",
address: '5862 W Higgins Ave, Chicago, IL 60630, United States',
website: 'wallsflowershop.com'
}
{
shop: 'Fleur de Lis Florist',
address: '715 N Franklin St, Chicago, IL 60654, United States',
website: 'fleurdelischicago.com'
}
{
shop: 'Secret Garden Flower Shop',
address: '3910 W 71st St, Chicago, IL 60629, United States',
website: 'secretgardenflowershopil.com'
}
{
shop: 'Marguerite Gardens Florist',
address: '2444 W Chicago Ave, Chicago, IL 60622, United States',
website: 'flowerpowerchicgo.com'
}
{
shop: "Leo's Metropolitan Florist",
address: '407 E 71st St, Chicago, IL 60619, United States',
website: 'doordash.com'
}
{
shop: 'Bonnie Flower Shop Inc',
address: '3400 W Irving Park Rd, Chicago, IL 60618, United States',
website: 'doordash.com'
}
{
shop: 'Flora Chicago',
address: '2835 N Southport Ave, Chicago, IL 60657, United States',
website: 'florachicago.com'
}
{
shop: "Donna's Garden Flower Shop - Chicago, IL",
address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
website: 'donnasgarden.com'
}
{
shop: 'Bunches (a flower shop)',
address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
website: 'buncheschicago.com'
}
{
shop: 'The Flower Shop of Chicago',
address: '2246 W Taylor St, Chicago, IL 60612, United States',
website: 'flowershopofchicago.com'
}
{
shop: "Kelly's Flower Shop",
address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
website: 'kellysflowershop.com'
}
{
shop: 'Chicago Florist - Send Flowers',
address: undefined,
website: 'samedayflowerdeliverychicago.com'
}
{
shop: 'Chicago Flower',
address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
website: 'chicagosmarcelflorist.com'
}
{
shop: "Steve's Flower Market",
address: '1039 W Grand Ave, Chicago, IL 60642, United States',
website: 'pos.floranext.com'
}
{
shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',
address: undefined,
website: 'bloomfloralshop.com'
}
{
shop: 'Ashland Addison Florist - Lakeview',
address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
website: 'ashaddflorist.com'
}
{
shop: "Goldie's Flower Shop",
address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
website: 'goldiesflowershop.com'
}
{
shop: 'Tea Rose Flower Shop',
address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
website: 'tearosechicago.com'
}
其他更改
- 使用更通用的选择器来获取名称、地址和网站
- 不断获取结果,直到从所有页面获取数据
- 验证字段以确保数据准确
- 将与当前 puppeteer 实例相关的所有内容移至 `main`
- 如果商店未明确提供地址,则将地址设置为未定义
关于node.js - 使用 Puppeteer 抓取 Google map 搜索结果链接,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68597758/