node.js - 使用 Puppeteer 抓取 Google map 搜索结果链接

标签 node.js web-scraping puppeteer

这就是我想要抓取的内容。

我在“www.google.com/maps”中输入搜索查询,例如“芝加哥花店”。当 Google Maps 的搜索结果列出芝加哥的所有花店后,我想把每家店的 [商店名称] 和 [商店链接,即 href 链接] 放进一个数组,并用 console.log 输出到控制台。

我在这个项目中使用 Puppeteer。我的代码能够打开 Chromium、输入搜索查询并按回车以获取商店列表。但是,我无法做到只把商店名称和链接输出到控制台。这是我的代码。基本上,我认为问题在于我很难找到正确的 CSS 选择器。

如果您能提供帮助,我会很高兴。这是我的代码

const puppeteer = require('puppeteer');
const xlsx = require("xlsx");

// Get the data
/**
 * Opens a single place page and reads the shop's name, address and website.
 *
 * Each field is read only after its selector has appeared on the page.
 * NOTE(review): the class-name selectors look auto-generated by Google Maps —
 * presumably they change between releases; confirm against the live markup.
 *
 * @param {string} url - Address of the place page to open.
 * @param {object} page - Puppeteer page used for navigation and DOM reads.
 * @returns {Promise<{shop: string, address: string, website: string}>}
 */
async function getPageData(url, page) {
  const NAME_SELECTOR = ".x3AX1-LfntMc-header-title-title span";
  const ADDRESS_SELECTOR = ".QSFF4-text.gm2-body-2:nth-child(1)";
  const WEBSITE_SELECTOR = ".HY5zDd";

  // Wait until the element exists, then run the extractor against it.
  const readWhenReady = async (selector, extract) => {
    await page.waitForSelector(selector);
    return page.$eval(selector, extract);
  };

  await page.goto(url);

  const shop = await readWhenReady(NAME_SELECTOR, (el) => el.textContent);
  const address = await readWhenReady(ADDRESS_SELECTOR, (el) => el.textContent);
  const website = await readWhenReady(WEBSITE_SELECTOR, (el) => el.innerText);

  return { shop, address, website };
}

//Get Links

/**
 * Runs a Google Maps search and collects the labelled links from the page.
 *
 * Launches its own visible Chromium instance, scrolls the results pane until
 * its scroll height stops growing, reads every anchor that carries an
 * `aria-label` (except the "Clear search" control) and then closes the
 * browser again.
 *
 * @param {string} [searchQuery="flower shop chicago"] - Text to search for.
 * @returns {Promise<Array<Array<string>>>} `[ariaLabel, href]` pairs.
 */
async function getLinks(searchQuery = "flower shop chicago") {
    const paneSelector = "#pane > div > div > div > div > div";

    // Declared locally: assigning to an undeclared `browser` creates an
    // implicit global and throws in strict mode / ES modules.
    const browser = await puppeteer.launch({ headless: false });

    try {
        const [page] = await browser.pages();

        // Encode the query so spaces, "&" or "#" cannot corrupt the URL.
        await page.goto(
            `https://www.google.com/maps/?q=${encodeURIComponent(searchQuery)}`
        );
        // NOTE(review): kept from the original; presumably waits for a
        // follow-up navigation after the first load — confirm it cannot hang.
        await page.waitForNavigation({ waitUntil: "load" });

        // Scroll the results pane until its height no longer changes.
        let scrollHeight = 1000;

        while (true) {
            await page.waitForSelector(paneSelector);

            // The callbacks run inside the page, so everything they need is
            // passed in as arguments instead of being captured by closure.
            await page.evaluate(
                (selector, top) =>
                    document
                        .querySelectorAll(selector)[3]
                        .querySelector("div")
                        .scrollTo(0, top),
                paneSelector,
                scrollHeight
            );

            await page.waitForTimeout(200);

            const newScrollHeight = await page.evaluate(
                (selector) =>
                    document.querySelectorAll(selector)[3].querySelector("div")
                        .scrollHeight,
                paneSelector
            );

            if (scrollHeight === newScrollHeight) {
                break;
            }
            scrollHeight = newScrollHeight;
        }

        // Keep only anchors that have a label and are not the search-box control.
        return await page.evaluate(() =>
            Array.from(document.querySelectorAll("a"))
                .map((el) => [el.getAttribute("aria-label"), el.href])
                .filter(([label]) => Boolean(label) && label !== "Clear search")
        );
    } finally {
        // Always release the browser, even when scraping fails part-way.
        await browser.close();
    }
}

/**
 * Entry point: collects the result links, visits each one and logs the
 * scraped shop records.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const allLinks = await getLinks();

    const browser = await puppeteer.launch({ headless: false });

    try {
        // `browser.pages()` resolves to an array; take the first tab.
        const [page] = await browser.pages();
        const scrapedData = [];

        for (const link of allLinks) {
            // getLinks() yields [ariaLabel, href] pairs; only the href is a URL.
            const url = Array.isArray(link) ? link[1] : link;

            // Await each visit: the single tab is reused, and without the
            // await the array would hold pending promises instead of records.
            scrapedData.push(await getPageData(url, page));
        }

        console.log(scrapedData);
    } finally {
        // Release the browser whether or not scraping succeeded.
        await browser.close();
    }
}

main();

最佳答案

我想这就是你所要求的,

我做了一些更改:

  • 将 searchQuery 直接拼接到 URL 中
  • 查找所有<a>标签而不是特定的选择器,然后过滤这些标签并仅返回有效的标签。
  • 使用 page.waitForNavigation;如果您需要先接受 cookie 提示,这会很有帮助
  • 脚本将继续滚动,直到到达页面底部。

完整代码如下:

const puppeteer = require("puppeteer"); /// import puppeteer from "puppeteer";
const xlsx = require("xlsx");

// Get the data
/**
 * Opens one place page and extracts the shop's name, address and website.
 *
 * The info rows of the side pane are read in a single DOM query and then
 * classified by content: the first row mentioning "United States" is taken
 * as the address and the first row that looks like a URL as the website.
 * A field with no matching row is left `undefined`. The record is also
 * logged to the console before it is returned.
 *
 * NOTE(review): the selectors mirror the Google Maps markup the script was
 * written against — presumably they need updating when that markup changes.
 *
 * @param {string} url - Address of the place page to open.
 * @param {object} page - Puppeteer page used for navigation and DOM reads.
 * @returns {Promise<{shop: (string|undefined), address: (string|undefined), website: (string|undefined)}>}
 */
async function getPageData(url, page) {
    const titleSelector =
        "#pane > div > div > div > div > div > div > div > div > h1";
    const infoRowSelector =
        "#pane > div > div > div > div > div > div > button > div > div > div";
    const addressPattern = /United States/;
    const websitePattern =
        /^((https?|ftp|smtp):\/\/)?(www.)?[a-z0-9]+(\.[a-z]{2,}){1,3}(#?\/?[a-zA-Z0-9#]+)*\/?(\?[a-zA-Z0-9-_]+=[a-zA-Z0-9-%]+&?)?$/;

    await page.goto(url);

    //Shop Name
    await page.waitForSelector(".x3AX1-LfntMc-header-title-title span");
    const shopName = await page.$eval(titleSelector, (name) => name?.textContent);

    // Wait for the address and website rows before reading the pane once.
    await page.waitForSelector(".QSFF4-text.gm2-body-2:nth-child(1)");
    await page.waitForSelector(".HY5zDd");

    // Only plain strings cross the browser boundary. The previous fallback
    // returned a DOM element, which cannot be serialized and is not text,
    // so calling .trim() on it could throw.
    const rowTexts = await page.$$eval(infoRowSelector, (divs) =>
        Array.from(divs).map((div) => div?.innerText)
    );
    const texts = rowTexts.filter((text) => typeof text === "string");

    //Shop Address
    const address = texts.find((text) => addressPattern.test(text));

    //Website
    const website = texts.find((text) => websitePattern.test(text));

    const returnObj = {
        shop: shopName?.trim(),
        address: address?.trim(),
        website: website?.trim(),
    };

    console.log(returnObj);

    return returnObj;
}

//Get Links

/**
 * Scrolls the current results pane to its end and returns the place links.
 *
 * The pane is scrolled repeatedly until its scroll height stops changing.
 * Every anchor on the page is then inspected; a link is kept when it
 * contains the Google Maps URL prefix and does not embed that prefix as a
 * query-string value (such links point elsewhere and merely redirect back).
 *
 * @param {object} page - Puppeteer page already showing a results list.
 * @returns {Promise<string[]>} Matching hrefs, in document order.
 */
async function getLinks(page) {
    const divSelector = "#pane > div > div > div > div > div:nth-child(4) > div";
    const mapsPrefix = "https://www.google.com/maps/";

    // Scrolling to bottom of page
    let scrollHeight = 1000;

    while (true) {
        await page.waitForSelector(divSelector);

        // The callback runs inside the page, so its inputs are passed as
        // arguments instead of being captured by closure.
        await page.evaluate(
            (selector, top) => document.querySelector(selector).scrollTo(0, top),
            divSelector,
            scrollHeight
        );

        await page.waitForTimeout(300);

        const newScrollHeight = await page.$eval(
            divSelector,
            (div) => div.scrollHeight
        );

        if (scrollHeight === newScrollHeight) {
            break;
        }
        scrollHeight = newScrollHeight;
    }

    // Get results. Literal substring checks replace the earlier regexes,
    // whose unescaped dots also accepted look-alike hosts.
    return page.evaluate(
        (prefix) =>
            Array.from(document.querySelectorAll("a"))
                .map((el) => el.href)
                .filter(
                    (link) =>
                        typeof link === "string" &&
                        link.includes(prefix) &&
                        !link.includes("=" + prefix)
                ),
        mapsPrefix
    );
}

/**
 * Entry point: searches Google Maps, walks through the result pages
 * collecting place links, then visits every distinct link and logs the
 * scraped shop records.
 *
 * @param {string} [searchQuery="flower shop chicago"] - Text to search for.
 * @returns {Promise<void>}
 */
async function main(searchQuery = "flower shop chicago") {
    const noResultsSelector = "#pane > div > div > div > div > div > div > div";
    const browser = await puppeteer.launch({ headless: false });

    try {
        const [page] = await browser.pages();

        // Encode the query so spaces, "&" or "#" cannot corrupt the URL.
        await page.goto(
            `https://www.google.com/maps/?q=${encodeURIComponent(searchQuery)}`
        );
        // NOTE(review): kept from the original; presumably covers a redirect
        // such as a cookie-consent page — confirm it cannot hang otherwise.
        await page.waitForNavigation({ waitUntil: "domcontentloaded" });
        await page.waitForTimeout(4000);

        const allLinks = [];

        while (
            // Stop once the pane reports that there are no more results
            !(await page.$$eval(noResultsSelector, (elements) =>
                Array.from(elements).some(
                    (el) => el?.innerText === "No results found"
                )
            ))
        ) {
            allLinks.push(...(await getLinks(page)));

            // Stop cleanly when there is no usable "Next page" button,
            // instead of calling .click() on undefined.
            const canGoNext = await page.$$eval("button", (elements) =>
                Array.from(elements).some(
                    (el) =>
                        el.getAttribute("aria-label") === " Next page " &&
                        !el.disabled
                )
            );
            if (!canGoNext) {
                break;
            }

            // Start waiting before clicking so a fast navigation is not missed.
            await Promise.all([
                page.waitForNavigation({ waitUntil: "load" }),
                page.$$eval("button", (elements) =>
                    Array.from(elements)
                        .find(
                            (el) =>
                                el.getAttribute("aria-label") === " Next page " &&
                                !el.disabled
                        )
                        .click()
                ),
            ]);
        }

        // The same place can be collected more than once across pages;
        // visit each distinct link a single time.
        const uniqueLinks = [...new Set(allLinks)];

        console.log(uniqueLinks);

        const scrapedData = [];

        for (const link of uniqueLinks) {
            scrapedData.push(await getPageData(link, page));
        }

        console.log(scrapedData);
    } finally {
        // Release the browser whether or not scraping succeeded.
        await browser.close();
    }
}

main();

输出如下:(输出是一个包含 200 个项目的数组,我无法在此处显示所有项目)

{
  shop: "Donna's Garden Flower Shop - Chicago, IL",
  address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
  website: 'donnasgarden.com'
}
{
  shop: 'Bunches (a flower shop)',
  address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
  website: 'buncheschicago.com'
}
{
  shop: 'The Flower Shop of Chicago',
  address: '2246 W Taylor St, Chicago, IL 60612, United States',
  website: 'flowershopofchicago.com'
}
{
  shop: "Kelly's Flower Shop",
  address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
  website: 'kellysflowershop.com'
}
{
  shop: 'Chicago Florist - Send Flowers',
  address: undefined,
  website: 'samedayflowerdeliverychicago.com'
}
{
  shop: 'Chicago Flower',
  address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
  website: 'chicagosmarcelflorist.com'
}
{
  shop: "Steve's Flower Market",
  address: '1039 W Grand Ave, Chicago, IL 60642, United States',
  website: 'pos.floranext.com'
}
{
  shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',    
  address: undefined,
  website: 'bloomfloralshop.com'
}
{
  shop: 'Ashland Addison Florist - Lakeview',
  address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
  website: 'ashaddflorist.com'
}
{
  shop: "Goldie's Flower Shop",
  address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
  website: 'goldiesflowershop.com'
}
{
  shop: 'Tea Rose Flower Shop',
  address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
  website: 'tearosechicago.com'
}
{
  shop: 'Designs by Ming: Florist & Flower Delivery - Chicago IL Florist, Wedding Flowers Arrangement, Custom Design Flower Shop',
  address: '230 E Ontario St #2401, Chicago, IL 60611, United States',
  website: 'yellowpages.com'
}
{
  shop: 'Crystal Flower Shop, Inc.',
  address: '2815 S Kedzie Ave, Chicago, IL 60623, United States',
  website: 'doordash.com'
}
{
  shop: "Wall's Flower Shop, Inc.",
  address: '5862 W Higgins Ave, Chicago, IL 60630, United States',
  website: 'wallsflowershop.com'
}
{
  shop: 'Fleur de Lis Florist',
  address: '715 N Franklin St, Chicago, IL 60654, United States',
  website: 'fleurdelischicago.com'
}
{
  shop: 'Secret Garden Flower Shop',
  address: '3910 W 71st St, Chicago, IL 60629, United States',
  website: 'secretgardenflowershopil.com'
}
{
  shop: 'Marguerite Gardens Florist',
  address: '2444 W Chicago Ave, Chicago, IL 60622, United States',
  website: 'flowerpowerchicgo.com'
}
{
  shop: "Leo's Metropolitan Florist",
  address: '407 E 71st St, Chicago, IL 60619, United States',
  website: 'doordash.com'
}
{
  shop: 'Bonnie Flower Shop Inc',
  address: '3400 W Irving Park Rd, Chicago, IL 60618, United States',
  website: 'doordash.com'
}
{
  shop: 'Flora Chicago',
  address: '2835 N Southport Ave, Chicago, IL 60657, United States',
  website: 'florachicago.com'
}
{
  shop: "Donna's Garden Flower Shop - Chicago, IL",
  address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
  website: 'donnasgarden.com'
}
{
  shop: 'Bunches (a flower shop)',
  address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
  website: 'buncheschicago.com'
}
{
  shop: 'The Flower Shop of Chicago',
  address: '2246 W Taylor St, Chicago, IL 60612, United States',
  website: 'flowershopofchicago.com'
}
{
  shop: "Kelly's Flower Shop",
  address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
  website: 'kellysflowershop.com'
}
{
  shop: 'Chicago Florist - Send Flowers',
  address: undefined,
  website: 'samedayflowerdeliverychicago.com'
}
{
  shop: 'Chicago Flower',
  address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
  website: 'chicagosmarcelflorist.com'
}
{
  shop: "Steve's Flower Market",
  address: '1039 W Grand Ave, Chicago, IL 60642, United States',
  website: 'pos.floranext.com'
}
{
  shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',    
  address: undefined,
  website: 'bloomfloralshop.com'
}
{
  shop: 'Ashland Addison Florist - Lakeview',
  address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
  website: 'ashaddflorist.com'
}
{
  shop: "Goldie's Flower Shop",
  address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
  website: 'goldiesflowershop.com'
}
{
  shop: 'Tea Rose Flower Shop',
  address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
  website: 'tearosechicago.com'
}
{
  shop: 'Designs by Ming: Florist & Flower Delivery - Chicago IL Florist, Wedding Flowers Arrangement, Custom Design Flower Shop',
  address: '230 E Ontario St #2401, Chicago, IL 60611, United States',
  website: 'yellowpages.com'
}
{
  shop: 'Crystal Flower Shop, Inc.',
  address: '2815 S Kedzie Ave, Chicago, IL 60623, United States',
  website: 'doordash.com'
}
{
  shop: "Wall's Flower Shop, Inc.",
  address: '5862 W Higgins Ave, Chicago, IL 60630, United States',
  website: 'wallsflowershop.com'
}
{
  shop: 'Fleur de Lis Florist',
  address: '715 N Franklin St, Chicago, IL 60654, United States',
  website: 'fleurdelischicago.com'
}
{
  shop: 'Secret Garden Flower Shop',
  address: '3910 W 71st St, Chicago, IL 60629, United States',
  website: 'secretgardenflowershopil.com'
}
{
  shop: 'Marguerite Gardens Florist',
  address: '2444 W Chicago Ave, Chicago, IL 60622, United States',
  website: 'flowerpowerchicgo.com'
}
{
  shop: "Leo's Metropolitan Florist",
  address: '407 E 71st St, Chicago, IL 60619, United States',
  website: 'doordash.com'
}
{
  shop: 'Bonnie Flower Shop Inc',
  address: '3400 W Irving Park Rd, Chicago, IL 60618, United States',
  website: 'doordash.com'
}
{
  shop: 'Flora Chicago',
  address: '2835 N Southport Ave, Chicago, IL 60657, United States',
  website: 'florachicago.com'
}
{
  shop: "Donna's Garden Flower Shop - Chicago, IL",
  address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
  website: 'donnasgarden.com'
}
{
  shop: 'Bunches (a flower shop)',
  address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
  website: 'buncheschicago.com'
}
{
  shop: 'The Flower Shop of Chicago',
  address: '2246 W Taylor St, Chicago, IL 60612, United States',
  website: 'flowershopofchicago.com'
}
{
  shop: "Kelly's Flower Shop",
  address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
  website: 'kellysflowershop.com'
}
{
  shop: 'Chicago Florist - Send Flowers',
  address: undefined,
  website: 'samedayflowerdeliverychicago.com'
}
{
  shop: 'Chicago Flower',
  address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
  website: 'chicagosmarcelflorist.com'
}
{
  shop: "Steve's Flower Market",
  address: '1039 W Grand Ave, Chicago, IL 60642, United States',
  website: 'pos.floranext.com'
}
{
  shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',    
  address: undefined,
  website: 'bloomfloralshop.com'
}
{
  shop: 'Ashland Addison Florist - Lakeview',
  address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
  website: 'ashaddflorist.com'
}
{
  shop: "Goldie's Flower Shop",
  address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
  website: 'goldiesflowershop.com'
}
{
  shop: 'Tea Rose Flower Shop',
  address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
  website: 'tearosechicago.com'
}
{
  shop: 'Designs by Ming: Florist & Flower Delivery - Chicago IL Florist, Wedding Flowers Arrangement, Custom Design Flower Shop',
  address: '230 E Ontario St #2401, Chicago, IL 60611, United States',
  website: 'yellowpages.com'
}
{
  shop: 'Crystal Flower Shop, Inc.',
  address: '2815 S Kedzie Ave, Chicago, IL 60623, United States',
  website: 'doordash.com'
}
{
  shop: "Wall's Flower Shop, Inc.",
  address: '5862 W Higgins Ave, Chicago, IL 60630, United States',
  website: 'wallsflowershop.com'
}
{
  shop: 'Fleur de Lis Florist',
  address: '715 N Franklin St, Chicago, IL 60654, United States',
  website: 'fleurdelischicago.com'
}
{
  shop: 'Secret Garden Flower Shop',
  address: '3910 W 71st St, Chicago, IL 60629, United States',
  website: 'secretgardenflowershopil.com'
}
{
  shop: 'Marguerite Gardens Florist',
  address: '2444 W Chicago Ave, Chicago, IL 60622, United States',
  website: 'flowerpowerchicgo.com'
}
{
  shop: "Leo's Metropolitan Florist",
  address: '407 E 71st St, Chicago, IL 60619, United States',
  website: 'doordash.com'
}
{
  shop: 'Bonnie Flower Shop Inc',
  address: '3400 W Irving Park Rd, Chicago, IL 60618, United States',
  website: 'doordash.com'
}
{
  shop: 'Flora Chicago',
  address: '2835 N Southport Ave, Chicago, IL 60657, United States',
  website: 'florachicago.com'
}
{
  shop: "Donna's Garden Flower Shop - Chicago, IL",
  address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
  website: 'donnasgarden.com'
}
{
  shop: 'Bunches (a flower shop)',
  address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
  website: 'buncheschicago.com'
}
{
  shop: 'The Flower Shop of Chicago',
  address: '2246 W Taylor St, Chicago, IL 60612, United States',
  website: 'flowershopofchicago.com'
}
{
  shop: "Kelly's Flower Shop",
  address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
  website: 'kellysflowershop.com'
}
{
  shop: 'Chicago Florist - Send Flowers',
  address: undefined,
  website: 'samedayflowerdeliverychicago.com'
}
{
  shop: 'Chicago Flower',
  address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
  website: 'chicagosmarcelflorist.com'
}
{
  shop: "Steve's Flower Market",
  address: '1039 W Grand Ave, Chicago, IL 60642, United States',
  website: 'pos.floranext.com'
}
{
  shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',    
  address: undefined,
  website: 'bloomfloralshop.com'
}
{
  shop: 'Ashland Addison Florist - Lakeview',
  address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
  website: 'ashaddflorist.com'
}
{
  shop: "Goldie's Flower Shop",
  address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
  website: 'goldiesflowershop.com'
}
{
  shop: 'Tea Rose Flower Shop',
  address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
  website: 'tearosechicago.com'
}
{
  shop: 'Designs by Ming: Florist & Flower Delivery - Chicago IL Florist, Wedding Flowers Arrangement, Custom Design Flower Shop',
  address: '230 E Ontario St #2401, Chicago, IL 60611, United States',
  website: 'yellowpages.com'
}
{
  shop: 'Crystal Flower Shop, Inc.',
  address: '2815 S Kedzie Ave, Chicago, IL 60623, United States',
  website: 'doordash.com'
}
{
  shop: "Wall's Flower Shop, Inc.",
  address: '5862 W Higgins Ave, Chicago, IL 60630, United States',
  website: 'wallsflowershop.com'
}
{
  shop: 'Fleur de Lis Florist',
  address: '715 N Franklin St, Chicago, IL 60654, United States',
  website: 'fleurdelischicago.com'
}
{
  shop: 'Secret Garden Flower Shop',
  address: '3910 W 71st St, Chicago, IL 60629, United States',
  website: 'secretgardenflowershopil.com'
}
{
  shop: 'Marguerite Gardens Florist',
  address: '2444 W Chicago Ave, Chicago, IL 60622, United States',
  website: 'flowerpowerchicgo.com'
}
{
  shop: "Leo's Metropolitan Florist",
  address: '407 E 71st St, Chicago, IL 60619, United States',
  website: 'doordash.com'
}
{
  shop: 'Bonnie Flower Shop Inc',
  address: '3400 W Irving Park Rd, Chicago, IL 60618, United States',
  website: 'doordash.com'
}
{
  shop: 'Flora Chicago',
  address: '2835 N Southport Ave, Chicago, IL 60657, United States',
  website: 'florachicago.com'
}
{
  shop: "Donna's Garden Flower Shop - Chicago, IL",
  address: '4155 W Peterson Ave, Chicago, IL 60646, United States',
  website: 'donnasgarden.com'
}
{
  shop: 'Bunches (a flower shop)',
  address: '1501 W Fullerton Ave, Chicago, IL 60614, United States',
  website: 'buncheschicago.com'
}
{
  shop: 'The Flower Shop of Chicago',
  address: '2246 W Taylor St, Chicago, IL 60612, United States',
  website: 'flowershopofchicago.com'
}
{
  shop: "Kelly's Flower Shop",
  address: '175 W Jackson Blvd, Chicago, IL 60604, United States',
  website: 'kellysflowershop.com'
}
{
  shop: 'Chicago Florist - Send Flowers',
  address: undefined,
  website: 'samedayflowerdeliverychicago.com'
}
{
  shop: 'Chicago Flower',
  address: '541 N Fairbanks Ct, Chicago, IL 60611, United States',
  website: 'chicagosmarcelflorist.com'
}
{
  shop: "Steve's Flower Market",
  address: '1039 W Grand Ave, Chicago, IL 60642, United States',
  website: 'pos.floranext.com'
}
{
  shop: 'Bloom Floral Shop | Same Day Flower Delivery Chicago, IL | Best Chicago Florist',    
  address: undefined,
  website: 'bloomfloralshop.com'
}
{
  shop: 'Ashland Addison Florist - Lakeview',
  address: '3118 N Lincoln Ave, Chicago, IL 60613, United States',
  website: 'ashaddflorist.com'
}
{
  shop: "Goldie's Flower Shop",
  address: '901 W Irving Park Rd, Chicago, IL 60613, United States',
  website: 'goldiesflowershop.com'
}
{
  shop: 'Tea Rose Flower Shop',
  address: '5203 N Kimball Ave, Chicago, IL 60625, United States',
  website: 'tearosechicago.com'
}

其他更改

  • 使用更通用的选择器来获取名称、地址和网站
  • 不断获取结果,直到从所有页面获取数据
  • 验证字段以确保数据准确
  • 将与当前 puppeteer 实例相关的所有内容移至 main
  • 如果商店未明确提供地址,则将地址设置为 undefined

关于node.js - 使用 Puppeteer 抓取 Google map 搜索结果链接,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68597758/

相关文章:

javascript - 我应该如何处理许多嵌套匿名函数中的范围?

node.js - 如何使用nodejs连接我们的Jira

node.js - Puppeteer 内存增加问题

puppeteer - 如何使用 Puppeteer 粘贴文本?

javascript - 使用 td 而不是 nth-child 来抓取表

javascript - 将文件转换为数组然后输出对象值时遇到问题

node.js - knex 在开发环境中传输现有数据库模式

python - 有没有 beautifulsoup 函数可以选择重复的类名?

python - 使用Scrapy在管道内的MYSQL数据库中的2个表上添加项目

google-api - 如何以编程方式访问谷歌搜索右侧数据?