目标:在网站上完成各项输入和点击之后,将所有酒店名称保存到一个 .txt 文件中。
问题:我认为我的 CSS 选择器 ('[class^="js-hotel-location"] > [class^="js-hotel-name"] > span.l-property-name') 可能不正确,因为我的 candidates 函数返回了一个空数组;而且如果去掉 if(!nameEl) return false; 这一行,我会收到 TypeError: Cannot read properties of undefined (reading 'trim')。
candidates_distance.txt
的预期输出: ["Aloft Chicago Downtown River North", "The Gwen, a Luxury Collection Hotel, Michigan Avenue Chicago", "Sheraton Grand Chicago Riverwalk"]
实际输出: []
导致问题的代码: 返回空数组的 candidates
函数:
// ! STUCK: candidates function returning empty array - likely CSS selector is incorrect
// Excerpt of the failing step: collect the trimmed text of every element matched
// by the selector and write the resulting array to candidates_name.txt as JSON.
const candidates = await page.$$eval('[class^="js-hotel-location"] > [class^="js-hotel-name"] > span.l-property-name', elements => {
return elements
.filter(el => {
// NOTE(review): `el` is already one of the matched span.l-property-name elements.
// querySelector only searches the descendants of `el`, so unless a second matching
// span is nested inside it this lookup returns null ...
const nameEl = el.querySelector('[class^="js-hotel-location"] > [class^="js-hotel-name"] > span.l-property-name');
// ... and this guard then discards the element, which leaves the array empty
// even when the outer selector does match something.
if(!nameEl) return false;
const name = nameEl.textContent.trim();
return name !== "";
})
.map(el => el.textContent.trim());
});
// NOTE(review): in the full script `fs` is required from 'fs/promises', whose
// writeFile returns a promise; presumably this callback is never invoked there and
// the promise is left un-awaited — confirm against the Node.js documentation.
fs.writeFile('candidates_name.txt', JSON.stringify(candidates), function (err) {
if (err) throw err;
});
重现代码:
constants.js
:
// Search parameters read by the scraper script: the start page, the destination
// typed into the search box, the stay dates picked in the calendar, and the
// promo code entered in the special-rates popup.
const searchSettings = {
  URL: "https://www.marriott.com/default.mi",
  DESTINATION: "Chicago, IL, USA",
  START_MONTH: "August",
  START_DAY: 7,
  END_MONTH: "August",
  END_DAY: 10,
  PROMO_CODE: "MMP",
};

module.exports = searchSettings;
index.js
:
const puppeteer = require('puppeteer-extra');
const constants = require('./marriot_constants.js');
const blockResources = require('puppeteer-extra-plugin-block-resources');
const fs = require('fs/promises');
// Register the block-resources plugin so that requests for images and
// stylesheets are blocked.
// NOTE(review): the author flagged this step as "NOT WORKING"; the cause is not
// visible in this file.
const blockedTypes = new Set(['image', 'stylesheet']);
const pluginConfig = blockResources({ blockedTypes });
puppeteer.use(pluginConfig);
let browser;

// Drives the hotel search form end to end (destination, stay dates, promo code),
// submits it, then saves the hotel names found on the results page to
// candidates_name.txt as a JSON array. Errors are logged and the browser is
// always closed.
(async () => {
  browser = await puppeteer.launch();
  const [page] = await browser.pages();
  const ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36";
  await page.setUserAgent(ua);
  await page.setViewport({width: 800, height: 10700});
  await page.goto(constants.URL, {waitUntil: "domcontentloaded"});

  // Destination and calendar popup.
  await page.type('[id$="-search-destination"]', constants.DESTINATION);
  await page.click(".search__calendar-value");
  await page.waitForSelector(".search-dates-popup");

  // Clicks "Next Month" until the first displayed month contains targetMonth
  // (gives up silently after 12 attempts, as before).
  const findMonth = async targetMonth => {
    for (let i = 0; i < 12; i++) {
      const month = await page.$eval(".DayPicker-Month", el => el.textContent);
      if (month.includes(targetMonth)) break;
      await page.click('[aria-label="Next Month"]');
    }
  };

  // Clicks the first calendar cell whose text equals `day`. A missing cell now
  // produces a descriptive error instead of "Cannot read properties of undefined".
  const chooseDay = day => page.$$eval(".DayPicker-Day-value", (els, wanted) => {
    const cell = els.find(e => e.textContent.trim() === wanted);
    if (!cell) throw new Error(`Day "${wanted}" not found in the calendar`);
    cell.click();
  }, String(day));

  await findMonth(constants.START_MONTH);
  await chooseDay(constants.START_DAY);
  await findMonth(constants.END_MONTH);
  await chooseDay(constants.END_DAY);
  await page.click('button.m-button-secondary');

  // Special rates popup: pick the promo-code option and enter the code.
  await page.click('[class^="StyledSpecialRatesDiv"] > [class="t-font-s"]');
  await page.waitForSelector('[class^="StyledPopupMain"]');
  await page.click('[class^="StyledRadioButtonDiv"]:nth-child(5)');
  await page.type('[class^="StyledRadioButtonDiv"]:nth-child(5)', constants.PROMO_CODE);
  await page.waitForSelector('[class="crop-promo-code"] > [class="m-button-secondary m-button-s"]');
  await page.click('[class="crop-promo-code"] > [class="m-button-secondary m-button-s"]');

  // Start waiting for the navigation before the click that triggers it;
  // otherwise the navigation can finish first and the wait never resolves.
  await Promise.all([
    page.waitForNavigation(),
    page.click('[id$="find-a-hotel-homePage-form"] > [class^="StyledFindBtn"]'),
  ]);

  // The previous selector chained the wrappers with ">" although
  // js-hotel-name is not a direct child of js-hotel-location, and the filter
  // then ran querySelector with that same selector on each already-matched
  // span, which returns null and emptied the result. Scoping to the property
  // records and reading each span's own text avoids both problems.
  // NOTE(review): the selector depends on the site's current markup.
  const candidates = await page.$$eval(
    '[id^="property-record-map"] span.l-property-name',
    elements => elements
      .map(el => el.textContent.trim())
      .filter(name => name !== "")
  );

  // `fs` is the promise-based API ('fs/promises'): there is no callback, so
  // await the write to surface failures through the .catch() below.
  await fs.writeFile('candidates_name.txt', JSON.stringify(candidates));
})()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
最佳答案
您可以在浏览器控制台中测试选择器,以以下格式编写并运行它。它将显示您的选择器选择的内容。
$$('span.l-property-name');
就您的情况而言,[class^="js-hotel-name"] 并不是 [class^="js-hotel-location"] 的直接子元素,而 > 只匹配直接子元素,所以这个选择器什么也选不到。
应该是
[class^="js-hotel-location"] [class^="js-hotel-name"] > span.l-property-name
但这样也会选中其他度假屋优惠中的名称,因此您的选择器应该是
[id^="property-record-map"] span.l-property-name
您的代码中也不需要过滤器部分:
// Collect the trimmed text of every hotel-name span inside a property record.
const hotelNameSelector = '[id^="property-record-map"] span.l-property-name';
const candidates = await page.$$eval(hotelNameSelector, (nodes) => {
  const names = [];
  for (const node of nodes) {
    names.push(node.textContent.trim());
  }
  return names;
});
使用您 constants.js 文件中的内容时,该选择器返回的酒店数量比您列出的要多;我只有通过下面的方式才能得到这样的输出:
["Aloft Chicago Downtown River North", "The Gwen, a Luxury Collection Hotel, Michigan Avenue Chicago", "Sheraton Grand Chicago Riverwalk"]
前提是我执行以下额外步骤,按品牌(豪华精选、喜来登、雅乐轩)和价格(200+ 美元)进行筛选:
// add after -> await page.waitForNavigation();

// Waits until `selector` is present on the page, then clicks the matched element.
const waitClick = async (selector) => {
  const target = await page.waitForSelector(selector);
  await target.click();
};

// Filter form that is waited on (visible) after each filter choice.
const filterForm = 'div.js-sort-and-filter-form';

await waitClick('[data-component-name="searchFilters"] a.m-button'); // All Filters
await waitClick('a[data-code=Brands]'); // filters > brands btn

// Brands to select (presumably Luxury Collection, Sheraton, Aloft — verify the codes).
const brandSelectors = [
  'a[data-for=brands_LC]',
  'a[data-for=brands_SI]',
  'a[data-for=brands_AL]',
];
for (const brandSelector of brandSelectors) {
  await waitClick(brandSelector);
  await page.waitForSelector(filterForm, { visible: true }); // wait for selector
}

await waitClick('a[data-code=price]'); // filters > price btn
await waitClick('label[for=price_3]'); // 200+ USD
await page.waitForSelector(filterForm, { visible: true });
await waitClick('form.js-submit-form button[type=Submit]'); // Apply filters
await page.waitForSelector('body');

// Trimmed text of every hotel-name span inside a property record.
const candidates = await page.$$eval(
  '[id^="property-record-map"] span.l-property-name',
  (nodes) => nodes.map((node) => node.textContent.trim()),
);
// next comes the fs.writeFile part
关于css - Puppeteer - 无法在酒店网站中拉出正确的 CSS 选择器,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/76125539/