我正在尝试抓取一个带有“加载更多”按钮的网站,但是我无法在 Nightmare 中实现递归调用。我的代码是这样的:
// Setup for the question's scraper.
const Nightmare = require('nightmare');
// Single Nightmare instance shared by every call to getThePage().
const nightmare = Nightmare({
show:true
});
// NOTE(review): `request` is required but not referenced anywhere in this snippet.
const request = require('request');
const cheerio = require('cheerio');
// NOTE(review): `url` is not read by getThePage(), which hard-codes the same address.
let url = 'https://www.housers.com/es/proyectos/avanzado';
// NOTE(review): `propertyArray` is never written or read in the visible code.
let propertyArray = [];
/**
 * Opens the project listing, clicks the "load more" button until it is no
 * longer visible, then reads the markup of the `.all-info` element.
 *
 * The page is loaded exactly once and the Nightmare session is ended only
 * after all clicks are done: calling goto() again would reset the list to its
 * first page, and an ended instance cannot be reused for another round.
 *
 * @returns {Promise<string|undefined>} Inner HTML of `.all-info` once every
 *   page has been loaded, or `undefined` when the scrape failed (the error is
 *   logged, not rethrown).
 */
async function getThePage() {
  try {
    await nightmare
      .goto('https://www.housers.com/es/proyectos/avanzado')
      .wait(1500);

    // Ask the live page whether the button is still there; visible() yields
    // false once the button is hidden or gone, which ends the loop.
    while (await nightmare.visible('#loadMore')) {
      // Give the newly requested rows time to render before checking again.
      await nightmare.click('#loadMore').wait(1500);
    }

    return await nightmare.evaluate(() => {
      return document.querySelector('.all-info').innerHTML;
    });
  } catch (error) {
    console.error('Search failed:', error);
    return undefined;
  } finally {
    // Always release the browser process, even when a step above failed.
    try {
      await nightmare.end();
    } catch (endError) {
      console.error('Failed to close Nightmare:', endError);
    }
  }
}
// Kick off the scrape; the return value is not used here.
getThePage()
我不知道能否通过这种方法或其他任何方法来实现这一点。
最佳答案
如果您只是想抓取表格中的数据,则无需使用 Nightmare。在浏览器开发者工具的“网络”标签中,您会看到页面调用了这个端点:
https://www.housers.com/es/proyectos/avanzado/scroll
请求时再加上分页(page)和每页大小(size)参数;这里我们每页取 200 条(不确定这是否超出了接口的限制)。
然后,您只需解析返回的 HTML 并将数据放入数组中:
// Setup for the answer's scraper, which posts to the listing endpoint directly.
const axios = require('axios');
const querystring = require('querystring');
const cheerio = require('cheerio');
const entities = require("entities");
// Endpoint that getPrices() hands to doRequest() for every page.
const url = 'https://www.housers.com/es/proyectos/avanzado/scroll';
// Accumulator filled by getPrices(): one array of cell texts per table row.
const prices = [];
/**
 * Posts the listing filter form for a single page of results.
 *
 * @param {string} url - Endpoint to post to; `page` and a fixed `size=200`
 *   are appended as query parameters.
 * @param {number} page - Page index to request.
 * @returns {*} Whatever `axios.post` returns for the request.
 */
function doRequest(url, page) {
  const pagedUrl = `${url}?page=${page}&size=200`;
  const formBody = querystring.stringify({
    word: "",
    country: "",
    type: "",
    order: "STOCK_PRICE_VARIATION",
    orderDirection: "DESC"
  });
  return axios.post(pagedUrl, formBody);
}
async function getPrices() {
var empty = false;
var page = 0;
while (!empty) {
//call API
console.log("GET page n°" + page);
var res = await doRequest(url, page);
page++;
//parse HTML
const $ = cheerio.load(res.data,{
xmlMode: true,
normalizeWhitespace: true,
decodeEntities: true
});
if (res.data.trim() !== ""){
//extract prices : put it in array
$('tr').map(function(){
var obj = [];
$(this).children('td').map(function(){
obj.push(entities.decodeHTML($(this).text().trim()));
});
prices.push(obj);
});
}
else {
empty = true;
}
}
console.log(prices);
console.log("total length : " + prices.length);
}
// Run the scrape. getPrices() is async, so a failed request would otherwise
// surface only as an unhandled promise rejection; report it and mark the
// process as failed instead.
getPrices().catch((error) => {
  console.error('Price scrape failed:', error);
  process.exitCode = 1;
});
关于javascript - 使用 “load more”按钮JS抓取,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47591880/