我正在抓取一个粉丝网站以获取 Angular 色信息以显示在我的网络应用程序中,但我遇到了发送后无法设置标题的问题。
我试图在我的请求中使用 promise ,但是我想我可能对我的代码实际上在做什么有根本的误解。
最终目标是通过遍历老板姓名数组来抓取 100 多页数据,将数据存储在数组中,然后最终将其导出以备后用。目前我能够将数据存储在一个数组中,但即使我的代码执行并抓取数据仍然会出错。
服务器.js
var express = require('express');
var cheerio = require('cheerio');
var app = express();
var rp = require('request-promise');
var fsp = require('fs-promise');
app.get('/', function(req, res){
urls = [
'fansite/boss1', 'fansite/boss2'
];
var bosses = [];
function parse(html) {
var $ = cheerio.load(html);
$('.page-header__title').filter(function () {
var data = $(this);
name = data.text();
bosses.push(name);
})
console.log(bosses);
return bosses;
}
urls.forEach(function (url) {
rp(url)
.then(parse)
.then(res.send('Bosses Updated.'))
.catch(err => console.log('Error:', err));
});
})
app.listen('8081')
console.log('Running on port 8081');
exports = module.exports = app;
输出:
node server.js start
Running on port 8081
[ 'Obor' ]
[ 'Obor', 'Zulrah' ]
Error: Error: Can't set headers after they are sent.
at ServerResponse.OutgoingMessage.setHeader (_http_outgoing.js:356:11)
at ServerResponse.header (/Users/aaron/Personal Projects/node-scraper/node_modules/express/lib/response.js:767:10)
at ServerResponse.send (/Users/aaron/Personal Projects/node-scraper/node_modules/express/lib/response.js:170:12)
at rp.then.then (/Users/aaron/Personal Projects/node-scraper/server.js:31:21)
at tryCatcher (/Users/aaron/Personal Projects/node-scraper/node_modules/bluebird/js/release/util.js:16:23)
at Promise._settlePromiseFromHandler (/Users/aaron/Personal Projects/node-scraper/node_modules/bluebird/js/release/promise.js:512:31)
at Promise._settlePromise (/Users/aaron/Personal Projects/node-scraper/node_modules/bluebird/js/release/promise.js:569:18)
at Promise._settlePromise0 (/Users/aaron/Personal Projects/node-scraper/node_modules/bluebird/js/release/promise.js:614:10)
at Promise._settlePromises (/Users/aaron/Personal Projects/node-scraper/node_modules/bluebird/js/release/promise.js:693:18)
at Async._drainQueue (/Users/aaron/Personal Projects/node-scraper/node_modules/bluebird/js/release/async.js:133:16)
at Async._drainQueues (/Users/aaron/Personal Projects/node-scraper/node_modules/bluebird/js/release/async.js:143:10)
at Immediate.Async.drainQueues (/Users/aaron/Personal Projects/node-scraper/node_modules/bluebird/js/release/async.js:17:14)
at runCallback (timers.js:672:20)
at tryOnImmediate (timers.js:645:5)
at processImmediate [as _immediateCallback] (timers.js:617:5)
最佳答案
如果你想等待所有的url被处理后再发送响应
Promise.all(urls.map(function (url) {
return rp(url).then(parse);
}))
.then(() => res.send('Bosses Updated.'))
.catch(err => console.log('Error:', err));
或
Promise.all(urls.map(url => rp(url).then(parse)))
.then(() => res.send('Bosses Updated.'))
.catch(err => console.log('Error:', err));
关于javascript - NodeJS Express 网络抓取 header 问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/48533672/