我正在尝试从一系列 HTML 页面中抓取数据。访问这些页面时,查询字符串由若干参数组成。这些参数保存在一个 JSON 文件中,该 JSON 的内容如下:
[
{"NM":"bla", "Code":"a12312"},
{"NM":"blabla", "Code":"a11231"},
{"NM":"dog", "Code":"b12311"},
...]
下面是 parsing1.js 的代码:
// Question script: for every entry of ggElemCD.json, load a page with jsdom
// (injecting jQuery) and log values read from the cells of the last row of
// the `.TableType1` table.
var jsdom = require("jsdom");
var schCD = require('./ggElemCD.json');
// NOTE(review): `i` is never declared, so it becomes an implicit global.
// The loop starts one jsdom.env() call per entry without waiting for the
// previous one to finish; presumably the callbacks run after the loop has
// ended, in which case every callback reads the final value of `i`.
for(i in schCD){
jsdom.env("http://www.~~skipped~~&HG_CD="+schCD[i].HG_CD+"~~skipped~~", ["http://code.jquery.com/jquery.js"],
// NOTE(review): `errors` is never inspected. When loading fails, `window`
// is undefined and the next line throws the TypeError reported below.
function(errors, window){
// All <td> cells of the last table row, wrapped in a jQuery object.
var tObj= window.$('.TableType1 tbody tr:last td');
console.log( "Name : "+schCD[i].HG_NM);
console.log( "Code : "+schCD[i].HG_CD);
// Cell indices 2, 5, 8, 11, 14 and 17 are skipped on purpose by the author;
// what those columns hold is not visible here (possibly per-grade totals).
// NOTE(review): "HG_name" receives the whole entry object, not its HG_NM.
var completeObj = {
"HG_name": schCD[i],
"HG_CD": schCD[i].HG_CD,
"G1st_m" : tObj.get(0).innerHTML,
"G1st_f" : tObj.get(1).innerHTML,
"G2nd_m" : tObj.get(3).innerHTML,
"G2nd_f" : tObj.get(4).innerHTML,
"G3rd_m" : tObj.get(6).innerHTML,
"G3rd_f" : tObj.get(7).innerHTML,
"G4th_m" : tObj.get(9).innerHTML,
"G4th_f" : tObj.get(10).innerHTML,
"G5th_m" : tObj.get(12).innerHTML,
"G5th_f" : tObj.get(13).innerHTML,
"G6th_m" : tObj.get(15).innerHTML,
"G6th_f" : tObj.get(16).innerHTML,
"sp_m" : tObj.get(18).innerHTML,
"sp_f" : tObj.get(19).innerHTML
}
console.log("value1 : "+completeObj.G1st_m);
console.log("value2 : "+completeObj.G1st_f);
});
}
如果去掉 for 循环并把 HG_CD 手动写入代码,则可以正常工作。但是,一旦加上 for 循环,Node 就会输出如下错误信息。
/Users/Snark/Dev/parsing/parsing1.js:8
var tObj= window.$('.TableType1 tbody tr:last td');
^
TypeError: Cannot read property '$' of undefined
at Object.done (/Users/Snark/Dev/parsing/parsing1.js:8:18)
at reportInitError (/Users/Snark/node_modules/jsdom/lib/jsdom.js:384:12)
at /Users/Snark/node_modules/jsdom/lib/jsdom.js:220:9
at Request._callback (/Users/Snark/node_modules/jsdom/lib/jsdom/browser/resource-loader.js:159:64)
at self.callback (/Users/Snark/node_modules/jsdom/node_modules/request/request.js:198:22)
at emitOne (events.js:77:13)
at Request.emit (events.js:169:7)
at Request.onRequestError (/Users/Snark/node_modules/jsdom/node_modules/request/request.js:820:8)
at emitOne (events.js:77:13)
at ClientRequest.emit (events.js:169:7)
最佳答案
您的问题与并发请求有关:您似乎同时发出了大量此类请求,因此应该等待一个请求完成之后,再向同一服务器发出下一个请求。下面是使用 async 库的实现方式:
var jsdom = require("jsdom");
var async = require("async");
var schCD = require('./ggElemCD.json');
// Process the entries of schCD strictly one after another: the next page is
// requested only after the current one has been handled, so the server never
// receives a burst of simultaneous requests.
async.eachSeries(schCD, function (item, callback) {
    jsdom.env(
        "http://www.~~skipped~~&HG_CD=" + item.HG_CD + "~~skipped~~",
        ["http://code.jquery.com/jquery.js"],
        function (err, window) {
            // When loading fails, jsdom reports the error and `window` is
            // undefined. Hand the error to async instead of dereferencing it.
            if (err) {
                return callback(err);
            }

            // All <td> cells of the last row of the table.
            var tObj = window.$('.TableType1 tbody tr:last td');

            // The highest index read below is 19; a page with an unexpected
            // layout must not crash the whole run with a TypeError.
            if (tObj.length < 20) {
                return callback(new Error(
                    "Unexpected table layout for code " + item.HG_CD +
                    ": found " + tObj.length + " cells, expected at least 20"
                ));
            }

            console.log("Name : " + item.HG_NM);
            console.log("Code : " + item.HG_CD);

            var completeObj = {
                // Store the name itself rather than the whole entry object.
                "HG_name" : item.HG_NM,
                "HG_CD"   : item.HG_CD,
                "G1st_m"  : tObj.get(0).innerHTML,
                "G1st_f"  : tObj.get(1).innerHTML,
                "G2nd_m"  : tObj.get(3).innerHTML,
                "G2nd_f"  : tObj.get(4).innerHTML,
                "G3rd_m"  : tObj.get(6).innerHTML,
                "G3rd_f"  : tObj.get(7).innerHTML,
                "G4th_m"  : tObj.get(9).innerHTML,
                "G4th_f"  : tObj.get(10).innerHTML,
                "G5th_m"  : tObj.get(12).innerHTML,
                "G5th_f"  : tObj.get(13).innerHTML,
                "G6th_m"  : tObj.get(15).innerHTML,
                "G6th_f"  : tObj.get(16).innerHTML,
                "sp_m"    : tObj.get(18).innerHTML,
                "sp_f"    : tObj.get(19).innerHTML
            };

            console.log("value1 : " + completeObj.G1st_m);
            console.log("value2 : " + completeObj.G1st_f);

            // Signal success so that eachSeries moves on to the next entry.
            callback();
        }
    );
}, function (err) {
    // Runs once: after every entry succeeded, or right after the first error.
    console.log("Done.");
    if (err) {
        console.log(err);
    }
});
关于javascript - 带循环的node-jsdom不起作用,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34615756/