我正在使用 node.js 和 puppeteer 来获取一些数据。我尝试了几次,但很难获得我想要的第二个至第七个参数。
这是我在控制台中的输出之一:
Company 1
our error TypeError: formRow.evaluate is not a function
at main (/home/web/app.js:36:37)
at <anonymous>
at process._tickCallback (internal/process/next_tick.js:188:7)
$ node app.js
这就是我正在查看的 HTML:
<body>
<table summary="">...</table>
<table summary="">...</table>
<div>
<table summary="">
<tbody>
<tr>
<td></td>
<td></td>
<td valign="top" bgcolor="#E6E6E6" align="left">
<a href="/count=100">Company 1</a>
</td>
</tr>
<tr nowrap="nowrap" valign="top" align="left">
<td nowrap="nowrap">4</td>
<td nowrap="nowrap"><a href="/index.htm">[html]</a><a href="/abx.txt">[text]</a></td>
<td class="small">Categorie 1<br>Accession Number: 1243689234
</td>
<td nowrap="nowrap">2018-08-14<br>16:35:41</td>
<td nowrap="nowrap">2018-08-14</td>
<td nowrap="nowrap" align="left">
<a href="/count=100">001-32722</a><br>181018204
</td>
</tr>
<tr>
<td></td>
<td></td>
<td valign="top" bgcolor="#E6E6E6" align="left">
<a href="/count=100">Company 2</a>
</td>
</tr>
<tr nowrap="nowrap" valign="top" align="left">
<td nowrap="nowrap">4</td>
<td nowrap="nowrap"><a href="/index.htm">[html]</a><a href="/abx.txt">[text]</a></td>
<td class="small">Categorie 2<br>Accession Number: 0001179110
</td>
<td nowrap="nowrap">2018-08-14<br>16:35:41</td>
<td nowrap="nowrap">2018-08-14</td>
<td nowrap="nowrap" align="left">
<a href="/count=100">001-32722</a><br>181018204
</td>
</tr>
....
</tbody>
</table>
</div>
<form>...</form>
...
<table summary="">...</table>
</body>
这是我到目前为止的 puppeteer 设置。第一个参数(例如公司)工作正常。应用程序.js:
const puppeteer = require('puppeteer');
(async function main() {
try {
const browser = await puppeteer.launch({ headless: false })
const page = await browser.newPage();
await page.goto('some page');
const table = await page.waitForSelector('body div table[summary]');
const titles = await page.$$('body div table[summary] tr td[bgcolor]');
console.log(titles.length);
const tableRows = await page.$$('body div table[summary] tr[nowrap]');
console.log(tableRows.length);
for (let i=0; i < tableRows.length; i++){
const ciks = await page.$$('body div table[summary] tr td[bgcolor]');
const cik = ciks[i];
const button = await cik.$('body div table[summary] tr td[bgcolor] a');
const titleName = await page.evaluate(button => button.innerText, button);
console.log(titleName);
const formRows = await page.$$('body div table[summary] tr[nowrap]');
const formRow = formRows[i];
const tableCell = await formRow.$('body div table[summary] tr[nowrap] td');
const cell = await tableCell.$eval(() => {
document.querySelector('body div table[summary] tr[nowrap] td:nth-child(1)');
});
console.log(cell);
//const cell = await tableCell.$eval('td', td => td.innerText);
//console.log(cell);
}
console.log('\n');
console.log('done');
await browser.close();
} catch (e) {
console.log('our error', e);
}
})();
在上面的尝试中,我试图获取 1 和 2 参数...但最终这是所需的结果:
Company 1
4
[html]
Categorie 1
2018-08-14
2018-08-14
001-32722
Company 2
4
[html]
Categorie 2
2018-08-14
2018-08-14
001-32722
...
我在带有 Chrominium 68 的 ubuntu 16.04 32 位上运行它...
最佳答案
您在 for
循环中重复为常量赋值。
const
声明创建对值的只读引用,这意味着无法为循环的每次迭代重新分配变量 ciks
、cik
、button
、titleName
、formRows
、formRow
、tableCell
和 cell
。
这会导致您的程序在第二次迭代后失败。
您应该使用let
相反,您应该在循环之前声明变量。
看看下面修改后的代码:
'use strict';
const puppeteer = require( 'puppeteer' );
( async () =>
{
try
{
const browser = await puppeteer.launch( { 'headless' : false } );
const page = await browser.newPage();
await page.goto( 'some page' );
const table = await page.waitForSelector( 'body div table[summary]' );
const titles = await page.$$( 'body div table[summary] tr td[bgcolor]' );
console.log( titles.length );
const tableRows = await page.$$( 'body div table[summary] tr[nowrap]' );
console.log( tableRows.length );
// Declare variables before loop ...
const ciks = await page.$$( 'body div table[summary] tr td[bgcolor]' );
const titleName = await page.evaluate( button => button.innerText, button );
const formRows = await page.$$( 'body div table[summary] tr[nowrap]' );
const cell = await page.$( 'body div table[summary] tr[nowrap] td:nth-child(1)' );
let cik;
let button;
let formRow;
let tableCell;
for ( let i = 0; i < tableRows.length; i++ )
{
cik = ciks[i];
button = await cik.$( 'body div table[summary] tr td[bgcolor] a' );
console.log( titleName );
formRow = formRows[i];
tableCell = await formRow.$( 'body div table[summary] tr[nowrap] td' );
console.log( cell );
}
console.log( '\n' );
console.log( 'done' );
await browser.close();
}
catch ( e )
{
console.log( 'our error', e );
}
})();
关于Node.js puppeteer - 获取所需值不起作用(td :nth-child(n),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51850190/