我有一个用例,需要使用 Headless Chrome Network ( https://chromedevtools.github.io/devtools-protocol/tot/Network/ ) 拦截所有图像请求并在保存之前找出图像大小(基本上丢弃图标等小图像)。
但是,我无法找到在保存图像数据之前将其加载到内存中的方法。我需要将其加载到 Img 对象中以获取 width
和 height
。 Network.getResponseBody 正在获取我在 Network.requestIntercepted 中无法访问的 requestId。另外,Network.loadingFinished
总是在 encodedDataLength
变量中给出“0”。我不知道为什么。所以我的问题是:
如何拦截所有jpg/png请求的响应并获取图像数据?无需通过 URL 字符串将文件保存到磁盘并加载回来。
最佳:如何从标题响应中获取图像尺寸?这样我就根本不需要将数据读入内存了。
我的代码如下:
const chromeLauncher = require('chrome-launcher');
const CDP = require('chrome-remote-interface');
const file = require('fs');
(async function() {
async function launchChrome() {
return await chromeLauncher.launch({
chromeFlags: [
'--disable-gpu',
'--headless'
]
});
}
const chrome = await launchChrome();
const protocol = await CDP({
port: chrome.port
});
const {
DOM,
Network,
Page,
Emulation,
Runtime
} = protocol;
await Promise.all([Network.enable(), Page.enable(), Runtime.enable(), DOM.enable()]);
await Network.setRequestInterceptionEnabled({enabled: true});
Network.requestIntercepted(({interceptionId, request, resourceType}) => {
if ((request.url.indexOf('.jpg') >= 0) || (request.url.indexOf('.png') >= 0)) {
console.log(JSON.stringify(request));
console.log(resourceType);
if (request.url.indexOf("/unspecified.jpg") >= 0) {
console.log("FOUND unspecified.jpg");
console.log(JSON.stringify(interceptionId));
// console.log(JSON.stringify(Network.getResponseBody(interceptionId)));
}
}
Network.continueInterceptedRequest({interceptionId});
});
Network.loadingFinished(({requestId, timestamp, encodedDataLength}) => {
console.log(requestId);
console.log(timestamp);
console.log(encodedDataLength);
});
Page.navigate({
url: 'https://www.yahoo.com/'
});
Page.loadEventFired(async() => {
protocol.close();
chrome.kill();
});
})();
最佳答案
这应该能让你完成 90% 的任务。它获取每个图像请求的正文。您仍然需要进行 Base64 解码、检查大小并保存等...
const CDP = require('chrome-remote-interface');
const sizeThreshold = 1024;
async function run() {
try {
var client = await CDP();
const { Network, Page } = client;
// enable events
await Promise.all([Network.enable(), Page.enable()]);
// commands
const _url = "https://google.co.za";
let _pics = [];
Network.responseReceived(async ({requestId, response}) => {
let url = response ? response.url : null;
if ((url.indexOf('.jpg') >= 0) || (url.indexOf('.png') >= 0)) {
const {body, base64Encoded} = await Network.getResponseBody({ requestId }); // throws promise error returning null/undefined so can't destructure. Must be different in inspect shell to app?
_pics.push({ url, body, base64Encoded });
console.log(url, body, base64Encoded);
}
});
await Page.navigate({ url: _url });
await sleep(5000);
// TODO: process _pics - base64Encoded, check body.length > sizeThreshold, save etc...
} catch (err) {
if (err.message && err.message === "No inspectable targets") {
console.error("Either chrome isn't running or you already have another app connected to chrome - e.g. `chrome-remote-interface inspect`")
} else {
console.error(err);
}
} finally {
if (client) {
await client.close();
}
}
}
function sleep(miliseconds = 1000) {
if (miliseconds == 0)
return Promise.resolve();
return new Promise(resolve => setTimeout(() => resolve(), miliseconds))
}
run();
关于node.js - 使用headless chrome拦截图片请求数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/45578739/