我正在尝试抓取一个网站,为此需要自动点击页面上的一个按钮。但无论用什么方法触发,这个按钮似乎都没有任何反应。
链接: http://shop.nordstrom.com/s/polo-ralph-lauren-pajama-pants/2849416
网站堆栈:ReactJS、JQueryJS
按钮选择器: #product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img
尝试
JQuery click、mousedown、touchstart 和 native click...在 Chrome 开发工具控制台中。
// Console experiments: fire several synthetic events at the colour swatch image.
// The element is looked up again before every attempt, so each attempt targets
// whatever node currently matches the selector.
var swatchSelector = "#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img";

// 1. jQuery-synthesised click.
$(swatchSelector).trigger("click");
// 2. Native DOM click() on the underlying element.
$(swatchSelector).get(0).click();
// 3. jQuery-synthesised mousedown.
$(swatchSelector).trigger("mousedown");
// 4. jQuery-synthesised touchstart.
$(swatchSelector).trigger("touchstart");
PhantomJS sendEvent 函数...通过 PhantomJS headless 浏览器。
// PhantomJS script: open the product page, send a native left click to the
// centre of the second colour swatch, then save a screenshot of the result.
var webpage = require('webpage');
var page = webpage.create();
var href = "http://shop.nordstrom.com/s/polo-ralph-lauren-pajama-pants/2849416";

/**
 * Returns the centre point of a bounding rectangle.
 *
 * @param {{left: number, top: number, width: number, height: number}} bounds
 *     Rectangle in viewport coordinates.
 * @returns {{x: number, y: number}} Rounded centre coordinates.
 */
function computeCenter(bounds) {
  return {
    x: Math.round(bounds.left + bounds.width / 2),
    y: Math.round(bounds.top + bounds.height / 2)
  };
}

page.open(href, function (status) {
  if (status !== 'success') {
    console.log('Failed to load ' + href + ' (status: ' + status + ')');
    page.close();
    phantom.exit(1);
    return;
  }

  var elem = "#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img";

  // Runs inside the page. The rectangle is copied into a plain object so that
  // its numbers survive the serialization between page and script contexts;
  // null signals that the selector matched nothing.
  var rect = page.evaluate(function (selector) {
    var node = $(selector)[0];
    if (!node) {
      return null;
    }
    var box = node.getBoundingClientRect();
    return { left: box.left, top: box.top, width: box.width, height: box.height };
  }, elem);

  if (!rect) {
    console.log('Element not found: ' + elem);
    page.close();
    phantom.exit(1);
    return;
  }

  // computeCenter returns {x, y}; the coordinates are read by name.
  var cor = computeCenter(rect);
  page.sendEvent('click', cor.x, cor.y, 'left');

  // Give the page a moment to react before capturing the screenshot.
  setTimeout(function () {
    page.render('websiteAfterClick.png');
    page.close();
    // Without an explicit exit the PhantomJS process keeps running.
    phantom.exit();
  }, 1000);
});
还有 HTML 事件......在 Chrome 开发工具控制台中。
// Console experiment: build a legacy "click" MouseEvent aimed at the centre of
// the colour swatch image and dispatch it on that element.
var elem = $("#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img")[0];

if (!elem) {
  // Report the missing element instead of silently swallowing the failure.
  console.error('Colour swatch image not found; nothing to click.');
} else {
  var pos = elem.getBoundingClientRect();
  var center_x = Math.floor((pos.left + pos.right) / 2);
  var center_y = Math.floor((pos.top + pos.bottom) / 2);

  var evt = document.createEvent("MouseEvents");
  // Argument order: type, bubbles, cancelable, view, detail, screenX, screenY,
  // clientX, clientY, ctrlKey, altKey, shiftKey, metaKey, button, relatedTarget.
  evt.initMouseEvent('click', true, false, window, 1, 1, 1, center_x, center_y, false, false, false, false, 0, elem);

  // Initialising an event does not fire it; it has to be dispatched explicitly.
  elem.dispatchEvent(evt);
}
React Test Utils...通过 PhantomJS headless 浏览器。
// PhantomJS script: open the product page, inject React (with add-ons) and use
// React Test Utils to simulate a click on the second colour swatch, then save
// a screenshot of the result.
var webpage = require('webpage');
var page = webpage.create();
var href = "http://shop.nordstrom.com/s/polo-ralph-lauren-pajama-pants/2849416";

page.open(href, function (status) {
  if (status !== 'success') {
    console.log('Failed to load ' + href + ' (status: ' + status + ')');
    page.close();
    phantom.exit(1);
    return;
  }

  // NOTE(review): this injects a second copy of React into the page; Simulate
  // presumably only reaches components managed by the copy it belongs to -
  // confirm against the site's own React instance.
  page.includeJs("https://cdnjs.cloudflare.com/ajax/libs/react/0.14.6/react-with-addons.js", function () {
    var elem = "#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img";

    // Runs inside the page; returns false when the selector matches nothing.
    var clicked = page.evaluate(function (selector) {
      var node = $(selector)[0];
      if (!node) {
        return false;
      }
      React.addons.TestUtils.Simulate.click(node);
      return true;
    }, elem);

    if (!clicked) {
      console.log('Element not found: ' + elem);
    }

    // Give the page a moment to react before capturing the screenshot.
    setTimeout(function () {
      page.render('websiteAfterClick.png');
      page.close();
      // Without an explicit exit the PhantomJS process keeps running.
      phantom.exit();
    }, 1000);
  });
});
取巧的尝试(hack)。该网站有一个 select 下拉框,其选项与我要点击的按钮相同……在 Chrome 开发工具控制台中。
// Console experiment: choose the colour through the page's <select> element
// and announce the new value with a "change" event, first via the jQuery
// shorthand and then via an explicit trigger.
var colorSelectSelector = '#product-selection-2849416 > section.color-filter > div > select';
var colorName = 'Black Royal Oxford';

$(colorSelectSelector).val(colorName).change();
$(colorSelectSelector).val(colorName).trigger('change');
想法
想办法在他们的 React 组件中提取 props。它们还包含我想要的数据。还不确定该怎么做...
使用 WebDriver 和 Selenium 来模拟点击。不确定如何与 PhantomJS 集成。
找到与点击处理程序关联的函数,并尝试直接调用它。正在研究中……
使用 XPath Clicker。不知道该怎么做。在网上找不到很多资源。
结论
这里有人能帮帮我吗?不确定还能尝试什么。
最佳答案
我稍微调试了一下他们的代码,看起来他们监听的是 mousedown/mouseup 事件,而不是 click 事件。下面的代码应该可以工作:
// Simulates a left-button press and release on the colour swatch image by
// dispatching "mousedown" followed by "mouseup" on it.
var el = jQuery("#product-selection-2849416 > section.color-filter > div > ul > li:nth-child(2) > a > span > span.image-sprite-image.cover > span > img")[0];

/**
 * Builds a bubbling, cancelable left-button MouseEvent of the given type.
 *
 * isTrusted, defaultPrevented and cancelBubble are intentionally not passed:
 * they are not constructor options (the browser sets them itself), so
 * supplying them has no effect.
 *
 * @param {string} type - Event type, e.g. "mousedown" or "mouseup".
 * @returns {MouseEvent} The freshly constructed event.
 */
function createLeftButtonEvent(type) {
  return new MouseEvent(type, {
    bubbles: true,
    cancelable: true,
    view: window,
    button: 0,
    buttons: 1,
    which: 1
  });
}

var evtMouseDown = createLeftButtonEvent("mousedown");
var evtMouseUp = createLeftButtonEvent("mouseup");

if (!el) {
  // Fail with a clear message instead of a TypeError on undefined.
  console.error('Colour swatch image not found; nothing to dispatch.');
} else {
  el.dispatchEvent(evtMouseDown);
  el.dispatchEvent(evtMouseUp);
}
关于javascript - 抓取网站。在抓取期间无法自动执行用户点击,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/35070220/