javascript - PhantomJS 重定向原始 URL 请求

标签 javascript html phantomjs uri

我正在尝试从 URL http://wzdig.pbc.gov.cn:8080/dig/ui/search.action?hl=zh-CN&sr=score+desc&q=%E9%93%81&rp=&advepq=%E9%93%81&advoq=&adveq=&ext=&advtime=2&advrange=text%26title 获取 HTML 代码,这是该网站上的一次特定搜索。

问题是,它打印的却是 http://wzdig.pbc.gov.cn:8080/dig/ui/search.action 的内容,也就是进行空白搜索的结果。我做错了什么?

这是命令:

$ phantomjs get.js 'http://wzdig.pbc.gov.cn:8080/dig/ui/search.action?hl=zh-CN&sr=score+desc&q=%E9%93%81&rp=&advepq=%E9%93%81&advoq=&adveq=&ext=&advtime=2&advrange=text%26title'

和 get.js:

// get.js - print the HTML of a page once the expected content has loaded.
//
// Usage: phantomjs get.js <url>
//
// The content is dumped only after a load finishes with at least one <h3>
// element in the document; earlier loads (e.g. intermediate pages) are
// ignored and the script keeps waiting.
//
// NOTE: PhantomJS runs an ES5-era engine, so `var` and function
// expressions are used here on purpose.
var system = require('system');
var page = require('webpage').create();

// Upper bound on the total wait, so the script cannot hang forever when
// the expected page never shows up.
var MAX_WAIT_MS = 60000;

/**
 * Opens `url` and prints page.content as soon as a finished load
 * contains an <h3> element. Exits with code 1 if that never happens
 * within MAX_WAIT_MS.
 *
 * @param {string} url - Address to open.
 */
function run(url) {
    // Imitate a real browser
    page.viewportSize = { width: 1440, height: 900 };
    page.settings.userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36";

    var watchdog = setTimeout(function () {
        console.error('Timed out after ' + MAX_WAIT_MS + ' ms waiting for the expected page: ' + url);
        phantom.exit(1);
    }, MAX_WAIT_MS);

    // Called every time a page load finishes
    page.onLoadFinished = function () {
        // The presence of <h3> elements is used as the signal that the
        // wanted page (rather than an intermediate one) is loaded.
        var headingCount = page.evaluate(function () {
            return document.querySelectorAll("h3").length;
        });

        if (headingCount > 0) {
            clearTimeout(watchdog);
            console.log(page.content);
            phantom.exit();
        }
    };

    page.open(url);
}

if (system.args.length < 2) {
    // phantom.exit() does not stop the script immediately, hence the
    // explicit else branch instead of falling through.
    console.error('Usage: phantomjs get.js <url>');
    phantom.exit(1);
} else {
    run(system.args[1]);
}

最佳答案

第一次尝试可能不会成功,但还是有可能获得您想要的页面。
运行此脚本: phantomjs --cookies-file=./xxx test.js >/dev/stdout

// Create the page with a fixed viewport and a Firefox user agent.
// JavaScript stays enabled and image loading is switched off. The two
// flags are real booleans rather than the strings 'true'/'false', so the
// configuration does not depend on string-to-boolean coercion.
// NOTE: PhantomJS runs an ES5-era engine, so `var` is kept on purpose.
var page = require('webpage').create({
  viewportSize: { width: 1600, height: 900 },
  settings: {
    userAgent: 'Mozilla/5.0 (X11; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0',
    javascriptEnabled: true,
    loadImages: false
  }
});
var system = require('system');
// Log every resource that fails to load: first its id and URL, then the
// error code together with its description.
page.onResourceError = function(failure) {
  var target = '(#' + failure.id + 'URL:' + failure.url + ')';
  console.log('Unable to load resource ' + target);

  var reason = 'Error code: ' + failure.errorCode + '. Description: ' + failure.errorString;
  console.log(reason);
};
// Forward messages from the page's own console to the PhantomJS output,
// tagged with the line number and source id passed to the handler.
page.onConsoleMessage = function(text, line, source) {
  var origin = ' (from line #' + line + ' in "' + source + '")';
  console.log('CONSOLE: ' + text + origin);
};
// Log a timed-out resource request: its id followed by the whole request
// object serialized as JSON.
page.onResourceTimeout = function(timedOut) {
    var label = 'Response (#' + timedOut.id + '): ';
    var details = JSON.stringify(timedOut);
    console.log(label + details);
};

// Handler installed on the global `phantom` object: prints the error
// message plus an optional stack trace to console.error, then exits
// with code 1.
phantom.onError = function(msg, trace) {
  // Render one stack frame as " -> <file>: <line>", optionally followed
  // by the function name. Falls back to sourceURL when file is missing.
  function describeFrame(frame) {
    var location = frame.file || frame.sourceURL;
    var text = ' -> ' + location + ': ' + frame.line;
    if (frame.function) {
      text += ' (in function ' + frame.function + ')';
    }
    return text;
  }

  var report = ['PHANTOM ERROR: ' + msg];
  var hasFrames = trace && trace.length;
  if (hasFrames) {
    report.push('TRACE:');
    trace.forEach(function(frame) {
      report.push(describeFrame(frame));
    });
  }

  console.error(report.join('\n'));
  phantom.exit(1);
};

// Handler for errors reported on the page object: prints the message and,
// when a non-empty trace is supplied, one line per stack frame. Unlike a
// fatal handler, it does not exit.
page.onError = function(msg, trace) {
  var report = ['ERROR: ' + msg];

  if (trace && trace.length) {
    report.push('TRACE:');
    trace.forEach(function(frame) {
      var entry = ' -> ' + frame.file + ': ' + frame.line;
      if (frame.function) {
        entry += ' (in function "' + frame.function + '")';
      }
      report.push(entry);
    });
  }

  var output = report.join('\n');
  console.error(output);
};
// Trace every navigation request: the target URL, what caused it,
// whether navigation will actually happen, and whether the request was
// sent from the page's main frame.
page.onNavigationRequested = function(url, type, willNavigate, main) {
  var report = [
    'Trying to navigate to: ' + url,
    'Caused by: ' + type,
    'Will actually navigate: ' + willNavigate,
    "Sent from the page's main frame: " + main
  ];

  // Wrap console.log so it receives only the line, not forEach's
  // extra index/array arguments.
  report.forEach(function(line) {
    console.log(line);
  });
};

// Open the search URL, wait a fixed delay, then save a screenshot and exit.
//
// The screenshot is taken only after the delay rather than right when the
// first load finishes (presumably to let any follow-up navigation on the
// site settle - confirm against the site's behaviour). To dump the HTML
// instead of an image, replace page.render(...) with
// console.log(page.content).
var SEARCH_URL = 'http://wzdig.pbc.gov.cn:8080/dig/ui/search.action?hl=zh-CN&sr=score+desc&q=%E9%93%81&rp=&advepq=%E9%93%81&advoq=&adveq=&ext=&advtime=2&advrange=text%26title';
var SETTLE_DELAY_MS = 30000; // wait 30s
var SCREENSHOT_FILE = 't.png';

page.open(SEARCH_URL, function(status) {
  // Surface a failed initial load instead of silently producing a
  // screenshot of nothing. The wait still happens so the original
  // best-effort behaviour is preserved.
  if (status !== 'success') {
    console.error('Initial load of ' + SEARCH_URL + ' finished with status: ' + status);
  }

  setTimeout(function() {
    page.render(SCREENSHOT_FILE);
    phantom.exit();
  }, SETTLE_DELAY_MS);
});

该网站有问题,有时几乎一切正常,但有时则不然!

关于 javascript - PhantomJS 重定向原始 URL 请求,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/41695896/

相关文章:

javascript - Rails 页面 JS 在真实浏览器/手动测试中有效,但在 PhantomJS 和 Selenium 规范中无效

PhantomJS:为我正在尝试的任何事情获取 "Killed: 9"

javascript - JS RegEx 删除 URL 的一部分?

javascript - Grails Controller 的值打破了gsp中的javascript

javascript - Jquery,恢复时的事件?

javascript - 在 javascript 中向 div 元素添加标记

javascript - 为什么我丢失了 iframe 的 src?

javascript - 我想防止另一个元素位置在 "display: block;"宽度鼠标悬停时减少,

javascript - 如何通过正则表达式替换在 hyperHTML 中用 html 标签包装字符串

javascript - Mobile Safari - 如何默认为十进制输入的全数字键盘 w/o 类型 ="number",或禁用 Safari 输入类型 ="number"更正