我正在使用 Node.js 的 request 模块来读取网站内容。在 Linux 上运行 Node 时,我无法获取 linkedin.com 的完整内容,但同样的代码在 Windows 和 Mac OS X 上运行良好。
我编写了以下代码:
var request = require('request');

// Fetch the LinkedIn article and print its body when the server answers 200;
// otherwise print the status code together with whatever body was returned.
request('https://www.linkedin.com/pulse/social-media-why-its-essential-tool-oliver-bussmann', function (error, response, body) {
  // On a transport-level failure `response` is undefined, so report the
  // error first instead of dereferencing response.statusCode.
  if (error) {
    console.log(error);
    return;
  }
  if (response.statusCode === 200) {
    console.log(body);
  } else {
    // always return this response
    console.log(response.statusCode, body);
  }
});
我收到 999 状态代码和以下 HTML 内容:
\n\nwindow.onload = function() {\n // Parse the tracking code from cookies.\n var trk = "sentinel_org_block";\n var cookies = document.cookie.split("; ");\n for (var i = 0; i < cookies.length; ++i) {\n if ((cookies[i].indexOf("trkCode=") == 0) && (cookies[i].length > 8)) {\n trk = cookies[i].substring(8);\n }\n }\n\n // Get the protocol for the redirect url.\n var protocol = "http:";\n if (window.location.protocol == "https:") {\n protocol = "https:";\n } else {\n // If "sl" cookie is set, redirect to https.\n for (var i = 0; i < cookies.length; ++i) {\n if ((cookies[i].indexOf("sl=") == 0) && (cookies[i].length > 3)) {\n window.location.href = "https:" + window.location.href.substring(window.location.protocol.length);\n return;\n }\n }\n }\n\n // Get the new domain. For touch.www.linkedin.com or tablet.www.linkedin.com\n // we strip "touch." and "tablet.". For international domains such as\n // fr.linkedin.com, we convert it to www.linkedin.com\n var domain = location.host;\n if (domain.substr(0, 6) == "touch.") {\n domain = domain.substr(6);\n } else if (domain.substr(0, 7) == "tablet.") {\n domain = domain.substr(7);\n } else if (domain.charAt(2) == ".") {\n domain = "www" + domain.substr(2);\n }\n \n window.location.href = "https://" + domain + "/uas/login?trk=" + trk + "&session_redirect=" +\n encodeURIComponent(protocol + "//" + domain +\n window.location.href.substr(window.location.href.search(window.location.host) +\n window.location.host.length));\n}\n\n\n
我做错了什么?
最佳答案
当我尝试在 Mac OS X 计算机上使用 Node.js 程序访问 LinkedIn 个人资料时,遇到了同样的问题。这是带有缩进的代码,以便更好地理解:
// Handler assigned to window.onload. Every path through it navigates away
// from the current page: either to the https version of the current URL
// (when an "sl" cookie is present on a non-https page) or to /uas/login on
// the normalized domain, with the original address passed as session_redirect.
window.onload = function() {
// Parse the tracking code from cookies.
// "sentinel_org_block" is the fallback used when no trkCode cookie is found.
var trk = "sentinel_org_block";
var cookies = document.cookie.split("; ");
for (var i = 0; i < cookies.length; ++i) {
// "trkCode=" is 8 characters long, so length > 8 requires a non-empty value.
if ((cookies[i].indexOf("trkCode=") == 0) && (cookies[i].length > 8)) {
trk = cookies[i].substring(8);
}
}
// Get the protocol for the redirect url.
var protocol = "http:";
if (window.location.protocol == "https:") {
protocol = "https:";
} else {
// If "sl" cookie is set, redirect to https.
for (var i = 0; i < cookies.length; ++i) {
// "sl=" is 3 characters long, so length > 3 requires a non-empty value.
if ((cookies[i].indexOf("sl=") == 0) && (cookies[i].length > 3)) {
// Swap the current protocol prefix for "https:" and stop; the login
// redirect below is skipped on this path.
window.location.href = "https:" + window.location.href.substring(window.location.protocol.length);
return;
}
}
}
// Get the new domain. For touch.www.linkedin.com or tablet.www.linkedin.com
// we strip "touch." and "tablet.". For international domains such as
// fr.linkedin.com, we convert it to www.linkedin.com
var domain = location.host;
if (domain.substr(0, 6) == "touch.") {
domain = domain.substr(6);
} else if (domain.substr(0, 7) == "tablet.") {
domain = domain.substr(7);
// A "." at index 2 means the host starts with a two-character label
// (e.g. "fr."); that label is replaced with "www".
} else if (domain.charAt(2) == ".") {
domain = "www" + domain.substr(2);
}
// Redirect to the login page. session_redirect is the URL-encoded target:
// protocol + normalized domain + everything after the host in the current href.
window.location.href = "https://" + domain + "/uas/login?trk=" + trk + "&session_redirect=" + encodeURIComponent(protocol + "//" + domain + window.location.href.substr(window.location.href.search(window.location.host) + window.location.host.length));
}
这似乎是一个用于阻止自动化程序访问、并将用户重定向到登录页面的脚本。它会在 cookie 中查找名为“sl”的条目:如果当前页面不是 https 且找到了该 cookie,就会重定向到当前页面的 https 版本;否则,脚本不允许您查看此页面,而是将您重定向到 LinkedIn 登录页面。这就是我从这段代码中能够理解的内容,但不幸的是,我当时无法解决这个问题......
编辑:我已经能够通过使用 PhantomJS 访问该页面来解决我的问题。这样您就可以修改您的用户代理,这样 LinkedIn 就不会阻止您的连接。 这是我使用的代码:
var phantom = require('phantom');

// Page to fetch. The original snippet passed `this.url` to page.open(), but
// `this` is not bound to an object carrying a `url` inside a plain promise
// callback, so the address is held in a variable instead.
var url = 'https://www.linkedin.com/pulse/social-media-why-its-essential-tool-oliver-bussmann';
var sitepage = null;
var phInstance = null;

// Start PhantomJS, open the page with a desktop-browser user agent, print
// the load status and the page content, then shut PhantomJS down.
phantom
  .create()
  .then(function (instance) {
    phInstance = instance;
    return instance.createPage();
  })
  .then(function (page) {
    sitepage = page;
    // Present a regular desktop Chrome user agent before opening the page.
    page.setting('userAgent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36');
    return page.open(url);
  })
  .then(function (status) {
    console.log(status);
    return sitepage.property('content');
  })
  .then(function (body) {
    console.log(body);
    sitepage.close();
    phInstance.exit();
  })
  .catch(function (err) {
    console.log(err);
    // create() itself may have rejected, in which case there is no
    // instance to shut down and phInstance is still null.
    if (phInstance) {
      phInstance.exit();
    }
  });
关于node.js - 无法使用 Node 请求模块在 Linux 上读取 LinkedIn 内容,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37588670/