javascript - 使用 NodeJS 从 url 下载大文件时服务器崩溃

标签 javascript node.js http web-scraping request

我正在尝试使用 Node.js 的 request 模块,从一个需要登录的 URL 下载大文件(超过 200 MB)。文件可以下载完成,但下载结束后服务器开始变慢,直到崩溃或变得非常慢。

这是我当前的代码(它下载了整个文件但我的服务器最终崩溃了):

//Required modules
var http = require('http'),
url = require("url"),
fs = require('fs'),
request = require('request'),
path = require("path"),
events = require("events"),
j = request.jar(),
request = request.defaults({ jar : j });

// Log in first (the session cookies are stored in the shared jar), then
// stream the protected file to disk with pipe().
console.log("downloading file :)");
request({
    url:"http://example.com/",
    method:"POST",
    form:{u: "username",p: "password"}
    },
    function(error,response,body){
        // Do not start the download when the login request itself failed.
        if (error) {
            console.log(error);
            return;
        }
        setTimeout(function(){
            var target = fs.createWriteStream("/var/www/filesDir/CustomName.zip");

            // pipe() does not forward errors between streams, so the file
            // stream needs its own handler; an unhandled 'error' event would
            // otherwise be thrown as an uncaught exception.
            target.on('error', function(err) {
                console.log(err);
            });

            request
              .get('http://example.com/test.ashx?file=15')
              .on('error', function(err) {
                console.log(err);
                // Close the partially written file when the download fails.
                target.end();
              })
              .pipe(target);

            // Body of the login response, printed for debugging.
            console.log(body);
        },1000);
    }
);

我还尝试了 this answer 中给出的另一种解决方案,但由于某种原因文件没有被正确下载,它一直只显示 “Download progress: 0 bytes”,可能与登录访问有关。

下面是我按照上面提到的那个回答尝试实现的另一段代码:

var http = require('http');
var fs = require('fs');
var url = require("url");
var request = require('request');
var path = require("path");
var events = require("events");
var j = request.jar();
var request = request.defaults({ jar : j });


// Log in with the request module (cookies go into the jar `j`), then download
// the protected file with the core http module, printing the number of bytes
// received once per second.
request({
    url:"http://example.com/",
    method:"POST",
    form:{u:"username",p:"password"}
    },  function(error,response,body){
                // Without a successful login there is no session cookie to send.
                if (error) {
                    console.log(error);
                    return;
                }

                var downloadfile = "http://example.com/test.ashx?file=15";
                var target = url.parse(downloadfile);
                var filename = "1977.zip";

                // http.request knows nothing about the request module's jar,
                // so the login cookies have to be forwarded explicitly.
                var headers = {};
                var cookies = j.getCookieString(downloadfile);
                if (cookies) {
                    headers.cookie = cookies;
                }

                var req = http.request({
                    port: 80,
                    host: target.hostname,
                    // Path plus query string; when omitted, "/" is requested
                    // instead of the file.
                    path: target.path,
                    method: 'GET',
                    headers: headers
                });
                console.log("Downloading file: " + filename);
                console.log("Before download request");

                // Declared locally so no implicit global is created.
                var dlprogress = 0;

                var progressTimer = setInterval(function () {
                   console.log("Download progress: " + dlprogress + " bytes");
                }, 1000);

                req.on('error', function (err) {
                    // Stop the progress output; a live interval keeps the
                    // process running forever.
                    clearInterval(progressTimer);
                    console.log(err);
                });

                req.addListener('response', function (response) {
                    // NOTE: flag 'a' appends, so a file left over from an
                    // earlier run is extended rather than replaced.
                    var output = fs.createWriteStream(filename, {'flags': 'a'});
                    console.log("File size " + filename + ": " + response.headers['content-length'] + " bytes.");

                    // Only counts bytes; the actual writing is done by pipe().
                    response.addListener('data', function (chunk) {
                        dlprogress += chunk.length;
                    });

                    output.on('error', function (err) {
                        clearInterval(progressTimer);
                        console.log(err);
                    });

                    // 'finish' fires once every chunk has been flushed to disk.
                    output.on('finish', function () {
                        clearInterval(progressTimer);
                        console.log("Finished downloading " + filename);
                    });

                    // pipe() pauses the response while the disk is busy, so
                    // pending chunks do not pile up in memory.
                    response.pipe(output);
                });

                req.end();
        }
);

无论您选择用哪种方式帮助我都可以。

最佳答案

我最终是这样做的,我已经多次测试代码并且服务器没有再崩溃了:

var request = require('request');
var filed = require('filed');
var j = request.jar();
var request = request.defaults({ jar : j });

// Log in (session cookies are stored in the jar), then stream the download
// into a local file created with filed().
request({
    url: "http://example.com/login",
    method:"POST",
    // 'u' and 'p' are the field names on the form
    form:{u:"username",p:"password"}
    }, function(error,response,body){
        // A failed login request leaves no session cookie for the download.
        if (error) {
            console.log(error, 'error logging in');
            return;
        }
        setTimeout(function(){
            var downloadURL = 'http://example.com/download/file.zip';
            var downloadPath = "/path/to/download/localNameForFile.zip";

            var downloadFile = filed(downloadPath);
            var r = request(downloadURL);

            // Listen on the request itself: pipe() returns its destination,
            // so chaining .on('data') after pipe() would listen on the file.
            r.on('data', function(data) {
                console.log('binary data received');
            });
            // Network errors are emitted by the request and are not forwarded
            // to the destination by pipe().
            r.on('error', function (err) {
                console.log(err, 'error downloading file');
            });

            downloadFile.on('end', function () {
                console.log(downloadPath, 'file downloaded to path');
            });

            downloadFile.on('error', function (err) {
                console.log(err, 'error downloading file');
            });

            r.pipe(downloadFile);
        },3000);
    }
);

关于javascript - 使用 NodeJS 从 url 下载大文件时服务器崩溃,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/40699203/

相关文章:

javascript - jquery按钮点击事件后,php脚本的响应消失

javascript - 如何解除绑定(bind) Polymer 1.0 中的属性

javascript - 当多个div重叠时如何组织事件

javascript - 无法销毁 Express 创建的 JavaScript 中的 cookie

http - 如何在 Swift 3 中同时发出 https 请求

http - ResponseWriter.Write 返回的 int 是什么意思?

javascript - 添加到 javascript 关联数组

javascript - Microsoft Bot Framework - 如何从第 3 方 API 发送响应

node.js - 如何正确使用迁移

http - RESTful登录失败 : Return 401 or Custom Response