javascript - Node JS/Gzip : Image file download ends prematurely with no error

标签 javascript node.js

我正在用 Node.js 编写一个下载图像文件的应用程序。但是,我一直遇到一个问题:如果下载速度很慢或者连接中断,正在下载的图像就会被截断(truncated)。如果程序能抛出错误来提醒我图片没有下载完,这倒也不算太糟;然而,它却像图片已经下载成功一样正常 resolve 了。无论是检查响应状态码,还是在下载后探测图像,我都没有收到任何错误。

这是我用来下载图片的当前代码。

const fs = require('fs-extra');
const request = require('request');
var probe = require('probe-image-size');
var progress = require('request-progress');

// Destination path for the downloaded image.
var filename = 'C:/Users/User/Desktop/myimage.jpg';
// NOTE(review): the placeholder URL below has no protocol — confirm the real
// URL is fully qualified (http:// or https://) before running.
var req = request(createRequestHeaders('www.linktomyimage.com/image.jpg'));
// downloadImage returns a promise; without a rejection handler a failed
// download surfaces as an unhandled promise rejection.
downloadImage(req, filename).catch((error) => {
    console.error('Image download failed:', error);
});

/**
 * Builds the options object passed to `request` for an image download.
 *
 * Reads `userid`, `phash` and `website` from the enclosing scope; they are
 * not defined in this snippet.
 *
 * @param {string} url - Address of the image to fetch.
 * @returns {object} GET options for `url` with `gzip` enabled and the
 *     Referer and Cookie headers set.
 */
function createRequestHeaders(url) {
    const sessionCookie = `userid=${userid}; phash=${phash};`;
    const headers = {
        'Referer': `https://${website}/`,
        'Cookie': sessionCookie
    };
    return { 'url': url, 'method': 'GET', 'gzip': true, 'headers': headers };
}

/**
 * Pipes the response of `req` into `filename` and, once the request ends,
 * probes the saved file for its image dimensions.
 *
 * NOTE: the checks run on the request's 'end' event, not on the file write
 * stream, and nothing here compares the number of bytes received with the
 * announced size — a truncated download is therefore not detected.
 *
 * @param {object} req - Request object created with `request(...)`.
 * @param {string} filename - Destination path; a missing parent directory is created.
 * @returns {Promise<object>} Resolves with `{bytes, width, height}`, where
 *     `bytes` is the raw `content-length` header value. Rejects with the
 *     request error, the probe error, `{name: 'ImageMissingOrCorrupt'}` when
 *     the probe yields nothing, or `{name: 'StatusCodeError', message}` when
 *     the status code is not 200.
 */
function downloadImage(req, filename) {
    // `path` is not required at the top of the snippet, so load it here.
    const path = require('path');

    return new Promise((resolve, reject) => {
        let response = null;
        let bytes;
        const dirname = path.dirname(filename);
        if (!fs.existsSync(dirname)) fs.ensureDirSync(dirname);

        progress(req, { delay: 0 }).on('progress', function (state) {
            updateDownloadSpeed(state.speed);
        }).on('end', function () {
            if (response.statusCode === 200) {
                const input = require('fs').createReadStream(filename);
                probe(input).then(result => {
                    input.destroy();
                    if (result != null) {
                        resolve({bytes: bytes, width: result.width,
                                height: result.height,});
                    } else {
                        // The image size probe does not detect if the download was truncated
                        reject({ 'name': 'ImageMissingOrCorrupt' });
                    }
                }).catch((error) => {
                    reject(error);
                });
            } else {
                // This is never triggered when the download stops and the image is truncated
                reject({ 'name': 'StatusCodeError', 'message': response.statusCode });
            }
        }).on('response', function (resp) {
            response = resp;
            bytes = response.headers['content-length'];
        }).on('error', function (error) {
            // This does not detect when an image is truncated either
            reject(error);
        }).pipe(fs.createWriteStream(filename));
    });
}

我之前在这里(here)问过这个问题的早期版本,但是在实现了答案中的建议后,我的下载器仍然存在相同的问题。从那时起,我的下载器的代码也发生了变化。

我如何检测图像何时被截断,以便指示下载器重新尝试下载它?

编辑 1

读完这篇文章(this)后,我认为我的问题可能与我下载的网站有关:该网站要求我使用 gzip,而当服务器停止响应时,客户端无法判断内容是否已真正下载完成。但是,我不确定如何验证这种情况。

编辑2

这是我的下载器成功连接到图像时响应 header 的样子。

cache-control:"public, max-age=31536000"
connection:"close"
content-disposition:"inline; filename=129.jpg"
content-length:"185756"
content-transfer-encoding:"binary"
content-type:"image/jpeg"
date:"Thu, 05 Sep 2019 00:15:11 GMT"
expires:"Fri, 04 Sep 2020 00:15:11 GMT"
server:"Apache"

最佳答案

我建议您尝试检查 writeStream 的 bytesWritten 属性是否等于您收到的 content-length header。

我研究了您的代码,发现请求的 end 事件在 writeStream 关闭之前就被触发了,所以我认为此时实际上还无法检查这个属性。相反,您应该在 writeStream 的 close 事件中进行验证。

试试这个示例代码并告诉我们它是怎么回事:

const fs = require('fs-extra');
const request = require('request');
var probe = require('probe-image-size');
var progress = require('request-progress');

// Destination path for the downloaded image.
var filename = 'C:/Users/User/Desktop/myimage.jpg';
// The stream below opens the file immediately, so the parent directory must
// exist first; otherwise the stream emits an 'error' event.
fs.ensureDirSync(require('path').dirname(filename));
// Module-level write stream; downloadImage pipes the response into it.
var writeSteam = fs.createWriteStream(filename);
// NOTE(review): the placeholder URL below has no protocol — confirm the real
// URL is fully qualified (http:// or https://) before running.
var req = request(createRequestHeaders('www.linktomyimage.com/image.jpg'));
// downloadImage returns a promise; without a rejection handler a failed
// download surfaces as an unhandled promise rejection.
downloadImage(req, filename).catch((error) => {
  console.error('Image download failed:', error);
});

/**
 * Assembles the `request` options used to download an image.
 *
 * `userid`, `phash` and `website` come from the enclosing scope; they are
 * not defined in this snippet.
 *
 * @param {string} url - Address of the image to fetch.
 * @returns {object} GET options for `url` with `gzip` enabled and the
 *     Referer and Cookie headers set.
 */
function createRequestHeaders(url) {
  const sessionCookie = `userid=${userid}; phash=${phash};`;
  const referer = `https://${website}/`;
  const options = { 'url': url, 'method': 'GET', 'gzip': true };
  options['headers'] = { 'Referer': referer, 'Cookie': sessionCookie };
  return options;
}

/**
 * Pipes the response of `req` into the module-level `writeSteam` and validates
 * the saved image once that stream has closed.
 *
 * Validation runs on the write stream's 'close' event because the request's
 * 'end' event fires before the file has been fully written.
 *
 * @param {object} req - Request object created with `request(...)`.
 * @param {string} filename - Path of the file `writeSteam` writes to; it is
 *     re-opened for probing and its parent directory is created if missing.
 * @returns {Promise<object>} Resolves with `{bytes, width, height}`, where
 *     `bytes` is the raw `content-length` header value. Rejects with the
 *     request, write or probe error, `{name: 'ImageCorrupt'}` when fewer or
 *     more bytes were written than announced, `{name: 'ImageMissingOrCorrupt'}`
 *     when no response arrived or the probe yields nothing, or
 *     `{name: 'StatusCodeError', message}` when the status code is not 200.
 */
function downloadImage(req, filename) {
  // `path` is not required at the top of the snippet, so load it here.
  const path = require('path');

  return new Promise((resolve, reject) => {
    let response = null;
    let bytes;
    const dirname = path.dirname(filename);
    if (!fs.existsSync(dirname)) fs.ensureDirSync(dirname);

    // A failed write must reject the promise instead of surfacing as an
    // unhandled 'error' event on the stream.
    writeSteam.on('error', reject);

    writeSteam.on('close', () => {
      if (response === null) {
        // The stream closed without any response having been received.
        reject({ 'name': 'ImageMissingOrCorrupt' });
        return;
      }

      // Header values are strings while bytesWritten is a number; convert
      // before comparing, otherwise a strict comparison never matches.
      const expectedBytes = Number.parseInt(bytes, 10);
      // With `gzip: true` request decodes the piped body, so for an encoded
      // response content-length (the encoded size) cannot be compared with
      // the number of decoded bytes written.
      const encoding = response.headers['content-encoding'];
      const isCompressed = encoding != null &&
        String(encoding).trim().toLowerCase() !== 'identity';
      const canCompareSize = !isCompressed && !Number.isNaN(expectedBytes);

      if (canCompareSize && expectedBytes !== writeSteam.bytesWritten) {
        console.log('The size is not equal! Image is corrupt!');
        reject({ 'name': 'ImageCorrupt' });
      } else if (response.statusCode === 200) {
        const input = require('fs').createReadStream(filename);
        probe(input).then((result) => {
          input.destroy();
          if (result != null) {
            resolve({ bytes: bytes, width: result.width, height: result.height });
          } else {
            // The image size probe does not detect if the download was truncated
            reject({ 'name': 'ImageMissingOrCorrupt' });
          }
        }).catch((error) => {
          // Release the file handle on probe failure as well.
          input.destroy();
          reject(error);
        });
      } else {
        reject({ 'name': 'StatusCodeError', 'message': response.statusCode });
      }
    });

    progress(req, { delay: 0 }).on('progress', (state) => {
      updateDownloadSpeed(state.speed);
    }).on('end', () => {
      console.log('Ended request!!');
    }).on('response', (resp) => {
      response = resp;
      bytes = response.headers['content-length'];
    }).on('error', (error) => {
      reject(error);
      // The pipe does not end the destination when the source fails; close it
      // explicitly. The 'close' handler's later reject call is then a no-op.
      writeSteam.destroy();
    }).pipe(writeSteam);
  });
}

关于javascript - Node JS/Gzip : Image file download ends prematurely with no error,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57735784/

相关文章:

node.js - 我应该如何在同一台生产机器上安装和使用多个版本的 Node?

javascript - ng-include 删除模板后第二次包含模板失败

node.js - REST API : Infinite scroll pagination in the GUI, 但允许搜索所有条目

javascript - 根据用户所在位置向上和向下按钮

javascript - 捕获从 iframe 抛出的alert()?

node.js - 通过 npm 安装 Twitter Bootstrap 的目的?

node.js - Mongoose 覆盖文档而不是 `$set` 字段

node.js - 使用 NodeJS 实例化一个 Admin SDK 目录服务对象

javascript - Div animate 消失,而不是调整大小

javascript - 增加unix时间