我手头最小的文件也有超过 850k 行,每行的长度未知。目标是在浏览器中读取该文件的前 n 行。
把整个文件全部读入是不可行的。
这是我的 HTML:<input type="file" name="file" id="file">
以及我目前的 JS:
// Number of lines the asker wants to print.
var n = 10;
var reader = new FileReader();
// Load handler: logs the complete result, then the first n newline-separated
// entries of it. NOTE(review): no read call (e.g. readAsText) is shown in this
// snippet; presumably it is issued elsewhere once a file has been chosen.
reader.onload = function(progressEvent) {
// Entire file: the whole result is logged in one call.
console.log(this.result);
// By lines: the complete result is split on '\n' before any line is printed,
// so the cost does not depend on n.
var lines = this.result.split('\n');
// lines[line] is undefined when the result has fewer than n entries.
for (var line = 0; line < n; line++) {
console.log(lines[line]);
}
};
显然,这里的问题是它会先把整个文件读入内存,然后再按换行符拆分。所以不管 n 是多少,
它都会尝试读取整个文件,而当文件很大时最终什么也读不出来。
我应该怎么做?
注意:我愿意删掉整个函数从头再来,只要最终能够对读取到的每一行调用 console.log()
即可。
*"every line is of unknown length"-> 表示文件是这样的:
(0, (1, 2))
(1, (4, 5, 6))
(2, (7))
(3, (8))
编辑:
可行的思路类似于 “filereader api on big files” 这个问题,但我看不出该如何修改它来读取
文件的前 n 行……
借助 “Uint8Array to string in Javascript” 中的方法,也可以在此基础上这样做:
// NOTE(review): `fr` is not defined in this snippet; presumably it is a
// FileReader that has just read one chunk of the file as an ArrayBuffer.
var view = new Uint8Array(fr.result);
// Decode the chunk's bytes as UTF-8 text. Nothing here handles a chunk that
// ends in the middle of a line.
var string = new TextDecoder("utf-8").decode(view);
console.log("Chunk " + string);
但是这样可能无法完整地读到每个块的最后一行,那之后要怎样确定各行的边界呢?例如,这是它打印的内容:
((7202), (u'11330875493', u'2554375661'))
((1667), (u'9079074735', u'6883914476',
最佳答案
逻辑和我在 “filereader api on big files” 的回答中写的非常相似,只是您还需要跟踪到目前为止已处理的行数(以及到目前为止读取到的最后一行,因为它可能尚未结束)。下面的示例适用于任何与 UTF-8 兼容的编码;如果您需要其他编码,请查看 TextDecoder
构造函数的选项。
如果您确定输入是 ASCII(或任何其他单字节编码),那么您也可以不使用 TextDecoder,
而直接使用 FileReader 的 readAsText 方法将输入读取为文本。
// This is just an example of the function below.
// Click handler for the "start" button: reads the requested number of lines
// from the selected file and prints each one to the console.
document.getElementById('start').onclick = function() {
  var file = document.getElementById('infile').files[0];
  if (!file) {
    console.log('No file selected.');
    return;
  }
  var maxlines = parseInt(document.getElementById('maxlines').value, 10);
  // An empty or non-numeric field parses to NaN, which would disable the line
  // limit and read the whole file; a negative limit is meaningless. Reject
  // both before starting any I/O.
  if (Number.isNaN(maxlines) || maxlines < 0) {
    console.log('Enter the number of lines to read (0 or more).');
    return;
  }
  var lineno = 1;
  // readSomeLines is defined below.
  readSomeLines(file, maxlines, function(line) {
    console.log("Line: " + (lineno++) + line);
  }, function onComplete(error) {
    // readSomeLines passes the FileReader error here when a read fails, so
    // success must not be reported unconditionally.
    if (error) {
      console.log('Failed to read file: ' + error);
      return;
    }
    console.log('Read all lines');
  });
};
/**
 * Read up to and including |maxlines| lines from |file|.
 *
 * The file is read in fixed-size chunks, so only a small part of it is held
 * in memory at any time. Lines are split on '\n'. Every line that was
 * terminated in the file is passed to |forEachLine| with its trailing '\n';
 * an unterminated, non-empty last line is passed without one.
 *
 * @param {Blob} file - The file to be read.
 * @param {integer} maxlines - The maximum number of lines to read. Zero or a
 *     negative value reads nothing and completes immediately.
 * @param {function(string)} forEachLine - Called for each line.
 * @param {function(error)} onComplete - Called when the end of the file
 *     is reached or when |maxlines| lines have been read. Called with the
 *     FileReader's error if a read fails, and with no argument otherwise.
 */
function readSomeLines(file, maxlines, forEachLine, onComplete) {
  var CHUNK_SIZE = 50000; // 50kb, arbitrarily chosen.
  var decoder = new TextDecoder();
  var offset = 0;
  var linecount = 0;
  // Decoded text of the current line whose terminating '\n' has not been
  // seen yet; it may span several chunks.
  var results = '';
  var fr = new FileReader();

  fr.onload = function() {
    // Use stream:true in case we cut the file
    // in the middle of a multi-byte character.
    results += decoder.decode(fr.result, {stream: true});
    var lines = results.split('\n');
    results = lines.pop(); // In case the line did not end yet.
    linecount += lines.length;
    if (linecount > maxlines) {
      // Read too many lines? Truncate the results.
      lines.length -= linecount - maxlines;
      linecount = maxlines;
    }
    for (var i = 0; i < lines.length; ++i) {
      forEachLine(lines[i] + '\n');
    }
    offset += CHUNK_SIZE;
    seek();
  };
  fr.onerror = function() {
    onComplete(fr.error);
  };
  seek();

  // Reads the next chunk, or finishes when the line limit or the end of the
  // file has been reached.
  function seek() {
    // ">=" rather than "===" so that a negative limit stops immediately
    // instead of reaching the truncation above with a negative array length.
    if (linecount >= maxlines) {
      // We found enough lines.
      onComplete(); // Done.
      return;
    }
    if (offset >= file.size) {
      // We did not find all lines, but there are no more lines.
      // Flush bytes the streaming decoder may still be holding back.
      results += decoder.decode();
      // Only report a trailing line that actually has content; a file that
      // ends with '\n' (or is empty) has no extra line.
      if (results !== '') {
        forEachLine(results); // This is from lines.pop(), before.
      }
      onComplete(); // Done
      return;
    }
    var slice = file.slice(offset, offset + CHUNK_SIZE);
    fr.readAsArrayBuffer(slice);
  }
}
Read <input type="number" id="maxlines"> lines from
<input type="file" id="infile">.
<input type="button" id="start" value="Print lines to console">
关于javascript - 读取一个大文本文件的 n 行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39479090/