好吧,我正在尝试做标题中描述的事情。两个电子表格都只有一张我正在比较的工作表。一个电子表格是另一个电子表格的更新,所以我试图只获取新内容。 (如果它是一个 fc(dos 命令)之类的函数,这将很容易......)
在做了一些搜索之后,我有以下脚本应该适用于大多数情况,它为每个工作表使用数组。
function test() {
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId =Folder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_sheetname = old_sheet.getName();
var old_array = old_sheet.getDataRange().getValues();
Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(file.getId());
var sheet = spreadsheet.getSheets()[0];
var sheetname = sheet.getName();
var array = sheet.getDataRange().getValues();
Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length);
var newarray = getNewData(array,old_array);
Logger.log('there are ' + newarray.length + 'different rows');
}
function getNewData(array1,array2){
var diff =array2;
for (var i = 0; i<array1.length; i++){
var duplicate = false;
for (var j = 0;j<diff.length;j++){
if (array1[i].join() == diff[j].join()){
Logger.log('duplicated line found on rows ' + i + ':' + j);
diff.splice(j,1);
var duplicate= true;
break;
}
}
if (duplicate==false) {
Logger.log('not duplicated line found on row ' + i);
diff.push(array1[i]);
}
}
return diff;
}
问题是文件太大了,将近 30000 行,所以脚本超过了 5 分钟的执行时间限制。
有没有办法改善这一点,例如消除内部 for 循环?
或者有办法分部分完成吗?像第一个前 5000 行,依此类推。
问候,
编辑:稍微分析电子表格后,我发现每一行都有一个 ID,所以现在我只能将搜索集中在每个电子表格的一列中。所以这是我的新实现:
function test(){
var Folder = DriveApp.getFoldersByName('theFolder').next();
var FolderId =Folder.getId();
//call old_spreadsheet
var searchFor ="fullText contains 'sheet_old' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var old_file = files.next();
var old_spreadsheet = SpreadsheetApp.openById(old_file.getId());
var old_sheet = old_spreadsheet.getSheets()[0];
var old_sheetname = old_sheet.getName();
var old_array = old_sheet.getDataRange().getValues();
Logger.log(old_file.getName() + ' : ' + old_sheetname + ' : ' + old_array.length);
//call spreadsheet
var searchFor ="fullText contains 'sheet' and '" + FolderId + "' in parents";
var files = DriveApp.searchFiles(searchFor);
var file = files.next();
var spreadsheet = SpreadsheetApp.openById(file.getId());
var sheet = spreadsheet.getSheets()[0];
var sheetname = sheet.getName();
var array = sheet.getDataRange().getValues();
Logger.log(file.getName() + ' : ' + sheetname + ' : ' + array.length);
//The COlumn has an indicator, so i search for that. I don't control the formatting of the files, so i search in both spreadsheet for the indicator
var searchString = 'NAME';
for (var i = 0; i < old_array.length; i++) {
for (var j = 0; j < old_array[i].length; j++) {
if (old_array[i][j] == searchString) {
var Row_old = i+1;
var Column_old = j;
break;
}
}
if (Row_old != undefined){
break;
}
}
for (var i = 0; i < array.length; i++) {
for (var j = 0; j < array[i].length; j++) {
if (array[i][j] == searchString) {
var Row = i+1;
var Column = j;
break;
}
}
if (Row != undefined){
break;
}
}
Logger.log(Row_old+':::'+Column_old+'\n'+Row+':::'+Column);
var diff_index =[];
var row_ind = 0;
for (var i=Row;i<array.length;i++){
Logger.log(i);
var existe = ArrayLib.indexOf(old_array, Column_old, array[i][Column]);
if (existe==-1){
Logger.log(row_ind+'!!!');
diff_index[row_ind]=i;
row_ind++;
}
}
Logger.log(diff_index);
}
这仍然没有时间......我现在将尝试合并您的评论。
最佳答案
您的脚本有几个主要瓶颈会大大减慢它的速度:
我们可以通过以下方式规避这些问题:
我们将使用 ArrayLib用于排序(我希望它是一种快速排序算法)。
让我们从一个函数开始,查找第一列与值匹配的第一行(当前行的第一列):
function firstRowMatchingCol1(target, lookupRange) {
var min = 0;
var max = lookupRange.length - 1;
var guess;
var guessVal;
while(min <= max) {
guess = (min + max) / 2 | 0;
guessVal = lookupRange[guess][0];
if (guessVal < target) {
min = guess + 1;
} else if (guessVal > target) {
max = guess - 1;
} else {
while (guess > 0 && lookupRange[guess - 1][0] === target) {
guess -= 1;
}
return guess;
}
}
return -1;
}
现在我们可以线性地遍历每一行并检查列是否匹配,直到第一列不再匹配为止。
function matchExists(row, lookupRange) {
var index = firstRowMatchingCol1(row[0], lookupRange);
if (index === -1) {return false;}
while (index < lookupRange.length && lookupRange[index][0] === row[0]) {
for (var col = 1; col < row.length; col++) {
if (row[col] !== lookupRange[index][col]) {break;}
if (col === row.length - 1) {return true;} // This only works if the ranges are at least two columns wide but if they are one column wide you can just check if index > -1
}
index += 1;
}
return false;
}
最后我们可以得到这样的重复项:
function getNonDuplicates(r1, r2) {
r2 = ArrayLib.sort(r2, 0, true);
return r1.filter(function(row) {return !matchExists(row, r2);});
}
像 mTorres 的代码一样,这是未经测试的
关于google-apps-script - 比较两个电子表格并使用谷歌应用程序脚本输出差异,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44391955/