excel - 比较具有不同记录数和不同顺序的两个数据集

标签 excel vba comparison

我在一个工作簿中有两个数据集(工作表“Master data”(主数据)和“BAZA OLD”)。两张工作表中的每条记录都有 11 列(A:K)。“Master data”工作表会不时通过 Power Query 更新。我想将更新后的新数据(Master data)与旧数据(BAZA OLD)进行比较,把所有不匹配的记录复制并粘贴到名为“Output”(输出)的工作表中,并在 L 列中标注“新发票”。
我发现一些宏可以部分满足我的需要,但问题是主数据表中的记录顺序在通过 Power Query 上传后可能会发生变化。
以下是我目前拥有的代码。
Public Sub Comparison()
    ' Compares the records (columns A:K) on sheet "BAZA OLD" with those on
    ' sheet "Master data", independent of row order, and writes to "Output":
    '   * every "BAZA OLD" row, with a note in column L when no identical
    '     row exists on "Master data";
    '   * every "Master data" row that was not paired with a "BAZA OLD" row,
    '     flagged "NEW INVOICE" in column L.
    ' Each "Master data" row can be paired with at most one "BAZA OLD" row,
    ' so duplicated records are matched one-to-one.
    ' Scanning of "BAZA OLD" stops at the first row whose cells A:K are all empty.

    Const FIRST_DATA_ROW As Long = 2    ' row 1 holds the headers on every sheet
    Const COL_COUNT As Long = 11        ' columns A..K take part in the comparison

    ' Every variable carries its own type; "Dim a, b As T" would leave "a" a Variant.
    Dim dumpSheet As Worksheet, icdSheet As Worksheet, outputSheet As Worksheet
    ' Long instead of Integer: sheet row numbers can exceed 32767.
    Dim icdLastRow As Long, outputRow As Long
    Dim tempDumpRow As Long, tempICDRow As Long, col As Long
    Dim matchedICD() As Boolean         ' indexed by "Master data" row number
    Dim isExist As Boolean, rowsEqual As Boolean, rowHasData As Boolean

    'Set sheets
    Set dumpSheet = Sheets("BAZA OLD")
    Set icdSheet = Sheets("Master data")
    Set outputSheet = Sheets("Output")

    ' Last used row of "Master data": search upwards from the bottom of each
    ' compared column so gaps in a single column cannot cut the data short.
    icdLastRow = 1
    For col = 1 To COL_COUNT
        tempICDRow = icdSheet.Cells(icdSheet.Rows.Count, col).End(xlUp).Row
        If tempICDRow > icdLastRow Then icdLastRow = tempICDRow
    Next col

    ' One flag per master row. A Boolean array replaces Filter() on a string
    ' array, which matches substrings (searching for row 1 also hits "10").
    ReDim matchedICD(0 To icdLastRow)

    outputRow = FIRST_DATA_ROW
    tempDumpRow = FIRST_DATA_ROW

    Do
        ' Continue while at least one of the cells A:K in this row has a value.
        rowHasData = False
        For col = 1 To COL_COUNT
            If dumpSheet.Cells(tempDumpRow, col).Value <> "" Then
                rowHasData = True
                Exit For
            End If
        Next col
        If Not rowHasData Then Exit Do

        'Reset exist flag
        isExist = False

        ' Look for a not-yet-paired master row that is equal in all columns.
        For tempICDRow = FIRST_DATA_ROW To icdLastRow
            If Not matchedICD(tempICDRow) Then

                rowsEqual = True
                For col = 1 To COL_COUNT
                    ' Same column on both sheets (A with A ... K with K).
                    If dumpSheet.Cells(tempDumpRow, col).Value <> _
                       icdSheet.Cells(tempICDRow, col).Value Then
                        rowsEqual = False
                        Exit For
                    End If
                Next col

                If rowsEqual Then
                    isExist = True
                    matchedICD(tempICDRow) = True   ' do not pair this row again
                    Exit For
                End If

            End If
        Next tempICDRow

        'Show result
        outputSheet.Cells(outputRow, 1).Resize(1, COL_COUNT).Value = _
            dumpSheet.Cells(tempDumpRow, 1).Resize(1, COL_COUNT).Value

        If isExist Then
            outputSheet.Range("L" & outputRow) = ""
        Else
            outputSheet.Range("L" & outputRow) = "Item found in ""BAZA OLD"" but not in ""Saldeo"""
        End If

        outputRow = outputRow + 1
        tempDumpRow = tempDumpRow + 1
    Loop

    ' Remaining master rows were never paired with an old row: new invoices.
    For tempICDRow = FIRST_DATA_ROW To icdLastRow
        If Not matchedICD(tempICDRow) Then

            outputSheet.Cells(outputRow, 1).Resize(1, COL_COUNT).Value = _
                icdSheet.Cells(tempICDRow, 1).Resize(1, COL_COUNT).Value
            outputSheet.Range("L" & outputRow) = "NEW INVOICE"

            outputRow = outputRow + 1

        End If
    Next tempICDRow
End Sub

最佳答案

通过连接单元格值为每一行创建一个键字符串,并使用字典对象比较两张表上的键

Sub Comparison()
    ' Reports rows of "Master data" that have no identical row on "BAZA OLD".
    ' Each row (columns A:K) is reduced to one key string; the keys of
    ' "BAZA OLD" are stored in a dictionary and every "Master data" key is
    ' looked up in it. Rows whose key is missing are copied to "Output" and
    ' labelled "new invoice" in the column after the compared ones.

    Const KEY_COLS = 11     ' number of compared columns, starting at A
    Const DELIM = "~"       ' separator placed between the cell values of a key

    Dim wsOld As Worksheet, wsNew As Worksheet, wsOut As Worksheet
    Dim seenKeys As Object
    Dim rowCells As Range
    Dim rowKey As String
    Dim r As Long, finalRow As Long, nextOut As Long, newCount As Long

    Set wsOld = ThisWorkbook.Sheets("BAZA OLD")
    Set wsNew = ThisWorkbook.Sheets("Master data")
    Set wsOut = ThisWorkbook.Sheets("Output")

    ' Pass 1: index the old data, key -> row number.
    Set seenKeys = CreateObject("Scripting.Dictionary")
    finalRow = wsOld.UsedRange.Row + wsOld.UsedRange.Rows.Count - 1
    For r = 2 To finalRow
        Set rowCells = wsOld.Cells(r, 1).Resize(1, KEY_COLS)
        rowKey = BuildRowKey(rowCells, DELIM)
        ' A row of empty cells yields only KEY_COLS - 1 separators, so a key
        ' shorter than KEY_COLS means the row is blank and is skipped.
        If Len(rowKey) < KEY_COLS Then
            ' blank row - nothing to index
        ElseIf seenKeys.exists(rowKey) Then
            MsgBox "Key not unique '" & rowKey & "'", vbCritical, _
                   wsOld.Name & " Row " & r & " and Row " & seenKeys(rowKey)
        Else
            seenKeys.Add rowKey, r
        End If
    Next r

    ' Pass 2: copy every non-blank master row whose key is unknown.
    nextOut = 2
    finalRow = wsNew.UsedRange.Row + wsNew.UsedRange.Rows.Count - 1
    For r = 2 To finalRow
        Set rowCells = wsNew.Cells(r, 1).Resize(1, KEY_COLS)
        rowKey = BuildRowKey(rowCells, DELIM)
        If Len(rowKey) >= KEY_COLS Then
            If Not seenKeys.exists(rowKey) Then
                wsOut.Cells(nextOut, 1).Resize(1, KEY_COLS).Value2 = rowCells.Value2
                wsOut.Cells(nextOut, KEY_COLS + 1) = "new invoice"
                nextOut = nextOut + 1
            End If
        End If
    Next r

    ' Summary for the user.
    newCount = nextOut - 2
    If newCount = 0 Then
        MsgBox "No new invoices", vbInformation
    Else
        MsgBox newCount & " new invoices", vbInformation
    End If

End Sub

Private Function BuildRowKey(ByVal rowCells As Range, ByVal delimiter As String) As String
    ' Joins the values of a single-row range into one string.
    ' Transposing twice turns the 1 x n range into the one-dimensional
    ' array that Join requires.
    BuildRowKey = Join(Application.Transpose(Application.Transpose(rowCells)), delimiter)
End Function

关于excel - 比较具有不同记录数和不同顺序的两个数据集,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/70985684/

相关文章:

excel - Excel 2010 中的条件聚合/中位数

vba - 循环浏览工作表

excel - 如何使用具有两个匹配函数的地址函数作为宏或 VBA 的输入?

java - 大阵列比较

java - Jackson vs. Gson

excel - 使用 vba 连接多个范围

excel - 计算列中的非空白文本单元格

regex - 如何使用 Excel 公式来验证自定义的电子邮件地址?

excel - 鼠标滚动在用户窗体 VBA 中不起作用

Ruby 比较运算符? == 与 ===