VBA,高级过滤和删除重复项

标签 vba excel

我在 A 列中有一个充满不同路径的列表。 我在 B 和 C 中有详细信息列表。

如何在新工作表上:1)提取每个唯一路径,2)为每个路径编译 B * C 中的值并删除重复项。 3)在最后一行完成这些操作后,重复下一个路径。

我确实有一个错误的宏,但为了简洁和准确,我不会发布。除非有人想读,否则请提出请求

enter image description here

任何帮助将不胜感激。

这是我所拥有的(我知道它很长,我会尝试稍微清理一下):

Sub FileDetail()
'Does not fill down, go to bottom to unleased fill down
'Skips unreadable files
'This Macro retrieves data from files picked. The data is based on header. Data is also filtered for unique values.
'You must make sure headers are in the first row and delimted.


Dim wb As Workbook, fileNames As Object, errCheck As Boolean
    Dim ws As Worksheet, wks As Worksheet, wksSummary As Worksheet
    Dim y As Range, intRow As Long, i As Integer

Dim r As Range, lr As Long, myrg As Range, z As Range
    Dim boolWritten As Boolean, lngNextRow As Long
    Dim intColNode As Integer, intColScenario As Integer
    Dim intColNext As Integer, lngStartRow As Long
    Dim lngLastNode As Long, lngLastScen As Long
    Dim intColinstrument As Integer, lngLastinstrument As Long



   'Skipped worksheet for file names
   Dim wksSkipped As Worksheet
   Set wksSkipped = ThisWorkbook.Worksheets("Skipped")


     ' Turn off screen updating and automatic calculation
    With Application
        .ScreenUpdating = False
        .Calculation = xlCalculationManual
    End With

     ' Create a new worksheet, if required
    On Error Resume Next
    Set wksSummary = ActiveWorkbook.Worksheets("Unique data")
    On Error GoTo 0
    If wksSummary Is Nothing Then
        Set wksSummary = ActiveWorkbook.Worksheets.Add(After:=ActiveWorkbook.Worksheets(ActiveWorkbook.Worksheets.Count))
        wksSummary.Name = "Unique data"
    End If

     ' Set the initial output range, and assign column headers
    With wksSummary
        Set y = .Cells(.Rows.Count, 3).End(xlUp).Offset(1, 0)
        Set r = y.Offset(0, 1)
        Set z = y.Offset(0, -2)
        lngStartRow = y.Row
        .Range("A1:E1").Value = Array("File Name", "Sheet Name", "Node", "Book", "Instrument")
    End With

'get user input for files to search
Set fileNames = CreateObject("Scripting.Dictionary")
errCheck = UserInput.FileDialogDictionary(fileNames)
If errCheck Then
   Exit Sub
End If
'''
For Each Key In fileNames 'loop through the dictionary




On Error Resume Next
Set wb = Workbooks.Open(fileNames(Key))
If Err.Number <> 0 Then
    Set wb = Nothing    ' or set a boolean error flag
End If
On Error GoTo 0    ' or your custom error handler

If wb Is Nothing Then
wksSkipped.Cells(wksSkipped.Cells(wksSkipped.Rows.Count, "A").End(xlUp).Row + 1, 1) = fileNames(Key)

Else
    Debug.Print "Successfully loaded " & fileNames(Key)
    wb.Application.Visible = False 'make it not visible
    ' more working with wb




 ' Check each sheet in turn
    For Each ws In ActiveWorkbook.Worksheets
        With ws
             ' Only action the sheet if it's not the 'Unique data' sheet
            If .Name <> wksSummary.Name Then
                boolWritten = False



       ''''''''''''''''''testing additional column..trouble here



                                 ' Find the Anchor Date
                intColScenario = 0
                On Error Resume Next
                intColScenario = WorksheetFunction.Match("instrument.instrumentType", .Rows(1), 0)
                On Error GoTo 0

                If intColScenario > 0 Then
                     ' Only action if there is data in column E
                    If Application.WorksheetFunction.CountA(.Columns(intColScenario)) > 1 Then
                       lr = .Cells(.Rows.Count, intColScenario).End(xlUp).Row


                         ' Copy unique values from the formula column to the 'Unique data' sheet, and write sheet & file details
                        .Range(.Cells(1, intColScenario), .Cells(lr, intColScenario)).AdvancedFilter xlFilterCopy, , r, True
                        r.Offset(0, -2).Value = ws.Name
                        r.Offset(0, -3).Value = ws.Parent.Name



                         ' Delete the column header copied to the list
                        r.Delete Shift:=xlUp
                        boolWritten = True
                    End If
                End If

          ''''''''''''''''''''''''''''''''''''below is working'''''''''''''''''''''''

                 ' Find the Desk column
                intColNode = 0
                On Error Resume Next
                intColNode = WorksheetFunction.Match("book.reportingLine.pathName", .Rows(1), 0)
                On Error GoTo 0

                If intColNode > 0 Then
                     ' Only action if there is data in column A
                    If Application.WorksheetFunction.CountA(.Columns(intColNode)) > 1 Then
                        lr = .Cells(.Rows.Count, intColNode).End(xlUp).Row

                         ' Copy unique values from column A to the 'Unique data' sheet, and write sheet & file details (if not already written)
                        .Range(.Cells(1, intColNode), .Cells(lr, intColNode)).AdvancedFilter xlFilterCopy, , y, True
                        If Not boolWritten Then
                            y.Offset(0, -1).Value = ws.Name
                            y.Offset(0, -2).Value = ws.Parent.Name
                        End If

                         ' Delete the column header copied to the list
                        y.Delete Shift:=xlUp
                    End If
                End If

          ' Find the Intrument
                intColinstrument = 0
                On Error Resume Next
                intColinstrument = WorksheetFunction.Match("instrument.instrumentType", .Rows(1), 0)
                On Error GoTo 0

                If intColinstrument > 0 Then
                     ' Only action if there is data in column A
                    If Application.WorksheetFunction.CountA(.Columns(intColinstrument)) > 1 Then
                        lr = .Cells(.Rows.Count, intColinstrument).End(xlUp).Row

                         ' Copy unique values from column A to the 'Unique data' sheet, and write sheet & file details (if not already written)
                        .Range(.Cells(1, intColinstrument), .Cells(lr, intColinstrument)).AdvancedFilter xlFilterCopy, , z, True
                        If Not boolWritten Then
                            z.Offset(0, -3).Value = ws.Name
                            z.Offset(0, -4).Value = ws.Parent.Name
                        End If

                         ' Delete the column header copied to the list
                        z.Delete Shift:=xlUp
                    End If
                End If




         ' Identify the next row, based on the most rows used in columns C & D
                lngLastNode = wksSummary.Cells(wksSummary.Rows.Count, 3).End(xlUp).Row
                lngLastScen = wksSummary.Cells(wksSummary.Rows.Count, 4).End(xlUp).Row
                lngLastinstrument = wksSummary.Cells(wksSummary.Rows.Count, 5).End(xlUp).Row
                lngNextRow = WorksheetFunction.Max(lngLastNode, lngLastScen) + 1

                If (lngNextRow - lngStartRow) > 1 Then

                   ' Fill down the workbook and sheet names
                    z.Resize(lngNextRow - lngStartRow, 2).FillDown


                    ''''''''Optional if you want headers to be filled down.

                    'If (lngNextRow - lngLastNode) > 1 Then


                         ' Fill down the last Node value
                        'wksSummary.Range(wksSummary.Cells(lngLastNode, 3), wksSummary.Cells(lngNextRow - 1, 3)).FillDown
                    'End If
                    'If (lngNextRow - lngLastScen) > 1 Then
                         ' Fill down the last Scenario value
                        'wksSummary.Range(wksSummary.Cells(lngLastScen, 4), wksSummary.Cells(lngNextRow - 1, 4)).FillDown
                    'End If


                End If



                Set y = wksSummary.Cells(lngNextRow, 3)
                Set r = y.Offset(0, 1)
                Set z = y.Offset(0, -2)
                lngStartRow = y.Row
            End If
        End With
    Next ws
wb.Close savechanges:=False 'close the workbook do not save
Set wb = Nothing 'release the object
End If

Next 'End of the fileNames loop
Set fileNames = Nothing

 ' Autofit column widths of the report
wksSummary.Range("A1:E1").EntireColumn.AutoFit

' Reset system settings
With Application
   .Calculation = xlCalculationAutomatic
   .ScreenUpdating = True
   .Visible = True
End With
End Sub

因此,此代码获取文件名、工作表名称和我指定的数据列。

1)但是我在添加额外的列时遇到了麻烦。 (我目前得到了 2 个提取的列),还有

2) 我无法将其设置为各列相互依赖的格式。 ex 它会给我每条路径的独特值(value),但不是每项运动的独特值(value)。

编辑以包含数据(我还想包含第四列和第五列,但为了简单起见将其保留为 3):

+-------------------------------+------------+--------------+
| path                          | sport      | Teams        |
+-------------------------------+------------+--------------+
| stack/over/flow/larrybird     | basketball | celtics      |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | basketball | bulls        |
+-------------------------------+------------+--------------+
| stack/over/flow/tigerwoods    | golf       | pga          |
+-------------------------------+------------+--------------+
| stack/over/flow/josebautista  | baseball   | bluejays     |
+-------------------------------+------------+--------------+
| stack/over/flow/jordanspeith  | golf       | pga          |
+-------------------------------+------------+--------------+
| stack/over/flow/kevinlove     | basketball | timberwolves |
+-------------------------------+------------+--------------+
| stack/over/flow/lebronjames   | basketball | cavs         |
+-------------------------------+------------+--------------+
| stack/over/flow/stephencurry  | basketball | warriors     |
+-------------------------------+------------+--------------+
| stack/over/flow/larrybird     | baseball   | redsox       |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | baseball   | whitesox     |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | chess      | knight       |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | basketball | hornets      |
+-------------------------------+------------+--------------+
| stack/over/flow/kevinlove     | basketball | cavs         |
+-------------------------------+------------+--------------+
| stack/over/flow/tigerwoods    | golf       | pga          |
+-------------------------------+------------+--------------+

和预期结果(我在其中填写了内容)

+-------------------------------+------------+--------------+
| path                          | sport      | teams        |
+-------------------------------+------------+--------------+
| stack/over/flow/larrybird     | basketball | celtics      |
+-------------------------------+------------+--------------+
|                               | baseball   | red sox      |
+-------------------------------+------------+--------------+
| stack/over/flow/tigerwoods    | golf       | pga          |
+-------------------------------+------------+--------------+
| stack/over/flow/michaeljordan | basketball | bulls        |
+-------------------------------+------------+--------------+
|                               |            | hornets      |
+-------------------------------+------------+--------------+
|                               | baseball   | whitesox     |
+-------------------------------+------------+--------------+
|                               | chess      | knight       |
+-------------------------------+------------+--------------+
| stack/over/flow/kevinlove     | basketball | timberwolves |
+-------------------------------+------------+--------------+
|                               |            | cavs         |
+-------------------------------+------------+--------------+
| stack/over/flow/josebautista  | baseball   | bluejays     |
+-------------------------------+------------+--------------+

第三列(还有第四列和第五列)获取唯一值似乎是一个问题。

最佳答案

简单的方法是复制整个范围,对其进行排序,然后运行一些计算:

Sub Macro1()
  Application.ScreenUpdating = False
  Dim str As String
  With Sheet1
      str = .Range(.Cells(.Rows.Count, 1).End(xlUp), .Cells(1, 3)).Address
      .Range(str).Copy Sheet2.Cells(1, 1)
  End With
  Application.CutCopyMode = False
  With Sheet2
    .Activate
    Dim str2 As String
    str2 = .Range(str).Offset(1).Resize(.Range(str).Rows.Count - 1).Address
    .Range(str2).Value = Evaluate("if(" & str2 & "="""",-1E+99," & str2 & ")")
    .Sort.SortFields.Clear
    .Sort.SortFields.Add .Range(str).Offset(1).Resize(, 1), 0, 1, , 0
    .Sort.SortFields.Add .Range(str).Offset(1, 1).Resize(, 1), 0, 1, , 0
    .Sort.SortFields.Add .Range(str).Offset(1, 2).Resize(, 1), 0, 1, , 0
    .Sort.SetRange .Range(str).Offset(1)
    .Sort.Header = 2
    .Sort.Apply
    .Range(str2).Value = Evaluate("if(" & str2 & "=-1E+99,""""," & str2 & ")")
    Dim val As Variant, i As Long, rng2 As Range
    val = .Range(str).Value
    Set rng2 = .Range(str).Offset(.Range(str).Rows.Count).Resize(1)
    For i = 3 To UBound(val)
      If val(i - 1, 1) = val(i, 1) And val(i - 1, 2) = val(i, 2) And val(i - 1, 3) = val(i, 3) Then Set rng2 = Union(rng2, .Range(str).Rows(i))
    Next
    i = .Range(str).Rows.Count - rng2.Rows.Count
    rng2.EntireRow.Delete xlShiftUp
    With .Range(str).Offset(1).Resize(i - 1, 1)
      .Value = Evaluate("if(" & .Address & "=" & .Offset(-1).Address & ",""""," & .Address & ")")
      With .Offset(, 1)
        .Value = Evaluate("if((" & .Address & "=" & .Offset(-1).Address & ")*(" & .Offset(, -1).Address & "=""""),""""," & .Address & ")")
      End With
    End With
  End With
End Sub

通过电话完成,可能包含错误!
现在改变很多,请复制整个代码并再次测试。

编辑

好吧,一个完全不同的解决方案。应该很快,但其工作方式可能不是很清楚:P

Sub Macro2()

  Dim inVal As Variant, outVal() As Variant, orderArr() As Variant
  Dim startRng As Range
  Dim i As Long, j As Long, k As Long, iCount As Long

  Set startRng = Sheet1.Range("A2:C2") 'upmost row-range of the range to be copied (exclude headers!)
  With startRng.Parent
    inVal = .Range(startRng, .Cells(.Rows.Count, startRng.Column).End(xlUp)).Value
  End With
  ReDim orderArr(1 To UBound(inVal))
  For i = 1 To UBound(inVal)
    iCount = 1
    For j = 1 To UBound(inVal)
      For k = 1 To UBound(inVal, 2)
        If StrComp(inVal(i, k), inVal(j, k), 1) = 1 Then iCount = iCount + 1
        If StrComp(inVal(i, k), inVal(j, k), 1) <> 0 Then Exit For
      Next
    Next
    orderArr(i) = iCount
  Next
  k = 1
  ReDim outVal(1 To UBound(inVal, 2), 1 To UBound(inVal))
  For i = 0 To Application.Max(orderArr)
    If IsNumeric(Application.Match(i, orderArr, 0)) Then
      iCount = Application.Match(i, orderArr, 0)
      For j = 1 To UBound(inVal, 2)
        outVal(j, k) = inVal(iCount, j)
      Next
      k = k + 1
    End If
  Next
  ReDim Preserve outVal(1 To UBound(inVal, 2), 1 To k - 1)
  For i = 1 To UBound(outVal)
    For j = UBound(outVal, 2) To 2 Step -1
      If outVal(i, j - 1) = outVal(i, j) Then
        If i = 1 Then
          outVal(i, j) = ""
        ElseIf outVal(i - 1, j) = "" Then
          outVal(i, j) = ""
        End If
      End If
    Next
  Next
  'upper left cell of the output-range
  Sheet2.Range("A2").Resize(UBound(outVal, 2), UBound(outVal)).Value = Application.Transpose(outVal)
End Sub

随意将起始范围 (Sheet1.Range("A2:C2")) 设置为 Selection,然后只需选择范围并启动宏即可。适用于任何大小(虽然非常大的范围可能会卡住 Excel 一段时间)。

一如既往:如果您有任何疑问,请提出:)

关于VBA,高级过滤和删除重复项,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37330738/

相关文章:

vba - 根据日期更改工作簿中工作表上选项卡的颜色

vba - 我可以将数组写入某个范围并仅重新计算更改的单元格吗?

vba - 在列中的每个单元格中运行方程

vba - 在 Excel/VBA 中从选择中排除列

sql-server - 将非规范化的关系数据从 Excel 导入 SQL Server

excel - 时间值 ("23:50")

python - 将 pandas 数据帧写入 xlsm 文件(启用宏的 Excel)

vba - 使用可变但行数相同的不同列进行格式化,并且列不相邻

vba - 如何传递光标并将焦点设置在 VBA 中的用户窗体文本框上?

java - Apache POI SXSSFWorkbook 无法创建工作表