html - 按类名的数据抓取元素

标签 html excel vba web-scraping

我正在尝试从网站中提取数据。我想把单元尺寸(例如 "10' x 5'",类名为 "unit_size medium")复制到第 1 列,这部分数据可以成功复制;但我还想把促销信息(类名为 "promo_offers",例如“第一个月免费!”)复制到第 2 列。问题是促销只针对部分存储单元,并非每个单元都有。因此得到的数据具有误导性:促销信息被写到了第一个单元所在的行(即使它并不属于该单元),随后代码就报错。我只想为那些确实提供促销的单元复制促销信息,其余单元对应的单元格应保持空白,或者设置为某个其他值。下面是代码...

请建议如何构建代码。

Sub GetClassNames()
    ' Opens the storage-unit page in Internet Explorer and, for every element whose
    ' class is exactly "unit_size medium", writes that unit's text to column 1 and the
    ' text of the "promo_offers" element at the SAME positional index to column 2.
    '
    ' NOTE(review): both lookups index document-wide collections with the same counter.
    ' If the page has fewer "promo_offers" elements than "unit_size medium" elements,
    ' promos are paired with the wrong units, and once the counter runs past the last
    ' promo the lookup presumably yields Nothing so .innerText raises a runtime error.
    '
    ' Requires references to "Microsoft Internet Controls" and
    ' "Microsoft HTML Object Library" (early-bound types are used below).

    Dim html As HTMLDocument

    Dim objIE As Object
    Dim element As IHTMLElement
    Dim ie As InternetExplorer ' declared but never assigned or used in this procedure
    Dim elements As IHTMLElementCollection
    Dim result As String ' declared but never used in this procedure

    Dim count As Long
    Dim erow As Long

    'create a new instance of Internet Explorer and assign it to objIE
    Set objIE = New InternetExplorer

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True

    'navigate IE to the storage facility's unit listing page
    objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

    'spin (yielding to the message loop) until IE is no longer busy and readyState is 4 (READYSTATE_COMPLETE)
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
    ' count is the zero-based index shared by the unit and promo lookups below
    count = 0

    Set html = objIE.document
    Set elements = html.getElementsByClassName("unit_size medium")

    For Each element In elements
        ' only elements whose class attribute is exactly this string are processed
        If element.className = "unit_size medium" Then
            ' next free row is taken from column A of Sheet2 ...
            erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
            ' ... but Cells is unqualified here, so the write goes to the active sheet
            Cells(erow, 1) = html.getElementsByClassName("unit_size medium")(count).innerText

            ' positional pairing: the count-th promo on the whole page, not this unit's promo
            Cells(erow, 2) = html.getElementsByClassName("promo_offers")(count).innerText
            count = count + 1      
        End If
    Next element
    ' NOTE(review): objIE.Quit is never called, so the browser window stays open
End Sub

最佳答案

我会在尝试访问元素时,简单地用 On Error Resume Next 把访问语句包裹起来。输出数组中已经为每个值预留了位置,因此如果某个元素不存在,对应的位置就保持为空。

Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
    ' Scrapes the listings inside the "small_units_accordion_panel" element of the
    ' storage-unit page and writes one row per <li> to Sheet1 of this workbook:
    ' size, features, promo, in-store rate, web price. A value that is missing for
    ' a listing (for example, no promo) is left empty in that row.
    '
    ' Behaviour on failure:
    '   * If the panel is absent or contains no <li>, only the header row is written.
    '   * The Internet Explorer instance is always closed, even when an error occurs;
    '     the original error is then re-raised to the caller.
    Const PAGE_URL As String = "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

    Dim ie As InternetExplorer, ws As Worksheet
    Dim panel As Object, listings As Object, listing As Object
    Dim headers(), results(), r As Long
    Dim errNum As Long, errSrc As String, errDesc As String

    ' Resolve the target sheet before starting IE so a missing sheet cannot leak a browser.
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    headers = Array("size", "features", "promo", "in store", "web")

    ' Explicit instantiation instead of "Dim ... As New", which silently re-creates the object.
    Set ie = New InternetExplorer
    On Error GoTo Fail

    ie.Visible = True
    ie.Navigate2 PAGE_URL

    ' Yield to the message loop until the page has finished loading.
    While ie.Busy Or ie.readyState < 4: DoEvents: Wend

    ' getElementById returns Nothing when the id is absent; guard before dereferencing.
    Set panel = ie.document.getElementById("small_units_accordion_panel")
    If Not panel Is Nothing Then Set listings = panel.getElementsByTagName("li")
    '.unit_size medium, .features, .promo_offers, .board_rate_wrapper p, .board_rate

    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

    If Not listings Is Nothing Then
        ' ReDim with an upper bound of 0 and a lower bound of 1 would raise an error.
        If listings.Length > 0 Then
            ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
            For Each listing In listings
                r = r + 1
                ' Deliberate best-effort: a listing without one of these elements
                ' raises on .innerText; skip it and leave that array slot empty.
                On Error Resume Next
                results(r, 1) = listing.getElementsByClassName("unit_size medium")(0).innerText
                results(r, 2) = listing.getElementsByClassName("features")(0).innerText
                results(r, 3) = listing.getElementsByClassName("promo_offers")(0).innerText
                results(r, 4) = listing.getElementsByClassName("board_rate")(0).innerText
                results(r, 5) = listing.getElementsByClassName("price")(0).innerText
                ' Restore the procedure-level handler for everything outside the lookups.
                On Error GoTo Fail
            Next
            ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
        End If
    End If

Done:
    ' Always close the browser; ignore a failure to quit.
    On Error Resume Next
    ie.Quit
    On Error GoTo 0
    ' Re-raise the captured error (On Error statements clear Err, hence the saved copy).
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Sub

Fail:
    errNum = Err.Number: errSrc = Err.Source: errDesc = Err.Description
    Resume Done
End Sub

抓取所有存储单元(不只是小型单元面板):
Option Explicit

'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
    ' Scrapes every <li> inside every element with class "main_unit" on the
    ' storage-unit page and writes one row per <li> to Sheet1 of this workbook:
    ' size, features, promo, in-store rate, web price. A value that is missing for
    ' a listing (for example, no promo) is left empty in that row.
    '
    ' Behaviour on failure:
    '   * If no ".main_unit li" elements are found, only the header row is written.
    '   * The Internet Explorer instance is always closed, even when an error occurs;
    '     the original error is then re-raised to the caller.
    Const PAGE_URL As String = "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

    Dim ie As InternetExplorer, ws As Worksheet
    Dim list As Object, listing As Object, item As Object
    Dim headers(), results()
    Dim r As Long, rowCount As Long
    Dim errNum As Long, errSrc As String, errDesc As String

    ' Resolve the target sheet before starting IE so a missing sheet cannot leak a browser.
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    headers = Array("size", "features", "promo", "in store", "web")

    ' Explicit instantiation instead of "Dim ... As New", which silently re-creates the object.
    Set ie = New InternetExplorer
    On Error GoTo Fail

    ie.Visible = True
    ie.Navigate2 PAGE_URL

    ' Yield to the message loop until the page has finished loading.
    While ie.Busy Or ie.readyState < 4: DoEvents: Wend

    Set list = ie.document.getElementsByClassName("main_unit")
    '.unit_size medium, .features, .promo_offers, .board_rate_wrapper p, .board_rate

    ' Total number of rows needed across all panels, used to size the output array once.
    rowCount = ie.document.querySelectorAll(".main_unit li").Length

    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

    ' ReDim with an upper bound of 0 and a lower bound of 1 would raise an error.
    If rowCount > 0 Then
        ReDim results(1 To rowCount, 1 To UBound(headers) + 1)
        For Each listing In list
            For Each item In listing.getElementsByTagName("li")
                r = r + 1
                ' Deliberate best-effort: an item without one of these elements
                ' raises on .innerText; skip it and leave that array slot empty.
                ' NOTE(review): assumes the nested walk visits exactly rowCount items;
                ' a row index past rowCount would also be skipped silently here.
                On Error Resume Next
                results(r, 1) = item.getElementsByClassName("unit_size medium")(0).innerText
                results(r, 2) = item.getElementsByClassName("features")(0).innerText
                results(r, 3) = item.getElementsByClassName("promo_offers")(0).innerText
                results(r, 4) = item.getElementsByClassName("board_rate")(0).innerText
                results(r, 5) = item.getElementsByClassName("price")(0).innerText
                ' Restore the procedure-level handler for everything outside the lookups.
                On Error GoTo Fail
            Next
        Next
        ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End If

Done:
    ' Always close the browser; ignore a failure to quit.
    On Error Resume Next
    ie.Quit
    On Error GoTo 0
    ' Re-raise the captured error (On Error statements clear Err, hence the saved copy).
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Sub

Fail:
    errNum = Err.Number: errSrc = Err.Source: errDesc = Err.Description
    Resume Done
End Sub

关于html - 按类名的数据抓取元素,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/54822598/

相关文章:

javascript - 使用函数为变量分配 HTML id

jquery - 如何根据页面滚动位置为对象设置动画

Word 变量集合的 Excel VBA 等效项

vba - VSTO 加载项迁移到 Office 365

javascript - 在 React 中使用 Vidyard 嵌入式播放器

html - 无法排列有序列表

sql - 不能公开具有内联函数的 MS Access 查询

vba - 允许目标表扩展的动态数组公式

excel - 有没有办法检测用户是否单击了禁用按钮?

vba - 使用 VBA 更改 Powerpoint 2013 中幻灯片元素的颜色