我正在尝试使用下面的代码抓取网站,以提取姓名和联系方式:
Sub Test()
    ' Downloads the Zillow agent-review listing for Detroit, loads the returned
    ' markup into a late-bound "htmlfile" document and then tries to select the
    ' contact cards by class name.
    Dim htmlDoc As Object
    Dim htmlDoc2 As Object
    Dim elem As Variant
    Dim tag As Variant
    Dim dns As String
    Dim pageSource As String
    Dim pageSource2 As String
    Dim url As String
    Dim row As Long

    row = 2
    dns = "https://www.zillow.com/detroit-mi/real-estate-agent-reviews/"

    With CreateObject("MSXML2.XMLHTTP")
        ' The request is opened asynchronously (third argument True), so poll
        ' until readyState reaches 4 while keeping the UI responsive.
        .Open "GET", dns, True
        .send
        While .readyState <> 4: DoEvents: Wend

        ' Abort with a message when the server does not answer "OK".
        If .statusText <> "OK" Then
            MsgBox "ERROR" & .Status & " - " & .statusText, vbExclamation
            Exit Sub
        End If
        pageSource = .responseText
    End With

    ' Parse the downloaded markup with a late-bound HTML document object.
    Set htmlDoc = CreateObject("htmlfile")
    htmlDoc.body.innerHTML = pageSource

    Dim xx
    ' The error occurs here (run-time error 438 on the late-bound document)
    Set xx = htmlDoc.getElementsByClassName("ldb-contact-summary")

    Set htmlDoc = Nothing
    Set htmlDoc2 = Nothing
End Sub
当我尝试执行下面这一行代码时:
Set xx = htmlDoc.getElementsByClassName("ldb-contact-summary")
我收到错误“对象不支持该属性或方法”(438)。我不太擅长处理网页抓取方面的问题,你能帮帮我吗?
最佳答案
要获取姓名及其对应的电话号码,您可以尝试以下代码段:
Sub GetProfileInfo()
    ' Walks the paginated Zillow agent-review listing and writes every agent's
    ' name (column 1) and phone number (column 2) to consecutive rows of the
    ' active sheet, starting at row 1.
    '
    ' Requires references to "Microsoft XML, v6.0" and
    ' "Microsoft HTML Object Library" (early binding).
    Const URL$ = "https://www.zillow.com/detroit-mi/real-estate-agent-reviews/?page="
    Dim Http As New XMLHTTP60, Html As New HTMLDocument
    Dim post As HTMLDivElement, R&, P&
    Dim nameLinks As Object, phoneNodes As Object

    For P = 1 To 3 'put here the highest number you wanna traverse
        With Http
            ' Synchronous request: .send returns once the response is complete.
            .Open "GET", URL & P, False
            .send
            ' Stop instead of silently parsing an error page.
            If .Status <> 200 Then
                MsgBox "ERROR " & .Status & " - " & .statusText, vbExclamation
                Exit Sub
            End If
            Html.body.innerHTML = .responseText
        End With

        For Each post In Html.getElementsByClassName("ldb-contact-summary")
            Set nameLinks = post.querySelectorAll(".ldb-contact-name a")

            ' Only a card with a name gets a row. The phone is written inside
            ' the same branch so it can never land on the previous agent's row
            ' (or on the invalid row 0 when the first card has no name).
            If nameLinks.Length > 0 Then
                R = R + 1
                Cells(R, 1) = nameLinks.item(0).innerText

                Set phoneNodes = post.getElementsByClassName("ldb-phone-number")
                If phoneNodes.Length > 0 Then Cells(R, 2) = phoneNodes.item(0).innerText
            End If
        Next post
    Next P
End Sub
要执行上面的脚本,需要先添加对以下库的引用:
Microsoft XML, v6.0
Microsoft HTML Object Library
关于excel - 使用 XMLHTTP 进行抓取会在特定类名处抛出错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52841969/