我正在编写一个 VBA 网络爬虫,用来获取每件商品的名称和价格。我对 VBA 非常陌生,在完成这个程序的最后几个细节上遇到了问题。
我希望它访问这个刀具网站,获取商品的名称和价格并将它们存储到工作表上,然后处理下一件商品;当前页面处理完后,继续翻到下一页,直到所有商品都被抓取完毕。有人可以指出我在实现这个计划时所犯的错误吗?
Sub printnumbs()
    ' Writes the numbers 2 through 12 into row 6, one value in every second
    ' column starting at D6 (D6, F6, H6, ...).
    Dim n As Integer

    With Range("D6")
        n = 0
        Do While n <= 10
            ' Column offset is twice the counter, so one column is skipped
            ' between consecutive values.
            .Offset(0, 2 * n).Value = n + 2
            n = n + 1
        Loop
    End With
End Sub
Sub scrape()
    ' Drives Internet Explorer through consecutive result pages of the
    ' knifecenter.com "Hunting and Bushcraft" listing and writes each
    ' product's price (column B) and name (column C) to the active sheet.
    '
    ' Row layout: item i of page index p goes to row 9 + i + p * 30, the same
    ' layout the original code used.
    '
    ' Requires a reference to "Microsoft HTML Object Library" (HTMLDocument).
    ' The browser itself is late-bound, so no reference to
    ' "Microsoft Internet Controls" is needed.
    Const BASE_URL As String = "https://www.knifecenter.com/kc_new/store_store.html?ttl=Hunting%20and%20Bushcraft%20Knives&desc=Hunting%20Bushcraft&wnWWWCAT_1datarq=video&wnWWWCAT_1datarq=notepad&s="
    Const ITEMS_PER_PAGE As Long = 30
    ' Literal value of READYSTATE_COMPLETE. The named constant lives in
    ' "Microsoft Internet Controls"; without that reference (and without
    ' Option Explicit) it silently evaluates to 0 and the wait never ends.
    Const READY_COMPLETE As Long = 4
    ' Page indexes to visit. Index 0 is the first result page.
    'Max is For i2 = 0 To 400
    Const FIRST_PAGE As Long = 1
    Const LAST_PAGE As Long = 2

    Dim ie As Object
    Dim doc As HTMLDocument
    Dim items As Object
    Dim listing As Object
    Dim priceRows As Object
    Dim priceCols As Object
    Dim names As Object
    Dim pageIdx As Long
    Dim i As Long
    Dim lastItem As Long
    Dim rowOffset As Long
    Dim failure As String

    On Error GoTo Failed

    Set ie = CreateObject("InternetExplorer.Application")
    ie.Visible = True

    For pageIdx = FIRST_PAGE To LAST_PAGE
        ' The "s" parameter is the 1-based index of the first item on the
        ' page: 1, 31, 61, ... (the original i2 + i2 * 30 drifted by one
        ' extra item per page).
        ie.navigate BASE_URL & (1 + pageIdx * ITEMS_PER_PAGE)

        ' Give the navigation a moment to start, then yield to the message
        ' loop until the browser reports the page as fully loaded.
        Application.Wait (Now + TimeValue("0:00:02"))
        Do While ie.Busy Or ie.readyState <> READY_COMPLETE
            DoEvents
        Loop

        Set doc = ie.document

        ' Look the product tiles up document-wide. The original only looked
        ' inside the first "row-fluid" container and always read tile (0),
        ' so every row received the same product.
        Set items = doc.getElementsByClassName("listing_item span4")

        ' Never write more than ITEMS_PER_PAGE rows, otherwise this page
        ' would spill into the rows reserved for the next one.
        lastItem = items.Length - 1
        If lastItem > ITEMS_PER_PAGE - 1 Then lastItem = ITEMS_PER_PAGE - 1

        For i = 0 To lastItem
            Set listing = items.Item(i)
            rowOffset = i + (pageIdx * ITEMS_PER_PAGE)

            ' Price: first "left-col" inside the tile's first "price-row".
            Set priceRows = listing.getElementsByClassName("price-row")
            If priceRows.Length > 0 Then
                Set priceCols = priceRows.Item(0).getElementsByClassName("left-col")
                If priceCols.Length > 0 Then
                    Range("B9").Offset(rowOffset, 0).Value = priceCols.Item(0).innerText
                End If
            End If

            ' Name: searched anywhere inside the tile, so it is found whether
            ' or not it is nested under "price-row".
            Set names = listing.getElementsByClassName("product_name")
            If names.Length > 0 Then
                Range("C9").Offset(rowOffset, 0).Value = names.Item(0).innerText
            End If
        Next i
    Next pageIdx

CleanUp:
    ' Always close the browser, even after a failure. Quit itself may fail
    ' if the user already closed the window, so ignore errors here.
    On Error Resume Next
    If Not ie Is Nothing Then ie.Quit
    Set ie = Nothing
    Application.EnableEvents = True
    On Error GoTo 0

    If Len(failure) > 0 Then MsgBox failure, vbExclamation, "scrape"
    Exit Sub

Failed:
    ' Capture the error text before Resume clears the Err object.
    failure = "scrape failed (" & Err.Number & "): " & Err.Description
    Resume CleanUp
End Sub
最佳答案
试试下面的代码。既然有更强大的方法(直接发送 XMLHTTP 请求),何必坚持使用 IE?获取所有数据的时间应该不会超过 5 秒:
Sub Web_Data()
    ' Downloads the knifecenter.com "Hunting and Bushcraft" result pages with
    ' XMLHTTP (no browser) and writes each product's name to column A and its
    ' price to column B of the active sheet, starting at row 1.
    '
    ' Requires references to "Microsoft HTML Object Library" and
    ' "Microsoft XML, v6.0".
    Const URL As String = "https://www.knifecenter.com/kc_new/store_store.html?ttl=Hunting%20and%20Bushcraft%20Knives&desc=Hunting%20Bushcraft&wnWWWCAT_1datarq=video&wnWWWCAT_1datarq=notepad&s="
    Const HTTP_OK As Long = 200

    Dim http As New XMLHTTP60, html As New HTMLDocument
    Dim page As Long, row As Long
    Dim topic As HTMLHtmlElement
    Dim names As Object, prices As Object

    ' "s" is the 1-based index of the first item on a page; the site lists
    ' 30 items per page, so the start indexes are 1, 31, 61, ..., 151.
    For page = 1 To 151 Step 30
        With http
            .Open "GET", URL & page, False   ' False = synchronous request
            .send

            ' Only parse successful responses; an error page contains no
            ' product tiles anyway.
            If .Status = HTTP_OK Then
                html.body.innerHTML = .responseText

                For Each topic In html.getElementsByClassName("listing_item span4")
                    Set names = topic.getElementsByClassName("product_name")

                    ' A tile without a name does not get a row. Previously its
                    ' price was still written to Cells(row, 2), which fails on
                    ' row 0 or overwrites the previous product's price.
                    If names.Length > 0 Then
                        row = row + 1
                        Cells(row, 1) = names.Item(0).innerText

                        Set prices = topic.getElementsByClassName("our_price")
                        If prices.Length > 0 Then Cells(row, 2) = prices.Item(0).innerText
                    End If
                Next topic
            End If
        End With
    Next page
End Sub
运行前需要在 VBA 编辑器中(工具 → 引用)添加以下库引用:
1. Microsoft HTML Object Library
2. Microsoft XML, V6.0 ''Or whatever version you have
关于excel - VBA网页抓取问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47503494/