目前,我正在努力解析 data.cnbc.com/quotes/sdrl 中的报价表,并将 innerhtml 放入我指定的股票代码旁边的列中。
因此,我会从 A2 中获取符号,然后将 yield 数据放入 C2,然后移至下一个符号。
HTML 看起来像:
<table id="fundamentalsTableOne">
<tbody>
<tr scope="row">
<th scope="row">EPS</th>
<td>8.06</td>
</tr>
<tr scope="row">
<th scope="row">Market Cap</th>
<td>5.3B</td>
</tr>
<tr scope="row">
<th scope="row">Shares Out</th>
<td>492.8M</td>
</tr>
<tr scope="row">
<th scope="row">Price/Earnings</th>
<td>1.3x</td>
</tr>
</tbody>
</table>
<table id="fundamentalsTableTwo">
<tbody>
<tr scope="row">
<th scope="row">Revenue (TTM)</th>
<td>5.0B</td>
</tr>
<tr scope="row">
<th scope="row">Beta</th>
<td>1.84</td>
</tr>
<tr scope="row">
<th scope="row">Dividend</th>
<td>--</td>
</tr>
<tr scope="row">
<th scope="row">Yield</th>
<td><span class="pos">0.00%</span></td>
</tr>
</tbody>
</table>
目前,我有:
Sub getInfoWeb()
Dim cell As Integer
Dim xhr As MSXML2.XMLHTTP60
Dim doc As MSHTML.HTMLDocument
Dim table As MSHTML.HTMLTable
Dim tableCells As MSHTML.IHTMLElementCollection
Set xhr = New MSXML2.XMLHTTP60
For cell = 2 To 5
ticker = Cells(cell, 1).Value
With xhr
.Open "GET", "http://data.cnbc.com/quotes/" & ticker, False
.send
If .readyState = 4 And .Status = 200 Then
Set doc = New MSHTML.HTMLDocument
doc.body.innerHTML = .responseText
Else
MsgBox "Error" & vbNewLine & "Ready state: " & .readyState & _
vbNewLine & "HTTP request status: " & .Status
End If
End With
Set table = doc.getElementById("fundamentalsTableOne")
Set tableCells = table.getElementsByTagName("td")
For Each tableCell In tableCells
Cells(cell, 2).Value = tableCell.NextSibling.innerHTML
Next tableCell
Next cell
End Sub
但是,我收到“访问被拒绝”错误,并且在我的 set tablecells 行中收到运行时 91。这是因为每一行只有一个元素并且表格单元格被设置为一个集合吗?另外,“访问被拒绝”错误是由于 javascript 生成的 HTML 造成的吗?我认为这不会成为问题。
如果有人知道如何使其正常工作,我们将不胜感激。谢谢。
最佳答案
以下示例展示了如何获取所需的数据:
GetData "sdrl"
Sub GetData(sSymbol)
Dim sRespText, arrName, oDict, sResult, sItem
XmlHttpRequest "GET", "http://data.cnbc.com/quotes/" & sSymbol, "", "", "", sRespText
ParseToNestedArr "<span data-field=""name"">([\s\S]*?)</span>", sRespText, arrName
XmlHttpRequest "GET", "http://apps.cnbc.com/company/quote/newindex.asp?symbol=" & sSymbol, "", "", "", sRespText
ParseToDict "<tr[\s\S]*?><th[\s\S]*?>([\s\S]*?)</th><td>(?:<span[\s\S]*?>)*([\s\S]*?)(?:</span>)*</td></tr>", sRespText, oDict
sResult = arrName(0)(0) & vbCrLf & vbCrLf
For Each sItem in oDict.Keys
sResult = sResult & sItem & " = " & oDict(sItem) & vbCrLf
Next
MsgBox sResult
End Sub
Sub ParseToDict(sPattern, sResponse, oList)
Dim oMatch, arrSMatches
Set oList = CreateObject("Scripting.Dictionary")
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = sPattern
For Each oMatch In .Execute(sResponse)
oList(oMatch.SubMatches(0)) = oMatch.SubMatches(1)
Next
End With
End Sub
Sub ParseToNestedArr(sPattern, sResponse, arrMatches)
Dim oMatch, arrSMatches, sSubMatch
arrMatches = Array()
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = sPattern
For Each oMatch In .Execute(sResponse)
arrSMatches = Array()
For Each sSubMatch in oMatch.SubMatches
PushItem arrSMatches, sSubMatch
Next
PushItem arrMatches, arrSMatches
Next
End With
End Sub
Sub PushItem(arrList, varItem)
ReDim Preserve arrList(UBound(arrList) + 1)
arrList(UBound(arrList)) = varItem
End Sub
Sub XmlHttpRequest(sMethod, sUrl, arrSetHeaders, sFormData, sRespHeaders, sRespText)
Dim arrHeader
With CreateObject("Msxml2.ServerXMLHTTP.3.0")
.SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
.Open sMethod, sUrl, False
If IsArray(arrSetHeaders) Then
For Each arrHeader In arrSetHeaders
.SetRequestHeader arrHeader(0), arrHeader(1)
Next
End If
.Send sFormData
sRespHeaders = .GetAllResponseHeaders
sRespText = .ResponseText
End With
End Sub
它使用后期绑定(bind),因为初始目标语言是 VBScript,但如果您愿意,将它们更改为早期绑定(bind)并不难。 第二个链接 http://apps.cnbc.com/company/quote/newindex.asp?symbol=SDRL您可以在网页内容中找到 iframe 源。
关于html - 使用 VBA 解析 HTML 内容,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/29545228/