我想使用pyqt5
获取网页。
网址是 https://land.3fang.com/LandAssessment/b6d8b2c8-bd4f-4bd4-9d22-ca49a7a2dc1f.html
。
该网页将使用 javascript 生成两个值。
只需在文本框中输入5
,然后按红色按钮即可。
将返回两个红色值。
请引用图片。
下面的代码用于获取网页。
可是我等了半天也没有反应。
我应该在我的代码中更改什么?
非常感谢。
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView
from bs4 import BeautifulSoup
import pandas as pd
class Render(QWebEngineView):
def __init__(self, url):
self.html = None
self.first_pass = True
self.app = QApplication(sys.argv)
QWebEngineView.__init__(self)
self.loadFinished.connect(self._load_finished)
self.load(QUrl(url))
self.app.exec_()
def _load_finished(self, result):
if self.first_pass:
self._first_finished()
self.first_pass = False
else:
self._second_finished()
def _first_finished(self):
self.page().runJavaScript('document.getElementById("txtDistance").value = "5";')
self.page().runJavaScript("void(0)")
self.page().runJavaScript("CheckUserWhere();")
def _second_finished(self):
self.page().toHtml(self.callable)
def callable(self, data):
self.html = data
self.app.quit()
url = "https://land.3fang.com/LandAssessment/b6d8b2c8-bd4f-4bd4-9d22-ca49a7a2dc1f.html"
web = Render(url)
soup = BeautifulSoup(web.html, 'html.parser')
element = soup.find('div', {'id':"divResult"})
df = pd.read_html(str(element))
最佳答案
看来你有几个误解:
- 执行js时,页面不会重新加载,所以不会调用_second_finished函数。
- 如果您不想显示窗口,那么最好使用 QWebEnginePage。
考虑到上面获得的 html 是:
<div class="p8-5" id="divResult" style="display:block;">
<div align="center" display="block" id="rsloading" style="display: block;">
<img src="//img2.soufunimg.com/qyb/loading.gif"/>
正在为您加载数据...
</div>
<table border="0" cellpadding="0" cellspacing="0" class="tablebox01" display="none" id="tbResult" style="display: none;" width="600">
<tbody><tr>
<td style="width:260px;"><span class="gray8">建设用地面积:</span>14748平方米</td>
<td style="width:340px;"><span class="gray8">所在城市:</span>山西省 长治市 </td>
</tr>
<tr>
<td><span class="gray8">规划建筑面积:</span>51617平方米</td>
<td><span class="gray8">土地评估楼面价:</span><b class="redc00 font14" id="_bpgj">867.61</b> 元/平方米</td>
</tr>
<tr>
<td><span class="gray8">容积率:</span>大于1并且小于或等于3.5</td>
<td><span class="gray8">土地评估总价:</span><b class="redc00 font14" id="_bSumPrice">4478.34</b> 万元</td>
</tr>
<tr>
<td><span class="gray8">规划用途:</span>住宅用地</td>
<td><span class="gray8">推出楼面价:</span>27.51元/平方米</td>
</tr>
</tbody></table>
</div>
所以最简单的做法是通过 ID“_bpgj”和“_bSumPrice”进行过滤
import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
from bs4 import BeautifulSoup
class Render(QtWebEngineWidgets.QWebEnginePage):
def __init__(self, url):
self.html = ""
self.first_pass = True
self.app = QtWidgets.QApplication(sys.argv)
super(Render, self).__init__()
self.loadFinished.connect(self._load_finished)
self.loadProgress.connect(print)
self.load(QtCore.QUrl(url))
self.app.exec_()
def _load_finished(self, result):
if result:
self.call_js()
def call_js(self):
self.runJavaScript('document.getElementById("txtDistance").value = "5";')
self.runJavaScript("void(0)")
self.runJavaScript("CheckUserWhere();")
self.toHtml(self.callable)
def callable(self, data):
self.html = data
self.app.quit()
url = "https://land.3fang.com/LandAssessment/b6d8b2c8-bd4f-4bd4-9d22-ca49a7a2dc1f.html"
web = Render(url)
soup = BeautifulSoup(web.html, 'html.parser')
_bpgj = soup.find('b', {'id':"_bpgj"}).string
_bSumPrice = soup.find('b', {'id':"_bSumPrice"}).string
print(_bpgj, _bSumPrice)
输出:
867.61 4478.34
关于javascript - 如何使用pyqt5获取网页?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55558956/