我想用 lxml 解析一个页面,点击页面上的选项后,表格数据会变成不同的形式。
from urllib.request import urlopen
import lxml.html

url = "http://f10.eastmoney.com/f10_v2/FinanceAnalysis.aspx?code=sz300059"
# Download the raw page bytes; the context manager closes the HTTP response.
with urlopen(url) as response:
    material = response.read()
# lxml.html.parse() expects a filename, URL or file-like object, so the
# already-downloaded bytes are parsed with document_fromstring() instead.
root = lxml.html.document_fromstring(material)
如果我写
# Select every row of the table whose id is "BBMX_table". The result is named
# `rows` rather than `set` so the built-in `set` type is not shadowed.
rows = root.xpath('//table[@id="BBMX_table"]//tr')
我得到的是与下面这个标签对应的表数据:
<li class="first current" onclick="ChangeRptF10AssetStatement('30005902','8','All',this,'');">
我得到的是:
我想得到的是与下面这个标签对应的表数据:
<li class="" onclick="ChangeRptF10AssetStatement('30005902','8','Year',this,'');">
我想得到的是:
我该如何正确编写 root.xpath 中的 XPath 表达式?更多信息:当您点击“按年度”时,该表会切换为另一张表,对应标签的属性为:
onclick="ChangeRptF10AssetStatement('30005902','8','Year',this,'')"
我试过 Selenium :
# `import lxml` alone does not load the submodules used below, so import
# lxml.etree and lxml.html explicitly.
import lxml.etree
import lxml.html
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Configure a headless Chrome session.
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument("--headless")
browser = webdriver.Chrome(options=chrome_options, executable_path='/usr/bin/chromedriver')
browser.get("http://f10.eastmoney.com/f10_v2/FinanceAnalysis.aspx?code=sz300059")

# Parse the HTML reported by the browser and serialise it back to text.
root = lxml.html.document_fromstring(browser.page_source)
mystring = lxml.etree.tostring(root, encoding="unicode")
# Write the dump as UTF-8 explicitly instead of relying on the platform's
# default encoding, since the page contains non-ASCII text.
with open("/tmp/test.html", "w", encoding="utf-8") as fh:
    fh.write(mystring)
打开 /tmp/test.html,里面没有数据,我怎样才能得到我期望的数据?
最佳答案
这是您用代码获取到的 HTML,表格位于 script 标签内:
<script type="text/template" id="tmpl_zyzb">
{{if (zyzb==null||zyzb.length<=0)}}
<div>
暂无数据
</div>
{{else}}
<table>
<tbody>
<tr>
<th class="tips-colname-Left">
<span>每股指标</span>
</th>
{{each zyzb as value i}}
<th class="tips-fieldname-Right" data-value="{{value.date}}">
<span>{{value.date.substr(2,8)}}</span>
</th>
{{/each}}
</tr>
要使用 Selenium,您需要等待页面加载完成。下面的代码是一个使用 WebDriverWait 的示例:
from selenium.webdriver.support.ui import WebDriverWait
import lxml.html

browser.get("http://f10.eastmoney.com/f10_v2/FinanceAnalysis.aspx?code=sz300059")
# Checking document.readyState alone says nothing about content produced by
# scripts. Also wait (up to 10 seconds) until the header cell read below
# exists in the DOM, so the final XPath lookup does not hit an empty list.
WebDriverWait(browser, 10).until(lambda d: d.execute_script(
    'return ["complete", "interactive"].indexOf(document.readyState) != -1'
    ' && document.querySelector("div#report_zyzb th span") !== null'))
# Parse the rendered DOM and print the first match of each query.
root = lxml.html.document_fromstring(browser.page_source)
print(root.xpath("//*[@class='name']//strong")[0].text)
print(root.xpath("//div[@id='report_zyzb']//th//span")[0].text)
您可以通过 API 请求获取页面中所有表格的数据。在第一个部分“主要指标”中,每个标签页对应的表格使用同一个 URL,只是 type 参数不同(0、1 和 2)。同样的方法也适用于其他表格:
import requests
# Request headers sent with every call; they mark the call as an
# XMLHttpRequest with the analysis page as referer.
headers = {
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    'Accept': '*/*',
    'DNT': '1',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36',
    'Referer': 'http://f10.eastmoney.com/f10_v2/FinanceAnalysis.aspx?code=sz300059',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'ru,en-US;q=0.9,en;q=0.8,tr;q=0.7',
}
# Section: main indicators
section_url = 'http://f10.eastmoney.com/NewFinanceAnalysis/MainTargetAjax'
data_code = 'SZ300059'
with requests.Session() as s:
    # `type` selects the tab: 0 = by reporting period, 1 = by year,
    # 2 = by single quarter.
    for data_type in (0, 1, 2):
        # `params` lets requests build and encode the query string. The
        # endpoint is plain http, so `verify=False` had no effect and is
        # dropped; that keeps certificate checks on if it redirects to https.
        response = s.get(
            section_url,
            params={'type': data_type, 'code': data_code},
            headers=headers,
            timeout=10,
        )
        # Raise on HTTP error responses instead of printing an error page.
        response.raise_for_status()
        print(response.text)
JSON 响应(已格式化):
[
{
"date":"2018-12-31",
"jbmgsy":"0.1858",
"kfmgsy":"0.1836",
"xsmgsy":"0.1858",
"mgjzc":"2.8010",
"mggjj":"1.0650",
"mgwfply":"0.6603",
"mgjyxjl":"0.5161",
"yyzsr":"31.2亿",
"mlr":"8.51亿",
"gsjlr":"9.59亿",
"kfjlr":"9.47亿",
"yyzsrtbzz":"22.64",
"gsjlrtbzz":"50.52",
"kfjlrtbzz":"53.87",
"yyzsrgdhbzz":"-1.17",
"gsjlrgdhbzz":"-2.19",
"kfjlrgdhbzz":"-0.92",
"jqjzcsyl":"6.32",
"tbjzcsyl":"6.11",
"tbzzcsyl":"2.35",
"mll":"27.25",
"jll":"30.68",
"sjsl":"14.51",
"yskyysr":"0.10",
"xsxjlyysr":"1.12",
"jyxjlyysr":"2.04",
"zzczzy":"0.08",
"yszkzzts":"80.72",
"chzzts":"139.58",
"zcfzl":"60.58",
"ldzczfz":"79.99",
"ldbl":"1.78",
"sdbl":"1.77"
},
{
"date":"2017-12-31",
"jbmgsy":"0.1488",
"kfmgsy":"0.1438",
"xsmgsy":"0.1488",
"mgjzc":"3.1381",
"mggjj":"1.4559",
"mgwfply":"0.6116",
"mgjyxjl":"-1.4363",
"yyzsr":"25.5亿",
"mlr":"4.75亿",
"gsjlr":"6.37亿",
"kfjlr":"6.16亿",
"yyzsrtbzz":"8.29",
"gsjlrtbzz":"-10.77",
"kfjlrtbzz":"3.43",
"yyzsrgdhbzz":"7.48",
"gsjlrgdhbzz":"6.80",
"kfjlrgdhbzz":"9.79",
"jqjzcsyl":"4.86",
"tbjzcsyl":"4.34",
"tbzzcsyl":"1.84",
"mll":"18.64",
"jll":"24.93",
"sjsl":"6.51",
"yskyysr":"0.12",
"xsxjlyysr":"1.05",
"jyxjlyysr":"-5.54",
"zzczzy":"0.07",
"yszkzzts":"98.08",
"chzzts":"125.67",
"zcfzl":"64.92",
"ldzczfz":"80.05",
"ldbl":"1.67",
"sdbl":"1.66"
},
{
"date":"2016-12-31",
"jbmgsy":"0.2059",
"kfmgsy":"0.1717",
"xsmgsy":"0.2059",
"mgjzc":"3.6042",
"mggjj":"1.9186",
"mgwfply":"0.6112",
"mgjyxjl":"-1.1882",
"yyzsr":"23.5亿",
"mlr":"6.47亿",
"gsjlr":"7.14亿",
"kfjlr":"5.95亿",
"yyzsrtbzz":"-19.62",
"gsjlrtbzz":"-61.39",
"kfjlrtbzz":"-66.86",
"yyzsrgdhbzz":"-1.13",
"gsjlrgdhbzz":"-24.72",
"kfjlrgdhbzz":"-26.92",
"jqjzcsyl":"6.60",
"tbjzcsyl":"5.57",
"tbzzcsyl":"2.81",
"mll":"27.49",
"jll":"30.29",
"sjsl":"10.74",
"yskyysr":"0.11",
"xsxjlyysr":"1.04",
"jyxjlyysr":"-3.51",
"zzczzy":"0.09",
"yszkzzts":"90.54",
"chzzts":"75.18",
"zcfzl":"52.45",
"ldzczfz":"97.77",
"ldbl":"1.56",
"sdbl":"1.55"
},
{
"date":"2015-12-31",
"jbmgsy":"1.0897",
"kfmgsy":"1.0585",
"xsmgsy":"1.0897",
"mgjzc":"4.4066",
"mggjj":"2.3754",
"mgwfply":"0.9065",
"mgjyxjl":"0.2953",
"yyzsr":"29.3亿",
"mlr":"20.5亿",
"gsjlr":"18.5亿",
"kfjlr":"18.0亿",
"yyzsrtbzz":"378.08",
"gsjlrtbzz":"1015.45",
"kfjlrtbzz":"1002.51",
"yyzsrgdhbzz":"13.62",
"gsjlrgdhbzz":"17.11",
"kfjlrgdhbzz":"14.51",
"jqjzcsyl":"66.42",
"tbjzcsyl":"22.63",
"tbzzcsyl":"12.36",
"mll":"70.05",
"jll":"63.18",
"sjsl":"14.85",
"yskyysr":"0.07",
"xsxjlyysr":"0.98",
"jyxjlyysr":"0.19",
"zzczzy":"0.20",
"yszkzzts":"27.67",
"chzzts":"--",
"zcfzl":"65.55",
"ldzczfz":"96.64",
"ldbl":"1.31",
"sdbl":"1.31"
},
{
"date":"2014-12-31",
"jbmgsy":"0.1370",
"kfmgsy":"0.1346",
"xsmgsy":"0.1370",
"mgjzc":"1.5540",
"mggjj":"0.2420",
"mgwfply":"0.2640",
"mgjyxjl":"1.9535",
"yyzsr":"6.12亿",
"mlr":"1.94亿",
"gsjlr":"1.66亿",
"kfjlr":"1.63亿",
"yyzsrtbzz":"146.31",
"gsjlrtbzz":"3213.59",
"kfjlrtbzz":"--",
"yyzsrgdhbzz":"39.62",
"gsjlrgdhbzz":"82.92",
"kfjlrgdhbzz":"90.55",
"jqjzcsyl":"9.38",
"tbjzcsyl":"8.82",
"tbzzcsyl":"3.85",
"mll":"31.68",
"jll":"27.07",
"sjsl":"16.01",
"yskyysr":"0.22",
"xsxjlyysr":"1.08",
"jyxjlyysr":"3.86",
"zzczzy":"0.14",
"yszkzzts":"45.05",
"chzzts":"--",
"zcfzl":"69.60",
"ldzczfz":"99.89",
"ldbl":"1.38",
"sdbl":"1.38"
},
{
"date":"2013-12-31",
"jbmgsy":"0.0100",
"kfmgsy":"-0.0039",
"xsmgsy":"0.0100",
"mgjzc":"2.5136",
"mggjj":"1.1785",
"mgwfply":"0.2745",
"mgjyxjl":"0.7084",
"yyzsr":"2.48亿",
"mlr":"-339万",
"gsjlr":"500万",
"kfjlr":"-262万",
"yyzsrtbzz":"11.57",
"gsjlrtbzz":"-86.69",
"kfjlrtbzz":"-108.51",
"yyzsrgdhbzz":"28.64",
"gsjlrgdhbzz":"--",
"kfjlrgdhbzz":"--",
"jqjzcsyl":"0.29",
"tbjzcsyl":"0.30",
"tbzzcsyl":"0.24",
"mll":"-1.36",
"jll":"2.01",
"sjsl":"-0.42",
"yskyysr":"0.39",
"xsxjlyysr":"0.94",
"jyxjlyysr":"1.92",
"zzczzy":"0.12",
"yszkzzts":"62.86",
"chzzts":"--",
"zcfzl":"30.57",
"ldzczfz":"99.25",
"ldbl":"3.02",
"sdbl":"3.02"
},
{
"date":"2012-12-31",
"jbmgsy":"0.1100",
"kfmgsy":"0.0900",
"xsmgsy":"0.1100",
"mgjzc":"5.1175",
"mggjj":"3.3624",
"mgwfply":"0.6399",
"mgjyxjl":"0.0600",
"yyzsr":"2.23亿",
"mlr":"3533万",
"gsjlr":"3758万",
"kfjlr":"3074万",
"yyzsrtbzz":"-20.55",
"gsjlrtbzz":"-64.72",
"kfjlrtbzz":"-68.18",
"yyzsrgdhbzz":"-12.07",
"gsjlrgdhbzz":"-45.99",
"kfjlrgdhbzz":"-50.55",
"jqjzcsyl":"2.20",
"tbjzcsyl":"2.19",
"tbzzcsyl":"2.07",
"mll":"15.86",
"jll":"16.88",
"sjsl":"13.29",
"yskyysr":"0.27",
"xsxjlyysr":"0.77",
"jyxjlyysr":"0.09",
"zzczzy":"0.12",
"yszkzzts":"56.91",
"chzzts":"--",
"zcfzl":"4.54",
"ldzczfz":"97.80",
"ldbl":"20.02",
"sdbl":"20.02"
},
{
"date":"2011-12-31",
"jbmgsy":"0.5100",
"kfmgsy":"0.4600",
"xsmgsy":"0.5100",
"mgjzc":"8.1000",
"mggjj":"5.9674",
"mgwfply":"0.9669",
"mgjyxjl":"0.7431",
"yyzsr":"2.80亿",
"mlr":"1.10亿",
"gsjlr":"1.07亿",
"kfjlr":"9661万",
"yyzsrtbzz":"51.55",
"gsjlrtbzz":"59.62",
"kfjlrtbzz":"35.11",
"yyzsrgdhbzz":"12.27",
"gsjlrgdhbzz":"11.64",
"kfjlrgdhbzz":"4.62",
"jqjzcsyl":"6.44",
"tbjzcsyl":"6.27",
"tbzzcsyl":"6.08",
"mll":"39.14",
"jll":"38.01",
"sjsl":"12.25",
"yskyysr":"0.39",
"xsxjlyysr":"1.12",
"jyxjlyysr":"0.56",
"zzczzy":"0.16",
"yszkzzts":"38.93",
"chzzts":"--",
"zcfzl":"6.76",
"ldzczfz":"100.00",
"ldbl":"13.13",
"sdbl":"13.13"
},
{
"date":"2010-12-31",
"jbmgsy":"0.5100",
"kfmgsy":"0.5400",
"xsmgsy":"0.5100",
"mgjzc":"11.5200",
"mggjj":"9.4387",
"mgwfply":"0.9209",
"mgjyxjl":"0.4991",
"yyzsr":"1.85亿",
"mlr":"7032万",
"gsjlr":"6674万",
"kfjlr":"7150万",
"yyzsrtbzz":"12.01",
"gsjlrtbzz":"-7.13",
"kfjlrtbzz":"6.78",
"yyzsrgdhbzz":"1.73",
"gsjlrgdhbzz":"-10.81",
"kfjlrgdhbzz":"0.68",
"jqjzcsyl":"5.27",
"tbjzcsyl":"4.14",
"tbzzcsyl":"6.67",
"mll":"38.02",
"jll":"36.10",
"sjsl":"9.82",
"yskyysr":"0.37",
"xsxjlyysr":"1.19",
"jyxjlyysr":"0.38",
"zzczzy":"0.18",
"yszkzzts":"50.99",
"chzzts":"--",
"zcfzl":"4.09",
"ldzczfz":"100.00",
"ldbl":"23.80",
"sdbl":"23.80"
}
]
关于python - 如何提取lxml中指定的div表数据?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/22902617/