我正在尝试把数据框中的两列相加,但我无法检查它们的属性(数据类型)。我该怎么办?
import re
import textwrap
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from pandas import DataFrame
# soccerbase.com team page requested with team_id=536, comp_id=1 and the
# "results" tab selected.
URL = "https://www.soccerbase.com/teams/team.sd?team_id=536&comp_id=1&teamTabs=results"
# Alternative source kept for reference:
# URL = "http://worldpopulationreview.com/countries/countries-by-gdp/"

# Without a timeout requests can wait forever on an unresponsive server;
# raise_for_status() turns an HTTP error response into an exception instead
# of letting an error page be parsed as if it were the results table.
r = requests.get(URL, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, 'html.parser')

# The data of interest is taken from the first <table class="soccerGrid">.
table = soup.find('table', {'class': 'soccerGrid'})
if table is None:
    # soup.find() returns None when nothing matches; fail here with a clear
    # message rather than with an AttributeError further down the script.
    raise ValueError("no <table class='soccerGrid'> found at " + URL)
def rowgetdatatext(tr, coltag='td', true=None):  # td (data) or th (header)
    """Return the text of every ``coltag`` cell found in the row ``tr``.

    Args:
        tr: Row element. It must provide ``find_all(tag)``, and each element
            that call yields must provide ``get_text(strip=...)``.
        coltag: Tag name of the cells to collect, ``'td'`` for data cells or
            ``'th'`` for header cells.
        true: Forwarded unchanged as the ``strip`` argument of ``get_text``.
            The default ``None`` is falsy, so by default the cell text is
            requested without stripping.

    Returns:
        A list with one string per matching cell, in the order ``find_all``
        yields them; an empty list when the row has no ``coltag`` cells.
    """
    return [cell.get_text(strip=true) for cell in tr.find_all(coltag)]
def tabledatatext(table):
    """Convert a table element into a list of rows of cell text.

    Args:
        table: Table element providing ``find_all('tr')``.

    Returns:
        A list of lists of strings. When the first ``<tr>`` contains ``<th>``
        cells, their text becomes the first row and that ``<tr>`` is not
        read again as a data row. Every remaining ``<tr>`` contributes the
        text of its ``<td>`` cells. A table without any ``<tr>`` yields ``[]``.
    """
    trs = table.find_all('tr')
    if not trs:
        # Nothing to inspect; indexing trs[0] below would raise IndexError.
        return []

    rows = []
    headerow = rowgetdatatext(trs[0], 'th')
    if headerow:  # if there is a header row include first
        rows.append(headerow)
        trs = trs[1:]
    # Every remaining table row is read as a data row.
    rows.extend(rowgetdatatext(tr, 'td') for tr in trs)
    return rows
# Flatten the scraped table into a list of rows of cell text.
d = tabledatatext(table)

pd.set_option('display.width', 400)
pd.set_option('display.max_columns', 20)

df = pd.DataFrame(d)

# Re-label the ten scraped columns. Columns that are not needed are all
# named "Omit" so that one drop() call removes every one of them.
Frame = pd.DataFrame(df.values,
                     columns=["Competition", "Date", "Omit", "Home Team", "Score", "Away Team", "Omit",
                              "Omit", "Omit", "DateKeep"
                              ])
Frame = Frame.drop(columns=["Omit", "Date"])
# Discard the rows labelled 0 and 1.
Frame = Frame.drop([0, 1], axis=0)

# Split the combined "Score" text on '-' into separate home/away columns.
Frame[['Home Score', 'Away Score']] = Frame['Score'].str.split('-', expand=True)
Frame = Frame.drop(columns="Score")
Frame = Frame[["Competition", "Home Team", "Home Score", "Away Team", "Away Score",
               "DateKeep"]]

# Trim fixed-width prefixes/suffixes from the scraped cell text.
Frame['Home Team'] = Frame['Home Team'].str[:-20]
Frame['Away Team'] = Frame['Away Team'].str[:-20]
Frame['DateKeep'] = Frame['DateKeep'].str[3:]
Frame['Competition'] = Frame['Competition'].str[:-18]

# str.split() produces strings, and "+" on two string columns concatenates
# ("4" + "0" -> "40"). Convert both score columns to numbers first; values
# that are not numeric become NaN because of errors='coerce'.
for score_col in ('Home Score', 'Away Score'):
    Frame[score_col] = pd.to_numeric(Frame[score_col].str.strip(), errors='coerce')

# Frame.index is an Index object, not a method, so the original
# Frame.index(Frame) raised TypeError. Compute the intended sum instead.
F2 = (Frame['Home Score'] + Frame['Away Score']).rename('Total Score')

print(Frame)
print(F2)
示例输出为:
   比赛 主队 主队比分 客队 客队比分 DateKeep
2 英超联赛 英超联赛 曼联 4 切尔西 0 2019-08-11 16:30
3 欧洲超级杯 欧洲超级杯 利物浦 2 切尔西 2 2019-08-14 20:00
4 英超 英超 切尔西 1 莱斯特城 1 2019-08-18 16:30
5 英超 英超 诺维奇 2 切尔西 3 2019-08-24 12:30
6 英超 英超 切尔西 2 谢菲尔德联 2 2019-08-31 15:00
7 英超 英超 狼队 2 切尔西 5 2019-09-14 15:00
当我想把主队比分和客队比分这两列相加时,得到的结果是把两个值拼接在一起,而不是求和。我哪里理解错了?谢谢。
编辑:已添加当前输出和期望输出的屏幕截图。(此处原为图片)
最佳答案
我会先为每个感兴趣的列收集一个值列表,并用其中一个比分列的长度来限制其他列表的大小,使所有列表长度一致。然后用 zip 把这些列表组合起来并转换为 DataFrame。由于比分在收集时已经转换为整数,最后两列(总进球数和净胜球)可以直接计算出来。
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
# Column labels for the final DataFrame, in the same order as the zipped lists.
headers = ['Competition','Home Team','Home Score','Away Team','Away Score','Date Keep','Total Score (Home + Away Score)','Goal Difference (Home - Away Score)']

# A timeout keeps the request from hanging indefinitely, and
# raise_for_status() stops the script on an HTTP error instead of parsing
# an error page.
r = requests.get('https://www.soccerbase.com/teams/team.sd?team_id=536&comp_id=1&teamTabs=results', timeout=30)
r.raise_for_status()
soup = bs(r.content, 'lxml')

# Scores are converted to int while collecting them, so the sums and
# differences below are arithmetic rather than string concatenation.
h_scores = [int(i.text) for i in soup.select('.score a em:first-child')]
a_scores = [int(i.text) for i in soup.select('.score a em + em')]
total_scores = [h+a for h,a in zip(h_scores, a_scores)]
diff_scores = [h-a for h,a in zip(h_scores, a_scores)]

# Cap every other column at the number of away scores found so that all
# lists passed to zip() have matching lengths.
limit = len(a_scores)
comps = [i.text for i in soup.select('.tournament a', limit=limit)]
dates = [i.text for i in soup.select('.dateTime .hide', limit=limit)]
h_teams = [i.text for i in soup.select('.homeTeam a', limit=limit)]
a_teams = [i.text for i in soup.select('.awayTeam a', limit=limit)]

df = pd.DataFrame(zip(comps, h_teams, h_scores, a_teams, a_scores, dates, total_scores, diff_scores), columns = headers)
print(df)
关于python - 我无法在数据框中添加两列,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58846951/