import csv
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
def _parse_minute(text):
    """Convert a minute label such as '45分' to an int.

    A trailing '分' is removed and '+'-separated parts are summed, so
    '45+2分' becomes 47 (the value the previous ``eval`` call produced).
    Text that is not made of integers is returned unchanged instead of
    being evaluated as Python code.
    """
    try:
        return sum(int(part) for part in text.rstrip('分').split('+'))
    except ValueError:
        return text


def cleaning(info, team, data):
    """Turn the rows of one team's goal table into flat records.

    Args:
        info: List of match-level fields that prefixes every record.
        team: Team name inserted right after ``info``.
        data: Iterable of row elements; each must provide
            ``select('th, td')`` returning cells with ``get_text(strip=True)``.

    Returns:
        A list with one ``info + [team] + cell_texts`` list per row, where
        the first cell is converted to an int minute (when it parses) and
        '(PK)' is removed from the second cell.
    """
    result = []
    for trs in data:
        temp = [i.get_text(strip=True) for i in trs.select('th, td')]
        # A row with fewer than two cells cannot be normalised below;
        # skip it instead of failing with IndexError.
        if len(temp) < 2:
            continue
        # NOTE: the cell text comes from a remote page, so it must never be
        # passed to eval(); parse it as plain integers instead.
        temp[0] = _parse_minute(temp[0])
        temp[1] = temp[1].replace('(PK)', '').strip()
        result.append(info + [team] + temp)
    return result
def scraping(n, url, timeout=10):
    """Download one match page and return its goal records.

    Args:
        n: Match sequence number; becomes the first field of every record.
        url: URL of the match page.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the caller forever.

    Returns:
        The home-team records followed by the away-team records, as built
        by ``cleaning`` (possibly an empty list), or ``None`` when the
        response status is not OK or the page has no '得 点' section.
    """
    r = requests.get(url, timeout=timeout)
    if r.status_code != requests.codes.ok:
        return None

    soup = BeautifulSoup(r.content, 'html5lib')

    # Header text is split on whitespace; the second season token has the
    # characters '第' and '節' stripped from both ends.
    score_season = soup.select_one(
        'div.score-header > h2.score-meta > span.score-season').get_text(
            strip=True).split()
    score_season[1] = score_season[1].strip('第節')

    score_date = soup.select_one(
        'div.score-header > h2.score-meta > span.score-date').get_text(
            strip=True).split()

    score_table = soup.select_one('table.score-table')
    home_team = score_table.select_one('th.score-team1').get_text(strip=True)
    away_team = score_table.select_one('th.score-team2').get_text(strip=True)

    game_info = [n] + score_season + score_date + [home_team, away_team]

    for heading in soup.select('div.section > h3'):
        if heading.text != '得 点':
            continue
        # Left-hand table holds the home side, right-hand table the away side.
        section = heading.parent
        home_rows = section.select(
            'div.score-frame > div.score-left > table > tbody > tr')
        away_rows = section.select(
            'div.score-frame > div.score-right > table > tbody > tr')
        return (cleaning(game_info, home_team, home_rows)
                + cleaning(game_info, away_team, away_rows))

    return None
if __name__ == "__main__":
    # Index page: every '詳細' link found in a 'td.detail-link' cell is
    # resolved against this URL and handed to scraping().
    url = 'http://www.jfl.or.jp/jfl-pc/view/s.php?a=1270&f=2018A001_spc.html'

    # A timeout keeps a stalled server from hanging the script forever.
    r = requests.get(url, timeout=10)

    if r.status_code == requests.codes.ok:
        soup = BeautifulSoup(r.content, 'html5lib')

        # encoding: the rows contain Japanese text and the file is read back
        # later with pandas' default (UTF-8), so do not rely on the locale.
        # newline='': required by the csv module so the writer controls the
        # line terminator itself.
        with open('result.csv', 'w', encoding='utf-8', newline='') as fw:
            writer = csv.writer(fw, dialect='excel', lineterminator='\n')

            # Header row
            writer.writerow(
                ['試合', 'シーズン', '節', '日付', '時刻', 'ホーム', 'アウェイ', '所属チーム', '時間', '背番号', '名前'])

            n = 0
            for link in soup.select('td.detail-link > a'):
                if link.text != '詳細':
                    continue

                n += 1
                spc_url = urljoin(url, link.get('href'))
                score_data = scraping(n, spc_url)

                # scraping() returns None or an empty list when there is
                # nothing to write for this match.
                if score_data:
                    writer.writerows(score_data)

                # Pause between requests to avoid hammering the server.
                time.sleep(1)
def goal_ranking(df):
    """Build a goal-scorer ranking from one-row-per-goal data.

    Args:
        df: DataFrame with one row per goal. The player and team columns
            may be named '名前' / '所属チーム' (the header written to
            result.csv) or already '選手名' / 'チーム名'.

    Returns:
        A DataFrame indexed by '順位' (integer rank, ties share the lowest
        rank) with columns '選手名', 'チーム名', '得点', sorted by rank, then
        team, then player. Rows whose player is 'オウンゴール' are excluded
        before ranking. The input frame is not modified.
    """
    # Align the CSV header names with the names used for the ranking;
    # names that are already in the target form are left untouched.
    goals = df.rename(columns={'名前': '選手名', '所属チーム': 'チーム名'})

    # One row per goal, so the group size is the goal count.
    counts = goals.groupby(['選手名', 'チーム名']).size().reset_index(name='得点')

    scorers = counts.loc[counts['選手名'] != 'オウンゴール'].copy()
    scorers['順位'] = scorers['得点'].rank(
        ascending=False, method='min').astype(int)
    scorers = scorers.sort_values(['順位', 'チーム名', '選手名'])

    # '順位' becomes the index, so it must not be selected as a column.
    return scorers.set_index('順位')[['選手名', 'チーム名', '得点']]


if __name__ == "__main__":
    df_goal = goal_ranking(pd.read_csv('result.csv'))
    df_goal.to_excel('jfl_goal_ranking.xlsx')