!pip install lxml
!apt install fonts-ipafont-gothic
!rm /content/.cache/matplotlib/fontList.json
"""Restart the runtime"""
import time
import csv
import requests
from bs4 import BeautifulSoup
# Scrape the goal tables of the FC Imabari 2018 JFL result pages (rounds
# 1 .. n-1) and write one tab-separated row per goal to 'fcimabari_goal.tsv'.
n = 10 + 1

# The request headers never change, so build them once outside the loop.
headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
}

# encoding='utf-8' keeps the Japanese header and cell text independent of the
# platform's default encoding; newline='' is what the csv module asks for so
# that the writer alone controls line endings.
with open('fcimabari_goal.tsv', 'w', encoding='utf-8', newline='') as fw:
    writer = csv.writer(fw, dialect='excel-tab', lineterminator='\n')
    writer.writerow(['節', 'ホーム', 'アウェイ', '時間', 'チーム名', 'スコア', '背番号', '名前'])

    for i in range(1, n):
        url = 'http://www.fcimabari.com/team/game/result/JFL180{0:02d}.html'.format(i)

        # A timeout stops one unresponsive page from hanging the whole run.
        r = requests.get(url, headers=headers, timeout=30)

        if r.status_code == requests.codes.ok:
            soup = BeautifulSoup(r.content, 'lxml')

            home_cell = soup.select_one('table.tableTypeResult > tr:nth-of-type(2) > td:nth-of-type(1)')
            away_cell = soup.select_one('table.tableTypeResult > tr:nth-of-type(2) > td:nth-of-type(3)')
            goal_table = soup.select_one('table.tableType03')

            # select_one() returns None when nothing matches; report the page
            # instead of failing with AttributeError on an unexpected layout.
            if home_cell is None or away_cell is None or goal_table is None:
                print('skipped (unexpected page layout):', url)
            else:
                home = home_cell.get_text(strip=True)
                away = away_cell.get_text(strip=True)

                for tr in goal_table.select('tr'):
                    cells = [td.get_text(strip=True) for td in tr.select('td')]

                    # Rows without <td> cells produce no output row.
                    if cells:
                        # Drop the '分' (minute) unit so the column is numeric.
                        cells[0] = cells[0].strip('分')
                        writer.writerow([i, home, away] + cells)
        else:
            print('skipped (HTTP {0}):'.format(r.status_code), url)

        # Pause between requests to avoid hammering the server.
        time.sleep(3)
import pandas as pd
# Load the scraped goal list. The round ('節') and minute ('時間') columns are
# parsed as integers; every other column is kept as a generic object column.
goal_dtypes = {
    '節': 'int',
    'ホーム': 'object',
    'アウェイ': 'object',
    '時間': 'int',
    'チーム名': 'object',
    'スコア': 'object',
    '背番号': 'object',
    '名前': 'object',
}
df = pd.read_table('fcimabari_goal.tsv', dtype=goal_dtypes)
df
# Export the full goal list as CSV and trigger a browser download of it
# through the Colab helper module.
from google.colab import files

goal_list_csv = 'goal_list.csv'
df.to_csv(goal_list_csv)
files.download(goal_list_csv)
# Build a working table with the goal minute and the scoring team, plus a
# constant count of 1 per goal, and collapse every team other than 'FC今治'
# into the single label '敵チーム' (opponent).
#
# The columns are selected by name and copied explicitly: assigning into a
# positional slice of df triggers SettingWithCopyWarning and leaves it unclear
# whether df itself is modified.
df1 = df[['時間', 'チーム名']].copy()
df1['カウント'] = 1

# Equivalent alternative: blank out the non-'FC今治' names with Series.where()
# and then fill the resulting missing values with '敵チーム' via fillna().
df1.loc[df1['チーム名'] != 'FC今治', 'チーム名'] = '敵チーム'
df1.head()
# Pivot to one row per minute and one column per side, summing the per-goal
# counts; minute/side combinations without a goal are filled with 0.
# aggfunc is given as the string 'sum' because passing the builtin sum()
# callable is deprecated in recent pandas releases.
pv = df1.pivot_table(
    values='カウント',
    index='時間',
    columns='チーム名',
    aggfunc='sum',
    fill_value=0,
)
pv.head()
# Zero-filled template: one row for each minute 1..99, indexed by '時間',
# with a 0 count for both the opponent ('敵チーム') and 'FC今治' columns.
pv2 = pd.DataFrame(
    {
        '時間': list(range(1, 100)),
        '敵チーム': 0,
        'FC今治': 0,
    }
).set_index('時間')
pv2
# Overlay the real per-minute counts on the zero template.
# DataFrame.append() was removed in pandas 2.0; pd.concat() is its
# replacement and stacks the rows in the same order (template first).
pv3 = pd.concat([pv2, pv])

# Each minute now appears once (template only) or twice (template + data).
# Taking the last non-null value per column keeps the real count where one
# exists and the template's 0 otherwise.
grouped = pv3.groupby(level=0)
pv_goal = grouped.last()
pv_goal
import matplotlib.pyplot as plt
# Select the IPAPGothic font family for matplotlib text, then draw the
# per-minute goal counts as horizontal bars with the row order reversed.
plt.rcParams['font.family'] = 'IPAPGothic'

goals_reversed = pv_goal.iloc[::-1]
goals_reversed.plot.barh(figsize=(5, 20), xticks=list(range(5)))
# Show the ten goals with the smallest minute values, renumbered from 0.
goals_by_minute = df.sort_values(by=['時間'], ascending=True)
goals_by_minute.reset_index(drop=True).head(10)
# Export the per-minute goal table as CSV and trigger a browser download of
# it through the Colab helper module.
from google.colab import files

goal_csv = 'goal.csv'
pv_goal.to_csv(goal_csv)
files.download(goal_csv)
# For every round ('節'), keep the goal row(s) scored at that round's
# smallest minute, then print how many of those rows belong to 'FC今治'
# against the highest round number present in the data.
goal_time = df.loc[:, ['節', '時間', 'チーム名']]
goal_time
first_goal = goal_time.groupby('節').apply(lambda x: x[x['時間'] == x['時間'].min()])
first_goal

# Count matches with a boolean sum instead of value_counts()['FC今治']: the
# latter raises KeyError when 'FC今治' never has the earliest goal, whereas
# this yields 0 in that case and the same number otherwise.
imabari_first = (first_goal['チーム名'] == 'FC今治').sum()
print(imabari_first, '/', df['節'].max())
# Minutes of every goal credited to 'FC今治', followed by a few summary
# views: counts up to / after minute 48, the total, descriptive statistics,
# and the number of goals per minute in ascending minute order.
is_imabari = df['チーム名'] == 'FC今治'
df_time = df.loc[is_imabari, '時間']
df_time

up_to_48 = df_time <= 48
after_48 = df_time > 48
df_time[up_to_48].count()
df_time[after_48].count()
df_time.count()
df_time.describe()

goals_per_minute = df_time.value_counts()
df_count = goals_per_minute.sort_index()
df_count