qiita.com
qiita.com
qiita.com
!pip install pystan
!pip install japanize-matplotlib
import pandas as pd
import japanize_matplotlib
# Scrape the two JFL 2018 schedule pages. read_html returns a list of
# DataFrames (one per HTML table found); the first row of each table is
# skipped and cells that are exactly '-' are read as NaN.
df_1st = pd.read_html(
    'http://www.jfl.or.jp/jfl-pc/view/s.php?a=1270&f=2018A001_spc.html',
    skiprows=1,
    na_values='-',
)
df_2nd = pd.read_html(
    'http://www.jfl.or.jp/jfl-pc/view/s.php?a=1271&f=2018A003_spc.html',
    skiprows=1,
    na_values='-',
)

# Both results are plain lists, so this is list concatenation.
dfs = [*df_1st, *df_2nd]
dfs

# Stack every table into one frame. The outer index level ('節') numbers the
# tables from 1 in scrape order; the inner level ('番号') is each table's own
# row index.  NOTE(review): presumably one table == one match day -- confirm.
jfl_2018 = pd.concat(
    dfs,
    keys=list(range(1, len(dfs) + 1)),
    names=['節', '番号'],
)
jfl_2018.columns = ['日にち', '時間', 'ホーム', 'スコア', 'アウェイ', 'スタジアム', '備考']
jfl_2018.drop(columns='備考', inplace=True)
jfl_2018.head()
# Split the 'スコア' text column on '-' into two integer columns:
# the part before the dash becomes 'home', the part after becomes 'away'.
_score_parts = jfl_2018['スコア'].str.split('-', expand=True)
_score_parts.columns = ['home', 'away']
df_score = _score_parts.astype(int)
df_score.dtypes

# Attach the numeric scores to the fixture table (aligned on the shared index).
jfl_2018 = pd.concat([jfl_2018, df_score], axis=1)
jfl_2018
# Reshape the scores to long form: one row per (match, side). After stack()
# the former column labels 'home'/'away' become a column, so 'team' below
# holds the side ('home' or 'away'), not a club name.
df = df_score.stack().reset_index().drop(columns=['節', '番号'])
df.columns = ['team', 'score']
df['count'] = 1  # helper column so pivot_table has something to count
df

# Frequency of each goal count, split by side; missing combinations become 0.
pv = df.pivot_table(
    index='score',
    columns='team',
    values='count',
    aggfunc='count',
    fill_value=0,
)
pv.plot.bar(rot=0)
import pystan
# Stan program for the match-score model.
#
# data:       N matches and K teams; Th/Ta are the home/away team ids used as
#             indices into the per-team parameter arrays, Sh/Sa are the goals
#             scored by the home/away side.
# parameters: per-team atk, def and home_power, plus two positive scale
#             parameters: sigma (shared by atk and def) and hp_sigma.
#             No explicit prior statement is given for sigma or hp_sigma.
# model:      atk, def ~ normal(0, sigma); home_power ~ normal(0, hp_sigma).
#             Home goals ~ Poisson(exp(home_power[home] + atk[home] - def[away]))
#             Away goals ~ Poisson(exp(atk[away] - (def[home] + home_power[home])))
#             i.e. the home team's home_power raises its own rate and lowers
#             the visiting team's rate.
# generated quantities:
#             games[th, ta, 1] / games[th, ta, 2] are simulated home / away
#             goals for every ordered pair (th, ta), including th == ta.
model_code = """
data {
int N; // N Games
int K; // K Teams
int Th[N]; // Home Team ID
int Ta[N]; // Away Team ID
int Sh[N]; // Home Team score point
int Sa[N]; // Away Team score point
}
parameters {
real atk[K];
real def[K];
real home_power[K];
real<lower=0> sigma;
real<lower=0> hp_sigma;
}
model {
for (k in 1:K) {
atk[k] ~ normal(0, sigma);
def[k] ~ normal(0, sigma);
home_power[k] ~ normal(0, hp_sigma);
}
for (n in 1:N) {
Sh[n] ~ poisson(exp(
(home_power[Th[n]] + atk[Th[n]]) - (def[Ta[n]])
));
Sa[n] ~ poisson(exp(
(atk[Ta[n]]) - (def[Th[n]] + home_power[Th[n]])
));
}
}
generated quantities {
real games[K, K, 2];
for (th in 1:K) {
for (ta in 1:K) {
games[th, ta, 1] = poisson_rng(exp((home_power[th] + atk[th]) - (def[ta])));
games[th, ta, 2] = poisson_rng(exp((atk[ta]) - (def[th] + home_power[th])));
}
}
}
"""
# Build the model object from the program text above; `sm` is what the
# sampling call later in this file is invoked on.
sm = pystan.StanModel(model_code=model_code)
# Encode clubs as integer ids. factorize() numbers the home teams from 0 in
# order of first appearance; +1 shifts them to the 1-based ids the Stan
# program indexes with.
labels, team_name = pd.factorize(jfl_2018['ホーム'])
jfl_2018['home_team_id'] = labels + 1
# Away teams are looked up in the same label set so both columns share ids.
# get_indexer returns -1 for a label not in team_name, which would become 0.
jfl_2018['away_team_id'] = team_name.get_indexer(jfl_2018['アウェイ']) + 1

N = len(jfl_2018)   # number of matches
K = len(team_name)  # number of distinct home teams

# Keys must match the names declared in the Stan `data` block.
stan_input = dict(
    N=N,
    K=K,
    Th=jfl_2018['home_team_id'],
    Ta=jfl_2018['away_team_id'],
    Sh=jfl_2018['home'],
    Sa=jfl_2018['away'],
)

# Fixed seed so the run is repeatable.
fit = sm.sampling(data=stan_input, seed=999)
fit.plot()
# Tabulate the fit summary as a DataFrame (rows = parameters, columns =
# summary statistics). summary() is called once and reused rather than being
# invoked three times inside a single expression.
fit_summary = fit.summary()
result = pd.DataFrame(
    fit_summary['summary'],
    index=fit_summary['summary_rownames'],
    columns=fit_summary['summary_colnames'],
)
result
import numpy as np
# Pull the draws out of the fit. The per-team arrays are indexed below as
# [:, k], i.e. first axis = draws, second axis = team position.
params = fit.extract()
atks, defs, home_power, games = (
    params[name] for name in ('atk', 'def', 'home_power', 'games')
)

# Average each team's draws. Position k in team_name corresponds to Stan
# team id k + 1, so the 0-based enumerate index lines up with the columns.
tk = {}
for k, team in enumerate(team_name):
    tk[team] = {
        "攻撃力": atks[:, k].mean(),
        "守備力": defs[:, k].mean(),
        "ホームアドバンテージ": home_power[:, k].mean(),
    }

# Transpose so each row is a team and each column is one of the three metrics.
tdf = pd.DataFrame(tk).T
tdf
# Scatter of attack (x) against defence (y), one point per team, each
# labelled with the team name (the row index of tdf).
ax = tdf.plot.scatter(x='攻撃力', y='守備力', s=100, figsize=(10, 10))
for k, v in tdf.iterrows():
    # Look the coordinates up by column label so they always match the axes
    # plotted above. Positional access (v[2], v[1]) depends on the column
    # order of tdf, which differs between pandas versions, and integer keys
    # on a string-indexed Series are deprecated.
    ax.annotate(k, xy=(v['攻撃力'], v['守備力']), size=15)