JFL2018の強さを分析

qiita.com

qiita.com

qiita.com

!pip install pystan
!pip install japanize-matplotlib
# 試合結果をスクレイピング

import pandas as pd
import japanize_matplotlib

# Scrape the match-result tables from the two JFL 2018 result pages.
# read_html returns a list of DataFrames per page; the first row of each table
# is skipped and a bare '-' cell is read as NaN.
read_opts = {'skiprows': 1, 'na_values': '-'}
df_1st = pd.read_html('http://www.jfl.or.jp/jfl-pc/view/s.php?a=1270&f=2018A001_spc.html', **read_opts)
df_2nd = pd.read_html('http://www.jfl.or.jp/jfl-pc/view/s.php?a=1271&f=2018A003_spc.html', **read_opts)

# One flat list holding the tables of both pages, in page order.
dfs = [*df_1st, *df_2nd]

dfs

# Stack all scraped tables into one frame. The outer index level '節' numbers
# the tables starting at 1; the inner level '番号' is each table's own row index.
table_numbers = list(range(1, len(dfs) + 1))
jfl_2018 = pd.concat(dfs, keys=table_numbers, names=['節', '番号'])
jfl_2018.columns = ['日にち', '時間', 'ホーム', 'スコア', 'アウェイ', 'スタジアム', '備考']
# Discard the remarks column ('備考').
jfl_2018 = jfl_2018.drop(columns='備考')
jfl_2018.head()

# Split the 'スコア' strings on '-' into two integer columns: the part before
# the dash becomes 'home', the part after it becomes 'away'.
score_parts = jfl_2018['スコア'].str.split('-', expand=True)
score_parts.columns = ['home', 'away']
df_score = score_parts.astype(int)
df_score.dtypes

# Append the two goal columns to the results table.
jfl_2018 = pd.concat((jfl_2018, df_score), axis='columns')
jfl_2018

# Reshape the scores to long form: one row per (game, side). After the index
# levels are dropped, 'team' holds the side label ('home' or 'away') and
# 'score' the goals scored by that side.
df = (
    df_score.stack()
    .reset_index()
    .drop(columns=['節', '番号'])
)
df.columns = ['team', 'score']
df['count'] = 1
df

# Count how often each goal total occurs per side and show it as a bar chart.
pv = df.pivot_table(
    index='score',
    columns='team',
    values='count',
    aggfunc='count',
    fill_value=0,
)
pv.plot.bar(rot=0)

f:id:imabari_ehime:20190406113150p:plain

import pystan

# Stan program for a Poisson model of match scores.
#   data:       N games and K teams; Th/Ta are the home/away team IDs and
#               Sh/Sa the home/away goals of each game.
#   parameters: per-team attack (atk), defence (def) and home advantage
#               (home_power); atk and def share the prior scale sigma,
#               home_power uses hp_sigma.
#   model:      home goals ~ Poisson(exp(home_power[home] + atk[home] - def[away]))
#               away goals ~ Poisson(exp(atk[away] - def[home] - home_power[home]))
#   generated quantities: one simulated (home, away) score pair for every
#               ordered pairing of teams, including a team against itself.
model_code = """
data {
    int N;  // N Games
    int K;  // K Teams
    int Th[N]; // Home Team ID
    int Ta[N]; // Away Team ID
    int Sh[N]; // Home Team score point
    int Sa[N]; // Away Team score point
}

parameters {
    real atk[K];
    real def[K];
    real home_power[K];
    real<lower=0> sigma;
    real<lower=0> hp_sigma;
}

model {
    for (k in 1:K) {
        atk[k] ~ normal(0, sigma);
        def[k] ~ normal(0, sigma);
        home_power[k] ~ normal(0, hp_sigma);
    }

    for (n in 1:N) {
        Sh[n] ~ poisson(exp(
            (home_power[Th[n]] + atk[Th[n]]) - (def[Ta[n]])
        ));

        Sa[n] ~ poisson(exp(
            (atk[Ta[n]]) - (def[Th[n]] + home_power[Th[n]])
        ));
    }
}

generated quantities {
    real games[K, K, 2];

    for (th in 1:K) {
        for (ta in 1:K) {
            games[th, ta, 1] = poisson_rng(exp((home_power[th] + atk[th]) - (def[ta])));
            games[th, ta, 2] = poisson_rng(exp((atk[ta]) - (def[th] + home_power[th])));
        }
    }
}
"""

# Compile the Stan program into a PyStan model object.
sm = pystan.StanModel(model_code=model_code)

# Encode team names as 1-based integer IDs, because the Stan program indexes
# its per-team arrays from 1.
#
# The home and away columns are factorized together. IDs are handed out in
# order of first appearance, home column first, so a team that shows up in the
# home column gets the same ID as when only that column is factorized. A team
# that appears only as an away side still receives a valid ID and is counted
# in K, instead of being mapped to the out-of-range ID 0.
N = len(jfl_2018)
codes, team_name = pd.factorize(pd.concat([jfl_2018['ホーム'], jfl_2018['アウェイ']]))
labels = codes[:N]  # zero-based codes of the home teams
jfl_2018['home_team_id'] = labels + 1
jfl_2018['away_team_id'] = codes[N:] + 1

K = len(team_name)
stan_input = {
    'N': N,
    'K': K,
    'Th': jfl_2018['home_team_id'],
    'Ta': jfl_2018['away_team_id'],
    'Sh': jfl_2018['home'],
    'Sa': jfl_2018['away'],
}

# Draw posterior samples; the fixed seed makes the run reproducible.
fit = sm.sampling(data=stan_input, seed=999)

# Plot the fit with PyStan's built-in plotting.
fit.plot()

# Build a labelled table of the posterior summary. The summary is computed
# once and reused for the values, row names and column names.
summary = fit.summary()
result = pd.DataFrame(
    summary['summary'],
    index=summary['summary_rownames'],
    columns=summary['summary_colnames'],
)
result

import numpy as np

# Pull the posterior draws out of the fit. For the per-team parameters,
# column k is used for the k-th entry of team_name.
params = fit.extract()
atks, defs, home_power = params['atk'], params['def'], params['home_power']
games = params['games']

# Posterior mean of attack, defence and home advantage for every team.
tk = {
    team: {
        "攻撃力": np.mean(atks[:, k]),
        "守備力": np.mean(defs[:, k]),
        "ホームアドバンテージ": np.mean(home_power[:, k]),
    }
    for k, team in enumerate(team_name)
}

# One row per team, one column per strength measure.
tdf = pd.DataFrame(tk).T
tdf

# Scatter plot of attack strength (x) against defence strength (y), one point
# per team.
ax = tdf.plot.scatter(x='攻撃力', y='守備力', s=100, figsize=(10, 10))

# Label every point with its team name. The coordinates are looked up by
# column name so each label sits on the plotted point regardless of the
# column order of tdf; positional lookups such as v[2] depend on that order.
for team_label, row in tdf.iterrows():
    ax.annotate(team_label, xy=(row['攻撃力'], row['守備力']), size=15)

f:id:imabari_ehime:20190406113205p:plain