ランナー分析

import pandas as pd
import matplotlib.pyplot as plt

import japanize_matplotlib

# Figure resolution (dots per inch) used by default for every figure
import matplotlib as mpl
mpl.rcParams.update({"figure.dpi": 200})

# Read every HTML table found on the runner listing page.
dfs = pd.read_html(
    "https://www.pref.ehime.jp/h14150/malaysiabadminton/seika_runner.html"
)

# Stack the first and third tables into a single frame with a fresh 0..n-1
# index, then drop the "性別.1" column (presumably a duplicated 性別 header --
# confirm against the page).
df = pd.concat([dfs[0], dfs[2]], ignore_index=True)
df = df.drop(columns="性別.1")

# Strip any surrounding parentheses from the age strings and convert to int.
df["年齢"] = df["年齢"].str.strip("()").astype(int)

# Per-sex age columns: the runner's age where the sex matches, NaN elsewhere.
# These feed the two-series bar chart drawn later in the script.
df["男"] = df["年齢"].where(df["性別"] == "男")
df["女"] = df["年齢"].where(df["性別"] == "女")

# Column dtypes and non-null counts; info() writes directly to stdout.
df.info()

# Descriptive statistics of age over all runners. Printed explicitly because
# the value of a bare expression is discarded when this file runs as a script.
print(df["年齢"].describe())

# Output recorded by the author for the line above:
#
# count    43.000000
# mean     34.767442
# std      22.761535
# min      12.000000
# 25%      15.000000
# 50%      27.000000
# 75%      53.500000
# max      90.000000
# Name: 年齢, dtype: float64

# The same statistics broken down by sex.
print(df.groupby(by=["性別"])["年齢"].describe())
# Overlay one semi-transparent age histogram per sex on a shared axes.
# Bin edges run from 10 to 95 in steps of 5; the first call (ax=None) lets
# pandas pick the axes and the second call draws onto that same axes.
ax = None
for sex in ("男", "女"):
    ages = df.loc[df["性別"] == sex, "年齢"]
    ax = ages.hist(bins=range(10, 100, 5), alpha=0.6, ax=ax)
ax.set_xticks(range(0, 100, 10))

# Save the figure, then display it
plt.savefig("01.png", dpi=200, bbox_inches="tight")
plt.show()

[f:id:imabari_ehime:20191219193528p:plain]

# Sort the runners by age so the bars are drawn in ascending age order.
# sort_values(inplace=True) returns None, so its result must not be assigned;
# sort first, then bind df_sort to the (now sorted) frame.
df.sort_values(by="年齢", inplace=True)
df_sort = df

# Reference values computed from the data instead of being pasted from an
# earlier describe() run, so they stay correct when the source page changes.
age_mean = df_sort["年齢"].mean()
age_std = df_sort["年齢"].std()

# One horizontal bar per runner, labelled by municipality, with the age drawn
# in the column matching the runner's sex.
ax = df_sort.plot.barh(x="公募・推薦市町", y=["男", "女"], figsize=(5, 10))
ax.set_xticks(range(0, 100, 10))

# Vertical reference lines (empty labels keep them out of the legend)
ax.axvline(x=age_mean, linestyle="--", color="orange", linewidth=1, label="")
# NOTE(review): the green line is placed at the standard deviation of age,
# matching the original hard-coded 22.76 -- confirm this is the intended marker.
ax.axvline(x=age_std, linestyle="--", color="green", linewidth=1, label="")

# Save the figure, then display it
plt.savefig('02.png', dpi=200, bbox_inches="tight")
plt.show()

[f:id:imabari_ehime:20191219193536p:plain]

# NOTE(review): df_mean is not defined anywhere in the visible source, so this
# line raises NameError as written. It is presumably a table of mean ages
# built from df -- restore its definition before running this section.
ax = df_mean.plot.barh()

# Reference values computed from the data instead of being pasted from an
# earlier describe() run, so they stay correct when the source page changes.
age_mean = df["年齢"].mean()
age_std = df["年齢"].std()

# Vertical reference lines (empty labels keep them out of the legend)
ax.axvline(x=age_mean, linestyle="--", color="orange", linewidth=1, label="")
# NOTE(review): the green line is placed at the standard deviation of age,
# matching the original hard-coded 22.76 -- confirm this is the intended marker.
ax.axvline(x=age_std, linestyle="--", color="green", linewidth=1, label="")

# Save the figure, then display it
plt.savefig('03.png', dpi=200, bbox_inches="tight")
plt.show()

[f:id:imabari_ehime:20191219193545p:plain]