プロボウラー資格取得テストの成績PDFをpandasのDataFrameに変換する

# Download the test-results PDF and save it in the working directory as data.pdf.
!wget https://www.jpba.or.jp/information/protest/2021/1st_East_M15W12.pdf -O data.pdf
# Install tabula-py, the package that provides the read_pdf function imported below.
!pip install tabula-py
import pandas as pd
from tabula import read_pdf

# Column labels applied to the extracted table, in left-to-right order.
headers = [
    # Rank and per-bowler profile fields.
    *("順位", "受験番号", "氏名", "ふりがな", "T/PIN", "年齢", "登録地", "利き腕"),
    # Four three-game blocks, each followed by its companion "S" column
    # (presumably a per-block subtotal -- confirm against the PDF).
    *("1-3G", "1S", "4-6G", "2S", "7-9G", "3S", "10-12G", "4S"),
    # Overall figures.
    *("合計", "平均", "ランキング"),
]

# Extract the tables from every page of the PDF using tabula's lattice mode.
# header=None keeps pandas from consuming a table row as column names, so the
# columns stay positional until they are labelled below.
dfs = read_pdf(
    "data.pdf",
    pages="all",
    lattice=True,
    pandas_options={"header": None},
)

# Clean up the first extracted table step by step.
# Skip the first two rows (presumably the PDF's own header rows -- confirm).
df0 = dfs[0].iloc[2:]
# Label the columns with the names defined in `headers`.
df0 = df0.set_axis(headers, axis=1)
# Discard rows that have no rank value.
df0 = df0.dropna(subset=["順位"])
# Index each remaining row by (rank, name).
df0 = df0.set_index(["順位", "氏名"])

# Each multi-game column is split on whitespace into one column per piece;
# the first three pieces are renamed to the individual game labels.
game_columns = {
    "1-3G": ["1G", "2G", "3G"],
    "4-6G": ["4G", "5G", "6G"],
    "7-9G": ["7G", "8G", "9G"],
    "10-12G": ["10G", "11G", "12G"],
}

df1, df2, df3, df4 = (
    df0[block].str.split(expand=True).rename(columns=dict(enumerate(games)))
    for block, games in game_columns.items()
)

# Join the four blocks side by side and convert every score to an integer.
df = pd.concat([df1, df2, df3, df4], axis=1).astype(int)

# Per-row total recomputed from the individual game columns.
df.sum(axis="columns")

# Total as given in the table; commas are stripped so the text parses as int.
df0["合計"].str.replace(",", "").astype(int)

# Average as given in the table, converted to float.
df0["平均"].astype(float)

# Per-row average recomputed from the game columns, rounded to two decimals.
df.mean(axis="columns").round(2)

# The final per-game score table.
df