!wget https://www.jpba.or.jp/information/protest/2021/1st_East_M15W12.pdf -O data.pdf !pip install tabula-py
import pandas as pd
from tabula import read_pdf

# Column labels applied to the first table extracted from data.pdf.
# The trailing comments are literal English translations of the labels only.
headers = [
    "順位",  # rank
    "受験番号",  # examinee number
    "氏名",  # name
    "ふりがな",  # phonetic reading of the name
    "T/PIN",
    "年齢",  # age
    "登録地",  # registered location
    "利き腕",  # dominant arm
    "1-3G",  # one cell holding three whitespace-separated values (split below)
    "1S",
    "4-6G",
    "2S",
    "7-9G",
    "3S",
    "10-12G",
    "4S",
    "合計",  # total
    "平均",  # average
    "ランキング",  # ranking
]


def _split_games(scores, first_game):
    """Split a combined score column into one column per game.

    Args:
        scores: String Series whose cells hold whitespace-separated values
            (e.g. the "1-3G" column).
        first_game: Number used in the label of the first resulting column.

    Returns:
        DataFrame with the same index as *scores*; the first three columns
        are labelled "<first_game>G", "<first_game + 1>G" and
        "<first_game + 2>G". Any further columns keep their integer labels.
    """
    pieces = scores.str.split(expand=True)
    labels = {offset: f"{first_game + offset}G" for offset in range(3)}
    return pieces.rename(columns=labels)


# header=None stops pandas from treating the first extracted row as column
# names, so every table comes back with positional integer columns.
dfs = read_pdf("data.pdf", pages="all", lattice=True, pandas_options={"header": None})

# Only the first extracted table is used.
df0 = (
    dfs[0]
    .iloc[2:]  # skip the first two rows -- presumably the PDF's own header rows; TODO confirm
    .set_axis(headers, axis=1)  # fails unless the table has exactly len(headers) columns
    .dropna(subset=["順位"])  # drop rows that have no rank value
    .set_index(["順位", "氏名"])
)

# One frame per three-game cell; the names are kept for any later cells.
df1 = _split_games(df0["1-3G"], 1)
df2 = _split_games(df0["4-6G"], 4)
df3 = _split_games(df0["7-9G"], 7)
df4 = _split_games(df0["10-12G"], 10)

# All twelve per-game columns side by side, converted from strings to integers.
df = pd.concat([df1, df2, df3, df4], axis=1).astype(int)

# The expressions below are evaluated but not stored; in a notebook they only
# display a value. They look like a manual cross-check of the totals/averages
# recomputed from the per-game scores against the PDF's own columns -- NOTE(review): confirm.
df.sum(axis=1)
df0["合計"].str.replace(",", "").astype(int)  # strip thousands separators before converting
df0["平均"].astype(float)
df.mean(axis=1).round(2)
df