# qiita.com
!pip install camelot-py[cv]
!pip install pikepdf
import camelot
import pandas as pd
import pikepdf
# Extract the first table of "data.pdf" with camelot, merge table rows that
# were split across several text lines, and export the result to "result.csv".

# Re-save the PDF through pikepdf and let camelot read the rewritten copy.
# NOTE(review): presumably this normalises the file so camelot can parse it
# -- confirm against the actual input PDF.
with pikepdf.open("data.pdf") as pdf:
    pdf.save("output.pdf")

# strip_text removes spaces, dots and newlines from every extracted cell.
tables = camelot.read_pdf("output.pdf", flavor="stream", strip_text=" .\n")

# The first extracted row is joined into a single title string.
title = "".join(tables[0].data[0])
print(tables.n)

# Remaining rows form the raw table.
df0 = pd.DataFrame(tables[0].data[1:])
# Raw dump of the table; this file is overwritten by the final export below.
df0.to_csv("result.csv", encoding="utf_8_sig")

# Column 0 is concatenated into one subtitle string.
subtitle = df0[0].str.cat(sep="")

# Work on an independent copy so that adding the "grp" column below does not
# write into a slice of df0 (avoids SettingWithCopyWarning).
df1 = df0.iloc[:, 1:].copy()

# A row containing an empty cell is treated as continued on the next row:
# a new group starts only when the previous row had no empty cell.
# shift(fill_value=False) keeps the mask boolean; shift().fillna(False) can
# yield an object-dtype Series, on which `~` is integer inversion
# (~True == -2) and the cumulative sum no longer counts groups.
has_blank = (df1 == "").any(axis=1)
prev_has_blank = has_blank.shift(1, fill_value=False)
df1["grp"] = (~prev_has_blank).cumsum()
df1

# Concatenate the text fragments of each group column by column.
df2 = df1.groupby("grp").agg("".join)
df2.to_csv("result.csv", encoding="utf_8_sig")