# NOTE: Java must be installed beforehand (tabula-py, installed below, needs a Java runtime).
!pip install tabula-py
# Fetch a web page, follow its link to a PDF report, read the tables on the
# first page of that PDF and print the value stored under the "合計" column
# of the first row of the third table.
import re
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tabula import read_pdf

# Page that is scraped for the link to the PDF.
url = "https://web.pref.hyogo.lg.jp/kk03/200129.html"

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
}

# An explicit timeout keeps the script from hanging forever if the server
# never answers; raise_for_status() turns HTTP error codes into exceptions.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

# Find the <a class="icon_pdf"> anchor whose text matches the report title.
# `string=` is the current name of the filter formerly spelled `text=`.
tag = soup.find(
    "a",
    class_="icon_pdf",
    string=re.compile("医療提供体制の確保について"),
)
if tag is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError(f"PDF link not found on {url}; the page layout may have changed")

href = tag.get("href")
if not href:
    # urljoin() would silently fall back to the page URL for an empty href.
    raise RuntimeError(f"PDF anchor on {url} has no href attribute")

# Resolve a possibly relative href against the page URL.
link = urljoin(url, href)

# Extract the tables on page 1; lattice mode relies on the ruling lines
# drawn between cells.
dfs = read_pdf(link, pages="1", lattice=True)

# First row of the third extracted table.
# NOTE(review): the index 2 depends on the PDF layout -- confirm it still
# points at the intended table when the report format changes.
s = dfs[2].iloc[0]

# Column name -> value mapping for that row.
d = s.to_dict()

# Take the "合計" entry out of the mapping so `d` keeps only the other columns.
total = d.pop("合計")

print(total)