# オンライン診療対応医療機関のPDFをCSVに変換
#
# www.mhlw.go.jp

import requests
from bs4 import BeautifulSoup

from urllib.parse import urljoin

import camelot
import pandas as pd


# Page whose link list points at the PDFs that get converted to CSV below.
url = "https://www.mhlw.go.jp/stf/seisakunitsuite/bunya/kenkou_iryou/iryou/rinsyo/index_00014.html"

# Browser-like User-Agent sent with the request for the page above.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"}

# Without a timeout, requests waits indefinitely on a stalled connection.
r = requests.get(url, headers=headers, timeout=30)

r.raise_for_status()

soup = BeautifulSoup(r.content, "html5lib")

# "a[href]" skips anchors that have no href: urljoin() would otherwise
# return the index page URL itself, which is not a PDF.
for anchor in soup.select("ul.m-listLink--hCol2 > li > a[href]"):

    link = urljoin(url, anchor["href"])

    # The link text becomes the CSV file name, so drop surrounding
    # whitespace/newlines and replace path separators that would otherwise
    # be interpreted as directories.
    name = anchor.get_text().strip().replace("/", "_").replace("\\", "_")

    print(name, link)

    # Extract the tables from every page of the linked PDF.
    tables = camelot.read_pdf(link, pages="all")

    frames = [table.df for table in tables]

    # pd.concat() raises ValueError on an empty list; skip PDFs in which no
    # table was detected instead of aborting the remaining links.
    if not frames:
        print(f"no tables found, skipped: {link}")
        continue

    df = pd.concat(frames)

    # Write the cell contents only: no index column and no header row.
    df.to_csv(f"{name}.csv", index=False, header=False)