# 救急病院202206 (emergency hospitals, 2022-06)

import io
import pathlib
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# URL that is fetched first and against which the form action is resolved.
base_url = "http://www.qq.pref.ehime.jp/qq38/WP0805/RP080501BL"

# Form fields for the search request: two empty control fields followed by
# thirteen block check flags (indices 0-12), of which only index 3 is set.
payload = {"_blockCd": "", "forward_next": ""}
payload.update(
    {
        f"torinBlockDetailInfo.torinBlockDetail[{idx}].blockCheckFlg": (
            "1" if idx == 3 else "0"
        )
        for idx in range(13)
    }
)

# Fetch the form page and submit the search within one session, so the POST
# is sent by the same session that obtained the CSRF token.
with requests.Session() as s:

    r = s.get(base_url, timeout=30)
    # Fail fast on an HTTP error instead of parsing an error page.
    r.raise_for_status()

    soup = BeautifulSoup(r.content, "html.parser")

    # Locate the CSRF token field and the search form.
    token_input = soup.find("input", attrs={"name": "_csrf"})
    form = soup.find("form", attrs={"id": "_wp0805Form"})

    if token_input is None or form is None:
        raise RuntimeError(
            "CSRF token input or form '_wp0805Form' not found in the page; "
            "the page layout may have changed"
        )

    # Add the token to the form fields.
    payload["_csrf"] = token_input.get("value")

    # Resolve the form action against the page URL.
    url = urljoin(base_url, form.get("action"))

    # Submit the search.
    r = s.post(url, data=payload, timeout=30)
    r.raise_for_status()


# Passing literal HTML to read_html is deprecated (pandas 2.1+); a file-like
# wrapper is accepted by old and new versions alike.
dfs = pd.read_html(
    io.StringIO(r.text),
    attrs={"summary": "検索結果一覧を表示しています。"},
    na_values="▲ページトップへ",
)


def dfs_join(dfs):
    """Clean each raw table and stack the results into one DataFrame.

    For every frame in ``dfs``: row 1 supplies the column labels, rows 2
    onward are kept as data, rows that are entirely NA are dropped, and the
    value of cell (0, 0) is stored in a ``日付`` column.

    Args:
        dfs: Iterable of DataFrames laid out as described above.

    Returns:
        The cleaned frames concatenated in order; the row labels of the
        input frames are kept as they are.
    """

    def _tidy(table):
        # Promote row 1 to the header and discard rows with no data at all.
        header = table.iloc[1]
        body = table.iloc[2:].rename(columns=header).dropna(how="all")
        return body.assign(**{"日付": table.iat[0, 0]})

    return pd.concat([_tidy(table) for table in dfs])


def split_tel(se):
    """Split labelled phone numbers into ``TEL(昼)`` and ``TEL(夜)`` columns.

    Each string in ``se`` is searched for a ``TEL(昼)`` or ``TEL(夜)`` label
    followed by whitespace and a run of digits and hyphens.

    Args:
        se: Series of strings. Its index must be unnamed, because the pivot
            relies on the ``level_0`` column that ``reset_index`` creates
            for an unnamed index level.

    Returns:
        DataFrame with exactly the columns ``TEL(昼)`` and ``TEL(夜)``,
        indexed by the labels of the rows of ``se`` that contained at least
        one match. A label that is absent in a row yields NaN.
    """

    # The label's parentheses must be escaped. Unescaped they are regex
    # groups, so the pattern would look for "TEL昼" and could never produce
    # the "TEL(昼)" / "TEL(夜)" labels requested by the reindex below.
    pattern = r"(TEL\([昼夜]\))\s+([0-9\-]+)"

    # Group 0 is the label, group 1 the number; one row per match.
    matches = se.str.extractall(pattern).reset_index()

    # One row per source element, one column per label.
    wide = matches.pivot(index="level_0", columns=0, values=1)

    # Guarantee both columns exist, in a fixed order.
    return wide.reindex(columns=["TEL(昼)", "TEL(夜)"])


# Stack every table except the first into one frame with a fresh 0..n-1 index.
# NOTE(review): the first matched table is skipped; presumably it is not a
# per-day result table - confirm against the page.
df0 = dfs_join(dfs[1:]).reset_index(drop=True)

# Remove the "医療法人" prefix together with the whitespace after it, so the
# whitespace split below starts at the hospital name.
df0["医療機関情報"] = df0["医療機関情報"].str.replace(r"医療法人\s+?", "", regex=True)

# First two whitespace-separated tokens are name and address; the rest is
# kept together as the phone text.
df0[["病院名", "住所", "電話番号"]] = df0["医療機関情報"].str.split(n=2, expand=True)

df_tel = split_tel(df0["電話番号"])

# Aligns on the row index; rows without any phone match get NaN.
df0 = df0.join(df_tel)

df0[["日中", "夜間"]] = df0["外来受付時間"].str.split(expand=True)

df0[["日付", "曜日"]] = df0["日付"].str.split(expand=True)

# Pull year / month / day out of the date text and name the columns the way
# pd.to_datetime expects when it assembles dates from a DataFrame.
df_date = (
    df0["日付"]
    .str.extract(r"(\d{4})年(\d{1,2})月(\d{1,2})日")
    .astype(int)
    .rename(columns={0: "year", 1: "month", 2: "day"})
)

df0["日付"] = pd.to_datetime(df_date)

# Output columns, in order; the first seven identify a row, the last two hold
# the consultation hours per time slot.
col = ["日付", "曜日", "病院名", "住所", "TEL(昼)", "TEL(夜)", "診療科目", "日中", "夜間"]

df1 = df0.reindex(columns=col)

# Long format: one row per (hospital row, time slot) that has hours.
df2 = df1.melt(
    id_vars=df1.columns.values[:7], var_name="時間帯", value_name="診察時間"
).dropna(subset=["診察時間"])

# Hours that mention "翌日" are filed under the night slot. Assign the result
# back: an in-place mask on the selected column is a chained update that does
# not reach df2 under pandas Copy-on-Write.
df2["時間帯"] = df2["時間帯"].mask(df2["診察時間"].str.contains("翌日"), "夜間")

# Back to wide format with one column per time slot.
df3 = (
    df2.pivot(index=df2.columns[:7], columns="時間帯", values="診察時間")
    .reset_index()
    .reindex(columns=col)
)

df3 = df3.sort_values(by=["日付", "日中", "夜間"])

# Name the file after the earliest date; use the position, not label 0,
# because the index is no longer in order after sorting.
dt_str = df3["日付"].iloc[0].strftime("%Y-%m-%d")

p = pathlib.Path("data", f"{dt_str}.csv")
p.parent.mkdir(parents=True, exist_ok=True)

# utf_8_sig writes a BOM at the start of the file.
df3.to_csv(p, encoding="utf_8_sig", index=False)