import datetime
import pathlib
from urllib.parse import urljoin
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Page requested first; the CSRF token and the search form's action URL are
# read from its HTML before the search itself is posted.
base_url = "http://www.qq.pref.ehime.jp/qq38/WP0805/RP080501BL"

# Form fields sent with the search request. There are 13 per-block check
# flags (indices 0-12); only index 3 is set to "1", all others are "0".
payload = {
    "_blockCd": "",
    "forward_next": "",
    **{
        f"torinBlockDetailInfo.torinBlockDetail[{idx}].blockCheckFlg": (
            "1" if idx == 3 else "0"
        )
        for idx in range(13)
    },
}
# Load the search form and submit it through one Session, so that anything
# the session keeps between requests (e.g. cookies) is reused for the POST.
with requests.Session() as s:
    # Without a timeout a stalled server would block the script forever;
    # raise_for_status() stops an HTTP error page from being parsed as if it
    # were the real form.
    r = s.get(base_url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    # The form contains an input named "_csrf"; its value is sent back with
    # the search fields.
    token = soup.find("input", attrs={"name": "_csrf"}).get("value")
    payload["_csrf"] = token

    # Resolve the form's action attribute against base_url to get the
    # absolute URL to post to.
    url = urljoin(
        base_url, soup.find("form", attrs={"id": "_wp0805Form"}).get("action")
    )

    r = s.post(url, data=payload, timeout=30)
    r.raise_for_status()
# Parse the search response and collect every result table.
soup = BeautifulSoup(r.content, "html.parser")
tables = soup.find_all(
    "table", class_="comTblGyoumuCommon", summary="検索結果一覧を表示しています。"
)

# Each entry of `result` is a 5-item list:
# [hospital info, department, reception hours, date text, weekday text].
result = []
for day_table in tables:
    # The table's first <td> holds two whitespace-separated tokens, used
    # downstream as the date and weekday columns.
    date_text, weekday_text = day_table.td.get_text(strip=True).split()

    for row in day_table.find_all("tr", id=[1, 2, 3]):
        cells = [
            list(td.stripped_strings) for td in row.find_all("td", recursive=False)
        ]
        # Left-pad with None and keep only the last five items: a row with
        # three cells drops the pad, a row with two cells keeps None in the
        # first slot so it can be forward-filled later.
        padded = [None, *cells, date_text, weekday_text]
        result.append(padded[-5:])
# Build the base frame from the scraped rows. Rows that were left-padded with
# None take the value from the row above them via forward fill.
# `.ffill()` replaces the deprecated `fillna(method="ffill")` spelling.
df0 = (
    pd.DataFrame(result)
    .ffill()
    .set_axis(["医療機関情報", "診療科目", "外来受付時間", "日付", "曜日"], axis=1)
)

# Pull year/month/day out of the date text as named integer columns, which
# pd.to_datetime assembles into timestamps; keep only the date part.
# The pattern is a raw string so `\d` is not read as a string escape.
df0["date"] = pd.to_datetime(
    df0["日付"]
    .str.extract(r"(?P<year>\d{4})年(?P<month>\d{1,2})月(?P<day>\d{1,2})日")
    .astype(int)
).dt.date
# Each of the three scraped columns holds a list of strings per row; expand
# every list into one column per list position.

# Hospital info: positions 2 and 4 are discarded, the rest are named.
df1 = df0["医療機関情報"].apply(pd.Series)
df1 = df1.drop(columns=[2, 4])
df1 = df1.rename(columns={0: "病院名", 1: "住所", 3: "TEL(昼)", 5: "TEL(夜)"})

# Department: the first string becomes the department name.
df2 = df0["診療科目"].apply(pd.Series)
df2 = df2.rename(columns={0: "診療科目"})

# Reception hours: first string is the daytime slot, second the night slot.
df3 = df0["外来受付時間"].apply(pd.Series)
df3 = df3.rename(columns={0: "日中", 1: "夜間"})

# Put the date columns and the expanded pieces side by side.
df4 = pd.concat([df0[["日付", "曜日", "date"]], df1, df2, df3], axis=1)
# Derive a numeric department ID used later as a sort key. Later rules
# override earlier ones, in this order:
#   1. exact department name -> 0 / 2 / 7
#   2. department contains "外科" -> 1
#   3. department contains "内科" -> 2
#   4. address matches one of the listed place names -> 9
#   5. anything still unset -> 8
# The result is assigned back explicitly: calling `.mask(..., inplace=True)`
# on `df4[col]` is chained assignment, which warns on recent pandas and does
# not update df4 under Copy-on-Write.
df4["診療科目ID"] = (
    df4["診療科目"]
    .map({"指定なし": 0, "内科": 2, "小児科": 7})
    .mask(df4["診療科目"].str.contains("外科", na=False), 1)
    .mask(df4["診療科目"].str.contains("内科", na=False), 2)
    .mask(
        df4["住所"].str.contains("吉海町|宮窪町|伯方町|上浦町|大三島町|関前", na=False), 9
    )
    .fillna(8)
    .astype(int)
)
# Start time of the first listed slot: the text before "~" plus ":00",
# parsed as a timedelta.
df4["開始時間"] = pd.to_timedelta(df4["日中"].str.split("~").str[0] + ":00")

# When the first slot starts at 17:00 or later, swap the two slot columns.
# Values are read from df3 (the untouched originals) so the first assignment
# does not clobber the input of the second.
flag = df4["開始時間"] >= pd.Timedelta("17:00:00")
df4.loc[flag, "夜間"] = df3.loc[flag, "日中"]
df4.loc[flag, "日中"] = df3.loc[flag, "夜間"]

# sort_values returns a new frame, so the result must be assigned back.
# Resetting the index in place on the temporary left df4 unsorted.
df4 = df4.sort_values(by=["date", "診療科目ID", "開始時間"]).reset_index(drop=True)

# Keep only the output columns, in output order; helper columns are dropped.
df = df4.reindex(
    columns=["日付", "曜日", "病院名", "住所", "TEL(昼)", "TEL(夜)", "診療科目", "日中", "夜間"]
)
# Name the output file after today's date in a fixed UTC+9 offset.
JST = datetime.timezone(datetime.timedelta(hours=9))
dt_str = datetime.datetime.now(tz=JST).date().isoformat()

# Write data/<YYYY-MM-DD>.csv, creating the directory if needed. The
# "utf_8_sig" encoding puts a BOM at the start of the file.
p = pathlib.Path("data") / f"{dt_str}.csv"
p.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(p, index=False, encoding="utf_8_sig")