www.pref.toyama.jp
!pip install jaconv
!pip install pandas
import datetime
import json
import pathlib
import jaconv
import pandas as pd
# Download the page and keep the first HTML table found on it.  The first
# table column becomes the index, and the ditto mark "〃" is read as a
# missing value (NaN).
df = pd.read_html(
    io="http://www.pref.toyama.jp/cms_sec/1205/kj00021798.html",
    index_col=0,
    na_values="〃",
)[0]

# Timestamp recorded in the output file; naive local time of the machine
# running the script, taken right after the download.
dt_now = f"{datetime.datetime.now():%Y-%m-%d %H:%M}"
def _to_halfwidth(text):
    """Return *text* with full-width digits and ASCII converted to half-width.

    Kana conversion is disabled (``kana=False``).
    """
    return jaconv.z2h(text, kana=False, digit=True, ascii=True)


# Index labels are normalised and then cast to integers.
df.index = df.index.map(_to_halfwidth).astype(int)

# Missing announcement dates are replaced with empty strings first so that
# every value passed to the converter is a string.
df["発表日"] = df["発表日"].fillna("").apply(_to_halfwidth)
# Split each announcement date into its parts.  The regex yields four
# columns: 0 = the whole optional "令和N年" prefix, 1 = the Reiwa year number,
# 2 = month, 3 = day.  Rows that do not match produce NaN in every column.
df_date = df["発表日"].str.extract(
    r"(令和(\d{1,2})年)?(\d{1,2})月(\d{1,2})日$", expand=True
)

# Column 0 is only a wrapper group, so drop it.  Missing parts (an omitted
# year, or a whole row that did not match) are carried forward from the
# previous row.  `.ffill()` replaces `fillna(method="ffill")`, whose `method`
# argument is deprecated in current pandas.
# NOTE(review): if the very first row has no year, NaN remains and
# `astype(int)` raises; "令和元年" (no digits) is also not matched by the
# pattern -- confirm neither occurs in the source table.
df_date = df_date.drop(columns=0).ffill().astype(int)
df_date.rename(columns={1: "year", 2: "month", 3: "day"}, inplace=True)

# Convert the Reiwa era year to a Gregorian year (Reiwa 1 == 2019).
df_date["year"] += 2018

# pd.to_datetime assembles datetimes from the year/month/day columns.
df["発表日"] = pd.to_datetime(df_date)
# Publication date as an ISO "YYYY-MM-DD" string, derived from the parsed
# announcement datetime.
df["公表日"] = df["発表日"].dt.strftime("%Y-%m-%d")

# Age bracket label: append "代" to the stringified value, then rewrite the
# two open-ended brackets, which need a different wording.
df["年代"] = (df["年代"].astype(str) + "代").replace(
    {"10未満代": "10歳未満", "90以上代": "90歳以上"}
)
# Order rows by their integer index before export.
df.sort_index(inplace=True)

# Only these four columns go into the published records.
df_patients = df[["公表日", "居住地", "年代", "性別"]]

data = {
    "patients": {
        "data": df_patients.to_dict(orient="records"),
        "date": dt_now,
    }
}

# Write data/patients.json, creating the directory if needed.
# ensure_ascii=False keeps the Japanese text readable in the file.
p = pathlib.Path("data") / "patients.json"
p.parent.mkdir(parents=True, exist_ok=True)
with p.open("w", encoding="utf-8") as fw:
    json.dump(data, fw, ensure_ascii=False, indent=4)