# 富山県ステータス (Toyama Prefecture status)

import datetime
import json
import pathlib
import re
from urllib.parse import urljoin

import jaconv
import pandas as pd
import requests
from bs4 import BeautifulSoup


def zen2han(s):
    """Parse a count label such as "１２人" into a float.

    Trailing "人" characters are stripped, the remainder is passed through
    ``jaconv.z2h`` with digit and ASCII conversion enabled, and the result
    is handed to ``float``.

    Args:
        s: String holding the number, optionally followed by "人".

    Returns:
        The parsed value as a ``float``.

    Raises:
        ValueError: If the converted text is not a valid float literal.
    """
    without_suffix = s.rstrip("人")
    halfwidth = jaconv.z2h(without_suffix, digit=True, ascii=True)
    return float(halfwidth)


def str2date(s, now=None):
    """Convert a month/day label (no year) into a ``pd.Timestamp``.

    The label carries no year, so the year is inferred from ``now``: the
    current year is used unless that would put the date after ``now``, in
    which case the label is taken to belong to the previous year. This
    keeps late-December labels correct when the script runs in January.
    Labels more than one year old cannot be disambiguated.

    Args:
        s: Text containing exactly two 1-2 digit numbers, month first and
            day second (e.g. "5月10日").
        now: Reference date/datetime supplying ``year``, ``month`` and
            ``day``. Defaults to the module-level ``dt_now``.

    Returns:
        A timezone-naive ``pd.Timestamp`` at midnight of the inferred date.

    Raises:
        ValueError: If ``s`` does not yield exactly two numbers, or they do
            not form a valid calendar date.
    """
    if now is None:
        now = dt_now

    m, d = map(int, re.findall(r"[0-9]{1,2}", s))

    y = now.year
    # Compare (month, day) tuples instead of building a Timestamp first, so a
    # "2/29" label seen in a non-leap year rolls back before it is validated.
    if (m, d) > (now.month, now.day):
        y -= 1

    return pd.Timestamp(y, m, d)


# Fixed UTC+09:00 offset labelled "JST".
JST = datetime.timezone(datetime.timedelta(hours=9), name="JST")

# Timezone-aware "now", captured once; str2date uses it to infer the year.
dt_now = datetime.datetime.now(tz=JST)

# Page that lists the downloadable data files.
url = "http://www.pref.toyama.jp/cms_sec/1205/kj00022038.html"

# Browser-like User-Agent sent with the page request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

# Look up the workbook anchor step by step so that a layout change on the
# page produces an explicit error instead of an AttributeError on None.
file_div = soup.find("div", id="file")
if file_div is None:
    raise RuntimeError(f'<div id="file"> not found on {url}')

anchor = file_div.find("a", string="強化・緩和の判断指標(直近1週間平均)の推移")
if anchor is None or not anchor.get("href"):
    raise RuntimeError(f"workbook link not found on {url}")

# The href may be relative; resolve it against the final (post-redirect)
# page URL so that pandas receives an absolute URL.
link = urljoin(r.url, anchor["href"])

# Load the workbook: the first three columns form the row index, two leading
# rows and three trailing rows are skipped. Transposing turns the
# per-date columns into rows.
df = pd.read_excel(
    link,
    index_col=[0, 1, 2],
    skiprows=2,
    skipfooter=3,
).T

# Row labels are month/day strings; convert them to timestamps.
df.index = df.index.map(str2date).rename("日付")

# Column order: hospitalised count, severe-case bed occupancy rate, new
# positives, patients with unknown infection route, positivity rate,
# achievement status.
df.columns = ["入院者数", "重症病床稼働率", "新規陽性者数", "感染経路不明の患者数", "陽性率", "達成状況"]

# Count columns hold text such as "１２人"; parse them into floats.
for col in ("入院者数", "新規陽性者数", "感染経路不明の患者数"):
    df[col] = df[col].apply(zen2han)

# Rate columns are scaled by 100 and rounded to one decimal place
# (presumably fraction -> percent; confirm against the workbook).
for col in ("重症病床稼働率", "陽性率"):
    df[col] = df[col].apply(lambda x: round(x * 100, 1))

# Merge the freshly scraped rows into the history kept on disk.
csv_path = pathlib.Path("toyama_status.csv")

try:
    df_ori = pd.read_csv(csv_path, index_col="日付", parse_dates=True)
except FileNotFoundError:
    # First run: there is no history yet. Start from an empty frame that has
    # the scraped frame's columns and index type so the merge below still works.
    df_ori = df.iloc[:0]

# Extend the history with any dates that are new in the scrape, then overwrite
# cells with the scraped values (DataFrame.update only copies non-NA values).
df_csv = df_ori.reindex(df_ori.index.union(df.index))
df_csv.update(df)

df_csv = df_csv.sort_index()

df_csv.to_csv(csv_path, encoding="utf_8_sig")

# Expose the date as a regular column so it appears in every JSON record.
df_csv["日付"] = df_csv.index.strftime("%Y-%m-%d")

# json.dump would serialise missing cells as the bare token NaN, which is not
# valid JSON and is rejected by strict parsers; emit null instead.
records = [
    {key: (None if pd.isna(value) else value) for key, value in row.items()}
    for row in df_csv.to_dict(orient="records")
]

data = {
    "statusItems": records,
}

p = pathlib.Path("status.json")
p.parent.mkdir(parents=True, exist_ok=True)

with p.open(mode="w", encoding="utf-8") as fw:
    json.dump(data, fw, ensure_ascii=False, indent=4)