import datetime
import json
import re
from urllib.parse import urljoin
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Prefectural page that links to the Excel workbook of confirmed patients.
url = "https://web.pref.hyogo.lg.jp/kk03/corona_kanjyajyokyo.html"

# Browser-like User-Agent sent with every request to the site.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# Japanese weekday abbreviations, Monday first.
days = ["月", "火", "水", "木", "金", "土", "日"]

# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

# The anchor carrying the Excel icon class holds both the download link and
# the "as of" label that the update time is parsed from.
tag = soup.select_one("a.icon_excel")
if tag is None:
    raise ValueError(f"no 'a.icon_excel' link found on {url}")

s = tag.get_text(strip=True)
link = urljoin(url, tag.get("href"))

# Label shape: "...(<month>月<day>日 <hour>時現在)". Both ASCII and full-width
# parentheses are accepted.
ms = re.match(
    r"新型コロナウイルスに感染した患者の状況[((](\d+)月(\d+)日\s(\d+)時現在[))]", s
)
if ms is None:
    raise ValueError(f"unexpected Excel link label: {s!r}")

m, d, h = map(int, ms.groups())

# The hour is added as a timedelta instead of being passed as ``hour=`` so
# that values outside 0-23 (e.g. 24) roll over instead of raising.
_now = datetime.datetime.now()
last_update = datetime.datetime(_now.year, m, d, 0, 0) + datetime.timedelta(hours=h)

# The label carries no year. A result well in the future means the label
# belongs to the previous year (e.g. a December label read in January).
# One day of slack absorbs clock and time-zone differences.
if last_update - _now > datetime.timedelta(days=1):
    last_update = datetime.datetime(_now.year - 1, m, d, 0, 0) + datetime.timedelta(
        hours=h
    )
def my_parser(date):
    """Convert day counts since 1899-12-30 (Excel's serial-date epoch) to timestamps."""
    excel_epoch = pd.Timestamp("1899/12/30")
    return pd.to_datetime(date, unit="D", origin=excel_epoch)


# Load the workbook, skipping the 3 leading and 2 trailing rows, and run the
# third column (index 2) through ``my_parser``.
# NOTE(review): this reads a local "data.xlsx"; nothing in the visible code
# downloads the workbook from ``link`` -- confirm the file is fetched elsewhere.
# NOTE(review): ``date_parser`` is deprecated in pandas 2.0 -- confirm the
# pandas version this script is pinned to.
df = pd.read_excel(
    "data.xlsx",
    skiprows=3,
    skipfooter=2,
    parse_dates=[2],
    date_parser=my_parser,
)

# Drop columns that contain no data at all.
df.dropna(how="all", axis=1, inplace=True)

# Derived columns based on the confirmation date ("確認日").
df["リリース日"] = df["確認日"].dt.strftime("%Y-%m-%dT%H:%M:%S+09:00")
df["date"] = df["確認日"].dt.strftime("%Y-%m-%d")

# Append the "代" suffix to the age-bracket value.
df["年代"] = df["年代"].astype(str) + "代"

# Weekday number (0 = Monday) and its Japanese label looked up in ``days``.
df["week"] = df["確認日"].dt.dayofweek
df["曜日"] = df["week"].map(lambda idx: days[idx])

# Normalise column names, then index and order the rows by case number.
df = (
    df.rename(columns={"番号": "No", "備考欄": "備考"})
    .set_index("No")
    .sort_index()
)

# Independent copy holding only the columns that are exported.
df1 = df[["リリース日", "曜日", "居住地", "年代", "性別", "備考", "date"]].copy()
# Payload written to patients.json: one record per row of ``df1`` plus the
# timestamp of the source data.
patients = {
    "data": df1.to_dict(orient="records"),
    "last_update": last_update.strftime("%Y/%m/%d %H:%M"),
}

# ``ensure_ascii=False`` writes the Japanese text as-is, so the file encoding
# is fixed to UTF-8 instead of being left to the platform default.
with open("patients.json", "w", encoding="utf-8") as fw:
    json.dump(patients, fw, ensure_ascii=False, indent=4)
import requests
from bs4 import BeautifulSoup
# Second prefectural page; its "datatable" table is scraped row by row.
url = "https://web.pref.hyogo.lg.jp/kk03/corona_hasseijyokyo.html"

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
}

# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

tbl = soup.find("table", class_="datatable")
if tbl is None or tbl.tbody is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError(f"no 'table.datatable' with a <tbody> found on {url}")

# One list of stripped <td> texts per body row.
result = [
    [td.get_text(strip=True) for td in tr.find_all("td")]
    for tr in tbl.tbody.find_all("tr")
]