北海道オープンデータポータル www.harp.lg.jp
と同じ書式のCSVを作成
CSVからdata.jsonへ変換
スクレイピング
import csv
import datetime

import jaconv
import requests
from bs4 import BeautifulSoup


def daterange(_start, _end):
    """Yield datetimes one day apart from ``_start`` up to, but excluding, ``_end``.

    Nothing is yielded when ``_end`` is not at least one whole day after
    ``_start``.
    """
    for n in range((_end - _start).days):
        yield _start + datetime.timedelta(n)


url = "https://www.pref.mie.lg.jp/YAKUMUS/HP/m0068000071_00005.htm"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
}

# Without a timeout an unresponsive server would block the script forever.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html5lib")

# Scrape: collect one [datetime, count, count, ...] row per dated table row.
source_table = soup.find("table")
if source_table is None:
    raise ValueError(f"no <table> element found at {url}")

table = []

for tr in source_table.find_all("tr"):
    # Rows without any <td> (e.g. header rows made of <th>) carry no date cell;
    # rows whose first cell lacks the month marker are not data rows either.
    if tr.td is None or "月" not in tr.td.text:
        continue

    # Drop the trailing day/count unit characters and convert full-width
    # digits to half-width so the cells can be parsed with int().
    tds = [
        jaconv.z2h(td.get_text(strip=True).rstrip("日件"), digit=True)
        for td in tr.find_all("td")
    ]

    # The first cell is "<month>月<day>"; the remaining cells are counts.
    m, d = [int(td.strip()) for td in tds[0].split("月")]

    # NOTE(review): the year is hard-coded because the page only shows month
    # and day, so rows from any other year would be mis-dated. The fixed 18:00
    # time is presumably the publication time -- confirm.
    dt = datetime.datetime(2020, m, d, 18, 0)

    tmp = list(map(int, tds[1:]))
    table.append([dt] + tmp)

if not table:
    raise ValueError(f"no dated rows found in the table at {url}")

# Initialise: zero-fill every day from the first row up to (excluding) the last
# row, so days missing from the page still appear in the output.
result = {}

for i in daterange(table[0][0], table[-1][0]):
    result[i.isoformat(timespec="minutes")] = [0, 0, 0]

# Overwrite the zero-filled entries with the scraped values.
for k, *v in table:
    result[k.isoformat(timespec="minutes")] = v

# Sort by timestamp string; ISO-8601 text sorts chronologically.
data = sorted(result.items(), key=lambda x: x[0])
# Daily inspection counts: pair each timestamp with the first count column.
data_inspections = [
    dict(zip(("日付", "日検査数"), (stamp, counts[0]))) for stamp, counts in data
]

with open("inspections.csv", mode="w", newline="", encoding="shift_jis") as fw:
    # The header is taken from the keys of the first record.
    fieldnames = [*data_inspections[0]]

    writer = csv.DictWriter(fw, fieldnames)
    writer.writeheader()
    writer.writerows(data_inspections)
# Daily positive-case counts: pair each timestamp with the second count column.
data_patients = [
    dict(zip(("日付", "日陽性数"), (stamp, counts[1]))) for stamp, counts in data
]

with open("patients_summary.csv", mode="w", newline="", encoding="shift_jis") as fw:
    # The header is taken from the keys of the first record.
    fieldnames = [*data_patients[0]]

    writer = csv.DictWriter(fw, fieldnames)
    writer.writeheader()
    writer.writerows(data_patients)