import datetime
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm
# Page that is requested for every (area, month) combination.
url = "https://www.city.kumamoto.jp/cal_recycle/pub/default.aspx"

# HTTP headers sent with each request: a browser-style User-Agent string.
headers = {}
headers["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
)
# Collection areas to scrape. Each entry is a dict with:
#   "smst": integer sent as the `smst` query parameter of the calendar request
#           (presumably the site's identifier for the area -- confirm against the site)
#   "name": label copied into every output row for that area
# Note: the value 6 does not occur; 19 is listed in the sixth position.
areas = [
    {"smst": smst, "name": name}
    for smst, name in (
        (1, "池田・黒髪(3号線の西側)・壺川・清水・城北・高平台"),
        (2, "池上・一新・春日・慶徳・五福・城西・城東・碩台・花園"),
        (3, "小島・川尻・城山・城南・白坪・高橋・中島・古町・松尾・力合・力合西"),
        (4, "託麻北・託麻西・託麻東・託麻南・長嶺"),
        (5, "秋津・桜木・月出・桜木東・東町・山ノ内"),
        (19, "麻生田・楠・黒髪(3号線の東側)・龍田・龍田西・楡木・武蔵・弓削"),
        (7, "帯山・帯山西・託麻原・西原"),
        (8, "出水・大江・向山・白川・白山・春竹・本荘"),
        (9, "出水南・田迎・田迎西・田迎南・日吉・日吉東・御幸"),
        (10, "泉ヶ丘・画図・尾ノ上・健軍・健軍東・砂取・若葉"),
        (11, "川上(改寄・大鳥居・楠野・小糸山・明徳)・北部東(梶尾・飛田・鶴羽田)"),
        (12, "西里・川上(鹿子木・西梶尾・四方寄・飛田【八原地区】)・北部東(四方寄【東葉山団地】)"),
        (13, "河内"),
        (14, "芳野"),
        (15, "飽田東・隈庄・杉上"),
        (16, "飽田西・飽田南・富合・豊田"),
        (17, "中緑・銭塘"),
        (18, "川口・奥古閑"),
    )
]
# Calendar year to scrape; one request is issued per (area, month) pair.
year = 2022

# Seconds to wait on the server before giving up on a single request.
# Without a timeout, requests waits indefinitely on a stalled connection.
request_timeout = 30

# One dict per icon found in a day cell, with keys "id", "name", "date", "kind".
data = []

for area in tqdm(areas):
    for month in tqdm(range(1, 13)):
        # c_id and lmst are fixed values carried over unchanged; only the
        # year, month and area ("smst") vary between requests.
        payload = {"c_id": 14, "yy": year, "mm": month, "lmst": 1, "smst": area["smst"]}

        r = requests.get(url, headers=headers, params=payload, timeout=request_timeout)
        r.raise_for_status()

        soup = BeautifulSoup(r.content, "html.parser")
        table = soup.select_one("table.cal")
        if table is None:
            # select_one returns None when nothing matches; fail with a
            # message that says which page was affected instead of an
            # AttributeError on None.
            raise RuntimeError(
                f"calendar table 'table.cal' not found for smst={area['smst']}, "
                f"year={year}, month={month}"
            )

        for tr in table.select("tr"):
            for td in tr.select("td"):
                # Only cells that contain a day number are processed.
                tag = td.select_one("span > p strong")
                if tag is None:
                    continue

                day = int(tag.get_text(strip=True))

                for img in td.select("img"):
                    alt = img.get("alt")
                    if alt is None:
                        # An image without alt text has no label to record.
                        continue

                    data.append(
                        {
                            "id": area["smst"],
                            "name": area["name"],
                            "date": datetime.datetime(year, month, day),
                            # Drop the literal "アイコン画像" from the alt text,
                            # keeping the remaining label.
                            "kind": alt.replace("アイコン画像", ""),
                        }
                    )

        # Pause between consecutive month requests.
        time.sleep(3)

    # Additional pause after finishing each area.
    time.sleep(3)
# Build one row per scraped icon. The columns are pinned explicitly so the
# frame has the expected schema even when nothing was scraped; otherwise an
# empty `data` list gives a column-less frame and the groupby below raises
# KeyError on "id".
df0 = pd.DataFrame(data, columns=["id", "name", "date", "kind"])

if df0.empty:
    # Nothing to aggregate: keep the empty frame so a header-only CSV is written.
    df1 = df0
else:
    # Collapse multiple kinds on the same area/date into one "・"-separated cell.
    df1 = df0.groupby(["id", "name", "date"])["kind"].apply("・".join).reset_index()

# utf_8_sig writes UTF-8 with a leading byte-order mark.
df1.to_csv("kumamoto.csv", encoding="utf_8_sig", index=False)