# Scrape the Kumamoto City (熊本市) garbage-collection calendar.

import datetime
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm

# Browser-style User-Agent header sent with every request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) "
        "like Gecko"
    ),
}

# Calendar page that is queried once per area and month.
url = "https://www.city.kumamoto.jp/cal_recycle/pub/default.aspx"

# Collection areas: each entry pairs the "smst" query-parameter value with the
# district names it covers. Entries are kept in their original order.
areas = [
    {"smst": smst, "name": name}
    for smst, name in (
        (1, "池田・黒髪(3号線の西側)・壺川・清水・城北・高平台"),
        (2, "池上・一新・春日・慶徳・五福・城西・城東・碩台・花園"),
        (3, "小島・川尻・城山・城南・白坪・高橋・中島・古町・松尾・力合・力合西"),
        (4, "託麻北・託麻西・託麻東・託麻南・長嶺"),
        (5, "秋津・桜木・月出・桜木東・東町・山ノ内"),
        (19, "麻生田・楠・黒髪(3号線の東側)・龍田・龍田西・楡木・武蔵・弓削"),
        (7, "帯山・帯山西・託麻原・西原"),
        (8, "出水・大江・向山・白川・白山・春竹・本荘"),
        (9, "出水南・田迎・田迎西・田迎南・日吉・日吉東・御幸"),
        (10, "泉ヶ丘・画図・尾ノ上・健軍・健軍東・砂取・若葉"),
        (11, "川上(改寄・大鳥居・楠野・小糸山・明徳)・北部東(梶尾・飛田・鶴羽田)"),
        (12, "西里・川上(鹿子木・西梶尾・四方寄・飛田【八原地区】)・北部東(四方寄【東葉山団地】)"),
        (13, "河内"),
        (14, "芳野"),
        (15, "飽田東・隈庄・杉上"),
        (16, "飽田西・飽田南・富合・豊田"),
        (17, "中緑・銭塘"),
        (18, "川口・奥古閑"),
    )
]

# Calendar year (Western/Gregorian) whose twelve months are scraped.

year = 2022

# Scraping
#
# Fetches one calendar page per (area, month) and appends one record to
# `data` for every icon image found in a day cell. Each record holds the area
# id and name, the date, and the icon's alt text as the collection kind.

data = []

# Upper bound in seconds for each HTTP request. requests applies no timeout by
# default, so without this a stalled connection would hang the run forever.
request_timeout = 30

for area in tqdm(areas):

    for month in tqdm(range(1, 13)):

        # Query parameters selecting one area's page for one month.
        payload = {"c_id": 14, "yy": year, "mm": month, "lmst": 1, "smst": area["smst"]}

        r = requests.get(url, headers=headers, params=payload, timeout=request_timeout)
        r.raise_for_status()

        soup = BeautifulSoup(r.content, "html.parser")

        table = soup.select_one("table.cal")

        if table is None:
            # Report which page is missing its calendar instead of failing
            # later with an opaque AttributeError on None.
            raise RuntimeError(
                "calendar table 'table.cal' not found for "
                f"smst={area['smst']}, year={year}, month={month}"
            )

        for tr in table.select("tr"):

            for td in tr.select("td"):

                tag = td.select_one("span > p strong")

                # Only cells containing this element are processed; its text
                # is read as the day of the month.
                if tag is None:
                    continue

                day = int(tag.get_text(strip=True))

                for img in td.select("img"):

                    alt = img.get("alt")

                    # An image without an alt attribute carries no kind label.
                    if alt is None:
                        continue

                    data.append(
                        {
                            "id": area["smst"],
                            "name": area["name"],
                            "date": datetime.datetime(year, month, day),
                            "kind": alt.replace("アイコン画像", ""),
                        }
                    )

        # Pause after every request (presumably to keep server load low).
        time.sleep(3)

    # Additional pause between areas.
    time.sleep(3)

# Data wrangling
#
# Collapse the per-icon records into one row per (id, name, date), joining the
# kinds collected on the same day with "・", then export the result as CSV.

df0 = pd.DataFrame(data)

df1 = (
    df0.groupby(["id", "name", "date"])["kind"]
    .apply(lambda kinds: "・".join(kinds))
    .reset_index()
)

df1.to_csv("kumamoto.csv", index=False, encoding="utf_8_sig")