# 今治市の過去の天気をスクレイピング (Scrape past weather data for Imabari City)

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Target month as "YYYYMM".
date = "201909"

# Must be an f-string: without the prefix the literal text "{date}" is sent
# in the URL instead of the month above.
# NOTE(review): "887" is presumably the site's location code for Imabari —
# confirm against the site.
url = f"https://weather.goo.ne.jp/past/887/{date}00/"

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# requests has no default timeout; set one so a stalled server cannot hang
# the script forever.
r = requests.get(url, headers=headers, timeout=30)

# Fail loudly on 4xx/5xx instead of parsing an error page.
r.raise_for_status()

# Parse the raw bytes and let the parser detect the page encoding.
soup = BeautifulSoup(r.content, "html5lib")

# Column accumulators; each list receives one entry per <td> cell visited.
hiduke, kion_max, kion_min = [], [], []
tenki_09, tenki_12, tenki_15 = [], [], []


def _weather_label(cell):
    """Return "-" for a placeholder cell, otherwise the alt text of its icon."""
    if cell.text == "-":
        return cell.text
    return cell.img.get("alt")


# Each "th.day" header cell marks one row of dates; the rows that follow its
# parent <tr> hold the temperatures and the 9:00 / 12:00 / 15:00 weather.
for day_header in soup.select("table tr th.day"):

    # Dates
    date_cells = day_header.find_next_siblings("td")
    hiduke.extend([cell.get_text(strip=True) for cell in date_cells])

    following_rows = day_header.parent.find_next_siblings("tr", limit=5)

    # The first following row carries both temperatures in every cell:
    # the maximum in span.red and the minimum in span.blue.
    temperature_cells = following_rows[0].find_all("td")

    # Maximum temperature
    kion_max.extend(
        [cell.select_one("span.red").get_text(strip=True) for cell in temperature_cells]
    )

    # Minimum temperature
    kion_min.extend(
        [cell.select_one("span.blue").get_text(strip=True) for cell in temperature_cells]
    )

    # 9:00
    tenki_09.extend([_weather_label(cell) for cell in following_rows[1].find_all("td")])

    # 12:00
    tenki_12.extend([_weather_label(cell) for cell in following_rows[2].find_all("td")])

    # 15:00
    tenki_15.extend([_weather_label(cell) for cell in following_rows[3].find_all("td")])

# Column label -> per-day values gathered by the parsing loop.
# The "12時" column must use tenki_12; it was previously listed twice and
# both entries pointed at the 9:00 data, so the 12:00 data was never used.
tenki_dict = {
    "日": hiduke,
    "最高気温": kion_max,
    "最低気温": kion_min,
    "9時": tenki_09,
    "12時": tenki_12,
    "15時": tenki_15,
}

print(tenki_dict)

# Convert to a DataFrame
df = pd.DataFrame(tenki_dict)

# Replace "-" placeholders with missing values
df = df.mask(df == "-")

# Drop rows in which every column is missing, then index by day.
# dropna/set_index return new frames, so the result has to be assigned back;
# otherwise the cleaned frame is silently thrown away.
df = df.dropna(axis=0, how="all").set_index("日")

print(df)