import pandas as pd
import requests
from bs4 import BeautifulSoup
# Target month in YYYYMM form; it is interpolated into the archive URL below.
date = "201909"
# f-string so that {date} is actually substituted; as a plain string the
# literal text "{date}" would be sent to the server.
url = f"https://weather.goo.ne.jp/past/887/{date}00/"
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
    ),
}
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url, headers=headers, timeout=10)
# Abort with an HTTPError on 4xx/5xx instead of parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.content, "html5lib")
def _cell_text(cell, selector):
    """Return the stripped text of the first ``selector`` match inside ``cell``.

    Returns "-" when the cell has no matching element, which is the same
    placeholder the weather cells are compared against below.
    """
    node = cell.select_one(selector)
    return node.get_text(strip=True) if node is not None else "-"


def _weather_label(cell):
    """Return the ``alt`` text of the cell's image, or "-" for a cell without one."""
    text = cell.get_text(strip=True)
    if text == "-":
        return text
    icon = cell.img
    if icon is None:
        # No image to read a label from: keep any visible text, else "-".
        return text or "-"
    return icon.get("alt")


# Column accumulators; each gains one entry per <td> of every matched block.
hiduke = []
kion_max = []
kion_min = []
tenki_09 = []
tenki_12 = []
tenki_15 = []

# Each "th.day" header cell starts a block: its sibling <td>s hold the dates,
# and the rows that follow hold the temperatures and the three weather rows.
for day_header in soup.select("table tr th.day"):
    hiduke.extend(
        td.get_text(strip=True) for td in day_header.find_next_siblings("td")
    )
    rows = day_header.parent.find_next_siblings("tr", limit=5)

    # rows[0] carries both temperatures (span.red / span.blue) in each cell.
    for td in rows[0].find_all("td"):
        kion_max.append(_cell_text(td, "span.red"))
        kion_min.append(_cell_text(td, "span.blue"))

    # rows[1], rows[2], rows[3] feed the 9, 12 and 15 o'clock lists in order.
    tenki_09.extend(_weather_label(td) for td in rows[1].find_all("td"))
    tenki_12.extend(_weather_label(td) for td in rows[2].find_all("td"))
    tenki_15.extend(_weather_label(td) for td in rows[3].find_all("td"))
# Column name -> scraped values. Each time slot maps to its own list; the
# "12時" column previously reused the 9 o'clock list and was listed twice.
tenki_dict = {
    "日": hiduke,
    "最高気温": kion_max,
    "最低気温": kion_min,
    "9時": tenki_09,
    "12時": tenki_12,
    "15時": tenki_15,
}
print(tenki_dict)

df = pd.DataFrame(tenki_dict)
# Replace the "-" placeholder with NaN so pandas treats it as missing.
df = df.mask(df == "-")
# dropna() and set_index() return new frames, so the result must be assigned
# or print(df) shows the unmodified frame.
# NOTE(review): with how="all" applied before set_index, a row is dropped only
# when every column, including "日", is NaN - confirm this is the intent.
df = df.dropna(axis=0, how="all").set_index("日")
print(df)