import datetime
import re
import jaconv
import requests
from bs4 import BeautifulSoup
from feedgen.feed import FeedGenerator
# Source page: Ehime Prefectural Police incident/accident bulletins.
url = "https://www.police.pref.ehime.jp/sokuho/sokuho.htm"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# Japan Standard Time (UTC+9); the page's dates are local times with no tz info.
JST = datetime.timezone(datetime.timedelta(hours=+9))
dt_now = datetime.datetime.now(JST)

# Feed-level metadata (titles intentionally kept in Japanese — they are feed output).
fg = FeedGenerator()
fg.title("事件事故速報")
fg.link(href=url)
fg.subtitle("愛媛県警")
fg.language("ja")
fg.updated(dt_now)

# Fetch the bulletin page. requests has NO default timeout, so without one a
# stalled server would hang this script forever; 30 s is generous for one page.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, "html.parser")
# Each bulletin is a table row inside the top-page section of the site layout.
trs = soup.select(
    "div#hpb-container div#hpb-inner div#toppage div.hpb-section table tr"
)
# Headlines end with a date like "(3月15日 金曜)"; raw string avoids the
# invalid "\(" escape warning, and compiling once hoists it out of the loop.
date_pattern = re.compile(r"\(([0-9]{1,2})月([0-9]{1,2})日 .+\)$")

for tr in trs:
    # NFKC-normalize full-width characters and drop the "■" bullet cells.
    data = [jaconv.normalize(s, "NFKC") for s in tr.stripped_strings if s != "■"]
    if len(data) > 1:
        fe = fg.add_entry()
        fe.title(data[0])
        fe.description("\n".join(data[1:]))
        match = date_pattern.search(data[0])
        # Only set a published date when the headline actually carries one.
        # (Previously a non-matching headline reused the stale pubDate from
        # an earlier iteration — or raised NameError on the first entry.)
        if match:
            month, day = map(int, match.groups())
            year = dt_now.year
            # Headlines carry no year; a month ahead of the current one must
            # be from late last year (e.g. a December item read in January).
            if month > dt_now.month:
                year -= 1
            fe.published(datetime.datetime(year, month, day, tzinfo=JST))
# Persist the feed to disk, then echo the pretty-printed XML to stdout.
fg.rss_file("rss.xml")
rss_xml = fg.rss_str(pretty=True).decode()
print(rss_xml)