"""Convert the Ehime Prefectural Police incident/accident bulletins into an RSS feed."""

import datetime
import re

import jaconv
import requests
from bs4 import BeautifulSoup

from feedgen.feed import FeedGenerator

# Bulletin page to scrape; the same address is used as the feed's link.
url = "https://www.police.pref.ehime.jp/sokuho/sokuho.htm"

# Browser-style User-Agent string sent with the page request.
user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
headers = {"User-Agent": user_agent}

# Date/time: fixed UTC+9 timezone and the current moment expressed in it.

JST = datetime.timezone(datetime.timedelta(hours=9))
dt_now = datetime.datetime.now(tz=JST)

# RSS settings: feed-level metadata shared by every entry.

fg = FeedGenerator()

# Descriptive fields.
fg.title("事件事故速報")
fg.subtitle("愛媛県警")
fg.language("ja")

# The feed links back to the scraped page and is stamped with the run time.
fg.link(href=url)
fg.updated(dt_now)

# Scraping: download the bulletin page and collect its table rows.

# A timeout is required here: without one, requests waits indefinitely when
# the server accepts the connection but never answers.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

# Pass the raw bytes so BeautifulSoup works out the page encoding itself.
soup = BeautifulSoup(r.content, "html.parser")

# Every <tr> of the tables inside the top page's sections.
trs = soup.select(
    "div#hpb-container div#hpb-inner div#toppage div.hpb-section table tr"
)

def _resolve_pub_date(month, day, now):
    """Return midnight of *month*/*day* in the latest year that is not after *now*.

    The page only states month and day, so the year has to be inferred. The
    current year is tried first, then the previous one, so that an item dated
    December 31 and read on January 1 is not stamped a year into the future.

    Args:
        month: Month number taken from the entry title.
        day: Day-of-month number taken from the entry title.
        now: Timezone-aware reference datetime; its tzinfo is reused.

    Returns:
        A timezone-aware datetime, or None when neither candidate year gives
        a valid calendar date that is not in the future.
    """
    for year in (now.year, now.year - 1):
        try:
            candidate = datetime.datetime(year, month, day, tzinfo=now.tzinfo)
        except ValueError:
            # Not a real calendar date in this year (e.g. February 30).
            continue
        if candidate <= now:
            return candidate
    return None


# Titles are expected to end with "(<month>月<day>日 ...)". A raw string keeps
# the regex escapes \( and \) from being parsed as invalid string escapes.
_TITLE_DATE = re.compile(r"\(([0-9]{1,2})月([0-9]{1,2})日 .+\)$")

for tr in trs:
    # Normalize every text fragment of the row with jaconv (NFKC mode),
    # skipping the "■" marker fragments.
    data = [jaconv.normalize(i, "NFKC") for i in tr.stripped_strings if i != "■"]

    # A usable row needs a title plus at least one line of body text.
    if len(data) < 2:
        continue

    # Rows whose first fragment carries no trailing date are not entries.
    matched = _TITLE_DATE.search(data[0])
    if matched is None:
        continue

    month, day = map(int, matched.groups())
    pub_date = _resolve_pub_date(month, day, dt_now)

    fe = fg.add_entry()
    fe.title(data[0])
    fe.description("\n".join(data[1:]))

    # An unparseable date must not abort the whole feed: the entry is kept
    # (its title still shows the date text) but gets no publication date.
    if pub_date is not None:
        fe.published(pub_date)

# Save the feed as rss.xml in the working directory.
fg.rss_file("rss.xml")

# Also echo a pretty-printed copy of the feed to stdout.
pretty_xml = fg.rss_str(pretty=True)
print(pretty_xml.decode())