"""Scrape Imabari City's evacuation-preparation notices, evacuation
advisories, evacuation orders, and shelter information."""

import datetime
import re
from urllib.parse import urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup


# Matches timestamps such as "2017年9月17日 10時30分".  ``\s*`` accepts any
# Unicode whitespace (ASCII or full-width space) between the date and the
# time, and also no whitespace at all, which can happen once the title text
# has been stripped.
_DATE_PATTERN = re.compile(
    r'(\d{4})年(\d{1,2})月(\d{1,2})日\s*(\d{1,2})時(\d{1,2})分')


def get_refuge(url):
    """Fetch a single notice page and extract its headline data.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        A ``(title, description, url, pubdate)`` tuple.  ``title`` and
        ``description`` are the stripped text of the first matching heading
        and paragraph (an empty string when the element is missing).
        ``pubdate`` is a naive ``datetime.datetime`` parsed from the
        timestamp embedded in the title, or ``None`` when the title holds
        no parsable timestamp.

    Raises:
        urllib.error.URLError: If the page cannot be downloaded.
    """
    # Close the HTTP response deterministically instead of relying on GC.
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    # Compare against None explicitly: the check must depend only on whether
    # an element was found, not on the element's truth value.
    title_tag = soup.select_one('#main_container > h1, h3')
    title = title_tag.get_text(strip=True) if title_tag is not None else ''

    # Default to None so the return statement never hits an unbound name
    # when the title carries no timestamp.
    pubdate = None
    match = _DATE_PATTERN.search(title)
    if match:
        try:
            pubdate = datetime.datetime(*map(int, match.groups()))
        except ValueError:
            # Digits matched but do not form a valid date/time.
            pubdate = None

    body_tag = soup.select_one('#main_container > p')
    description = body_tag.get_text(strip=True) if body_tag is not None else ''

    return title, description, url, pubdate


def scraping(url, css_select):
    """Collect the details of every notice linked from an index page.

    Args:
        url: URL of the index page listing the notices.
        css_select: CSS selector that matches the ``<a>`` elements linking
            to the individual notice pages.

    Returns:
        A list with one ``get_refuge`` result per matched link, in
        document order.

    Raises:
        urllib.error.URLError: If the index page cannot be downloaded.
    """
    # Close the HTTP response deterministically instead of relying on GC.
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    # Skip anchors without an href: urljoin() would otherwise return the
    # index URL itself and the index page would be scraped as a notice.
    return [
        get_refuge(urljoin(url, anchor['href']))
        for anchor in soup.select(css_select)
        if anchor.get('href')
    ]


if __name__ == '__main__':

    def _show(entries):
        """Print title, description and URL of each entry, then a rule."""
        rule = '-' * 30
        for entry in entries:
            print('\n'.join(entry[:3]))
            print(rule)

    # Evacuation-preparation notices, evacuation advisories and
    # evacuation orders
    urge = scraping(
        'http://www.city.imabari.ehime.jp/bousai/kankoku/',
        '#main_container > p > a',
    )
    _show(urge)

    # Shelter information
    shelter = scraping(
        'http://www.city.imabari.ehime.jp/bousai/hinanjo/',
        '#main_container > div > p > a',
    )
    _show(shelter)