読者です 読者をやめる 読者になる 読者になる

Pythonでスクレイピング3(日付の変換)

今治市役所お知らせの取得(日付の変換)

import datetime
from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.parse import urljoin

def date_conv(hiduke: datetime.date) -> str:
    """Return the calendar date of *hiduke* as an ISO ``YYYY-MM-DD`` string.

    Only the ``year``, ``month`` and ``day`` attributes are read, so any
    time-of-day component on a ``datetime`` argument is dropped.
    """
    day_only = datetime.date(hiduke.year, hiduke.month, hiduke.day)
    return day_only.isoformat()

# Fetch the Imabari city hall top page and print its announcements as
# date / description / link records.
url = "http://www.city.imabari.ehime.jp/"

# Base used to resolve the hrefs found in the announcements.
# NOTE(review): this host differs from `url` above; kept as-is because the
# page markup is not visible here -- confirm it is still the right base.
link_base = 'http://www.islands.ne.jp/imabari/'

# Use the response as a context manager so the connection is always closed.
with urlopen(url) as response:
    html = response.read()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed; the bytes are decoded as cp932.
soup = BeautifulSoup(html, "html.parser", from_encoding='cp932')

# Initialise the accumulators up front: if no matching <div> exists the loop
# body never runs, and the code below must still see (empty) lists instead of
# failing with NameError.
hiduke = []
naiyou = []

# NOTE(review): the id is spelled "osirse" (not "osirase"); left unchanged,
# verify against the live page.
for osirase in soup.find_all("div", {"id": "osirse"}):

    # One ISO date string per <dt>. get_text().strip() tolerates nested tags
    # and surrounding whitespace, where `.string` would yield None or make
    # strptime fail.
    hiduke.extend(
        date_conv(datetime.datetime.strptime(dt.get_text().strip(), '%Y年%m月%d日'))
        for dt in osirase.find_all("dt")
    )

    # One list of [title, absolute link] pairs per <dd>.
    naiyou.extend(
        [
            [anchor.get_text().strip(), urljoin(link_base, anchor.get('href'))]
            for anchor in dd.find_all("a")
        ]
        for dd in osirase.find_all("dd")
    )

# Pair the n-th date with the n-th <dd>'s links (zip stops at the shorter list).
osirase_list = list(zip(hiduke, naiyou))

# Flatten to one record per link.
kiji = [
    {"date": date, "description": title, "link": link}
    for date, links in osirase_list
    for title, link in links
]

for entry in kiji:
    print(entry["date"])
    print(entry["description"])
    print(entry["link"])
    print("-"*20)