import datetime
import re
import time
from urllib.parse import urljoin
import string
import requests
from bs4 import BeautifulSoup
# Desktop-browser User-Agent; the Salesforce-hosted page may serve a
# different (or blocked) response to the default requests UA.
headers = {
'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
}
# Japanese municipality name -> romanized slug, used to build the output
# Markdown filename for each shelter post.
city_names = {
'今治市': 'imabari',
'新居浜市': 'niihama',
'西条市': 'saijo',
'四国中央市': 'shikokuchuo',
'上島町': 'kamijima',
'松山市': 'matsuyama',
'伊予市': 'iyo',
'東温市': 'toon',
'久万高原町': 'kumakogen',
'松前町': 'masaki',
'砥部町': 'tobe',
'宇和島市': 'uwajima',
'八幡浜市': 'yawatahama',
'大洲市': 'ozu',
'西予市': 'seiyo',
'内子町': 'uchiko',
'伊方町': 'ikata',
'松野町': 'matsuno',
'鬼北町': 'kihoku',
'愛南町': 'ainan'
}
# string.Template body for each generated post: YAML front matter
# (title/date/category/tag) followed by the announcement ($hapyou) and
# the shelter table ($hinanjo).  Placeholders are filled with
# Template.safe_substitute in the main loop.
markdown = """\
---
title: $title($date)
date: $datetime
category:
- $city
tag:
- 避難所
---
$hapyou
$hinanjo
"""
def get_hinanjo_page(url):
    """Fetch one shelter-detail page and return its Markdown fragments.

    Parameters
    ----------
    url : str
        Absolute URL of a shelter-detail page from the list page.

    Returns
    -------
    tuple[str, str]
        ``(announcement, shelter_table)`` — the announcement text with
        paragraphs separated by blank lines, and a Markdown table of the
        shelters whose status is 開設 (open).  Both strings are empty
        when the HTTP request does not return 200.
    """
    # timeout so a stalled server cannot hang the whole run.
    r = requests.get(url, headers=headers, timeout=30)
    if r.status_code != requests.codes.ok:
        # The original fell off the end and returned None here, which made
        # the caller's 2-tuple unpacking crash with a TypeError.
        return '', ''
    soup = BeautifulSoup(r.content, 'html.parser')
    body = soup.select_one('#wrap > p').get_text(strip=True)
    # maxsplit=1: we unpack into exactly two names, so a second occurrence
    # of the delimiter in the body must not produce a third part (the
    # original maxsplit=2 would raise ValueError in that case).
    hapyou, hinanjo = [
        part.strip()
        for part in body.split('(避難世帯数、避難人数は、自主避難を含む)', 1)
    ]
    # Collapse "label:   value" spacing so every line reads "label:value".
    hapyou = re.sub(r':\s+', ':', hapyou)
    res_hapyou = '\n\n'.join(line.strip() for line in hapyou.splitlines() if line)
    # Groups: name, optional 臨時 marker, status, timestamp, households, people.
    pattern = re.compile(
        r'^(.*?):(臨時)?避難所\s(.*?)\s+\((\d{4}/\d{2}/\d{2} \d{2}:\d{2})\)\s+避難世帯数:(.*?)\s+避難人数:(.*?)\s+?$',
        re.M)
    # Keep only shelters whose status (group 3) is 開設 = "open".
    hinanjo_open = [row for row in pattern.findall(hinanjo) if row[2] == '開設']
    hinanjo_list = []
    if hinanjo_open:
        hinanjo_list.append("|避難所名|状況|避難世帯数|避難人数|更新日|")
        hinanjo_list.append("|--|--|--|--|--|")
        for row in hinanjo_open:
            hinanjo_list.append(
                '|{0[0]}|{0[2]}|{0[4]}|{0[5]}|{0[3]}|'.format(row))
    res_hinanjo = '\n'.join(hinanjo_list)
    return res_hapyou, res_hinanjo
if __name__ == '__main__':
    # Scrape the shelter list page, then generate one Markdown post per
    # announcement made within the last 10 days.
    url = 'http://ehime.force.com/PUB_VF_HinanjyoList'
    # timeout so a stalled server cannot hang the whole run.
    r = requests.get(url, headers=headers, timeout=30)
    if r.status_code == requests.codes.ok:
        soup = BeautifulSoup(r.content, 'html.parser')
        # Cut-off: only entries newer than 10 days ago are processed.
        dt_now = datetime.datetime.now() - datetime.timedelta(days=10)
        for dl in soup.select('#shelterArea > div > dl'):
            # Each <dl> holds exactly two <p> tags: timestamp then title
            # (assumed from the unpacking below — TODO confirm on the page).
            _date, _title = [
                p.get_text(strip=True) for p in dl.select('a > dd > p')
            ]
            dt_date = datetime.datetime.strptime(_date, '%Y/%m/%d %H:%M')
            title = _title.split(':')[0].strip()
            # City is the first whitespace-separated token; drop the
            # "[訂正]" (correction) marker so city_names lookup matches.
            city = title.split()[0].strip().replace('[訂正]', '')
            if dt_date > dt_now:
                link = urljoin(url, dl.select_one('a').get('href'))
                hapyou, hinanjo = get_hinanjo_page(link)
                context = {
                    'title': title,
                    'city': city,
                    'date': _date,
                    'datetime': dt_date.strftime('%Y-%m-%d %H:%M:%S'),
                    'hinanjo': hinanjo,
                    'hapyou': hapyou,
                }
                template = string.Template(markdown)
                md_post = template.safe_substitute(context)
                filename = '{}-{}-hinanjo.md'.format(
                    dt_date.strftime('%Y-%m-%d-%H%M'), city_names[city])
                # Explicit utf-8: the post is Japanese text and the platform
                # default codec (e.g. cp932 on Windows) could raise
                # UnicodeEncodeError on write.
                with open(filename, 'w', encoding='utf-8') as fw:
                    fw.write(md_post)
                # Be polite to the server between detail-page fetches.
                time.sleep(1)
            else:
                # The list is newest-first, so every remaining entry is
                # older than the cut-off — stop here.
                break