import io
import csv
import requests
import time
import datetime
from bs4 import BeautifulSoup
# HTTP headers sent with every page request: a browser-style User-Agent
# string instead of the default one supplied by the HTTP library.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
    ),
}

# Embedded CSV listing the pages to scrape, one page per line:
#   hisaichi - name expected in the page's "affected municipality" element
#   dairi    - name expected in the page's "proxy" element; empty when the
#              page has no proxy, in which case that element is not looked up
#   url      - address of the page to fetch
# The first line is the header row that csv.DictReader uses for the keys.
data = """hisaichi,dairi,url
愛媛県,,https://www.furusato-tax.jp/saigai/detail/485
愛媛県,熊本県,https://www.furusato-tax.jp/saigai/detail/495
愛媛県,茨城県,https://www.furusato-tax.jp/saigai/detail/323
愛媛県,静岡県小山町,https://www.furusato-tax.jp/saigai/detail/460
愛媛県松山市,,https://www.furusato-tax.jp/saigai/detail/501
愛媛県今治市,,https://www.furusato-tax.jp/saigai/detail/456
愛媛県松野町,,https://www.furusato-tax.jp/saigai/detail/430
愛媛県鬼北町,,https://www.furusato-tax.jp/saigai/detail/442
愛媛県大洲市,,https://www.furusato-tax.jp/saigai/detail/407
愛媛県大洲市,愛媛県内子町,https://www.furusato-tax.jp/saigai/detail/420
愛媛県大洲市,山形県中山町,https://www.furusato-tax.jp/saigai/detail/486
愛媛県大洲市,滋賀県高島市,https://www.furusato-tax.jp/saigai/detail/455
愛媛県大洲市,北海道えりも町,https://www.furusato-tax.jp/saigai/detail/497
愛媛県宇和島市,,https://www.furusato-tax.jp/saigai/detail/400
愛媛県宇和島市,山形県三川町,https://www.furusato-tax.jp/saigai/detail/512
愛媛県西予市,,https://www.furusato-tax.jp/saigai/detail/401
愛媛県西予市,高知県室戸市,https://www.furusato-tax.jp/saigai/detail/454
"""
# Timestamp of this run. It is used for both the output file name and the
# date column, so every row written by one run carries the same time.
dt_now = datetime.datetime.now()
fr = io.StringIO(data)
# csv readers ignore ``lineterminator`` (they always accept '\r' or '\n'),
# so it is not passed here.
reader = csv.DictReader(fr, dialect='excel')
# NOTE(review): the extension says .tsv, but the 'excel' dialect used for the
# writer below produces comma-separated values. The name is kept unchanged so
# that anything consuming these files keeps working - confirm which is wanted.
filename = 'furusato' + dt_now.strftime('%Y%m%d%H%M') + '.tsv'

# Seconds to wait for the server before giving up on a single page; without
# a timeout a stalled connection would block the script forever.
_REQUEST_TIMEOUT = 30

# CSS selectors for the page elements that are read.
_HISAICHI_SELECTOR = '#projectOverview > div.project_localname > h2.sup_list > a'
_DAIRI_SELECTOR = (
    '#projectOverview > div.project_localname > dl.sup_reception > dd > a')
_TOTALS_SELECTOR = '#projectOverview > div.project_about > p.sup_moneyTotal'


def _scrape_row(row):
    """Fetch ``row['url']`` and extract the figures shown on the page.

    Args:
        row: Mapping with at least the keys ``url`` and ``dairi`` (one
            record of the embedded ``data`` CSV).

    Returns:
        A tuple ``(hisaichi, dairinin, kingaku, kensu)`` of strings, or
        ``None`` when the page could not be fetched or did not contain the
        expected elements. The reason for a ``None`` result is printed.
        ``dairinin`` is ``''`` when ``row['dairi']`` is empty; ``kingaku``
        and ``kensu`` have their trailing 円/件 characters and their commas
        removed.
    """
    url = row['url']
    try:
        r = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable page must not abort the remaining rows.
        print('error', url, exc)
        return None
    if r.status_code != requests.codes.ok:
        print('error', url, r.status_code)
        return None

    soup = BeautifulSoup(r.content, 'html5lib')

    hisaichi_tag = soup.select_one(_HISAICHI_SELECTOR)
    if hisaichi_tag is None:
        print('error', url, 'element not found:', _HISAICHI_SELECTOR)
        return None
    hisaichi = hisaichi_tag.get_text(strip=True)

    # The proxy element is only looked up when the row expects one.
    if row['dairi']:
        dairi_tag = soup.select_one(_DAIRI_SELECTOR)
        if dairi_tag is None:
            print('error', url, 'element not found:', _DAIRI_SELECTOR)
            return None
        dairinin = dairi_tag.get_text(strip=True)
    else:
        dairinin = ''

    totals = [
        tag.get_text(strip=True).rstrip('円件').replace(',', '')
        for tag in soup.select(_TOTALS_SELECTOR)
    ]
    # Exactly two matches are expected: the amount, then the count.
    if len(totals) != 2:
        print('error', url, 'expected 2 totals, found', len(totals))
        return None
    kingaku, kensu = totals

    return hisaichi, dairinin, kingaku, kensu


# newline='' is what the csv module asks for on files it writes to; it also
# keeps the explicit '\n' line terminator from being rewritten on Windows.
# NOTE(review): no encoding is given, so the platform default is used for
# the Japanese text - confirm whether a fixed encoding is wanted.
with open(filename, 'w', newline='') as fw:
    writer = csv.writer(fw, dialect='excel', lineterminator='\n')
    writer.writerow(['日付', '被災自治体', '代理', '金額', '件数'])
    stamp = dt_now.strftime('%Y/%m/%d %H:%M')
    for row in reader:
        scraped = _scrape_row(row)
        if scraped is not None:
            hisaichi, dairinin, kingaku, kensu = scraped
            # Only record the figures when the page really is the one the
            # row describes; otherwise report the mismatch.
            if hisaichi == row['hisaichi'] and dairinin == row['dairi']:
                writer.writerow([stamp, hisaichi, dairinin, kingaku, kensu])
            else:
                print('error', hisaichi, row['hisaichi'], dairinin,
                      row['dairi'])
        # Pause between requests, whether or not the last one succeeded.
        time.sleep(1)