# Scraping the AKB48 election data
# Scrape the AKB48 45th general-election results page and write one row per
# member (rank, name, vote count) to akb48.csv.
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup

url = 'http://www.akb48.co.jp/sousenkyo_45th/result.php'
# Context manager closes the HTTP response even if read() raises.
with urlopen(url) as resp:
    html = resp.read()
soup = BeautifulSoup(html, 'html.parser')

# Explicit utf-8 keeps Japanese names intact regardless of the platform's
# default locale encoding.
with open('akb48.csv', 'w', encoding='utf-8') as fw:
    writer = csv.writer(fw, dialect='excel', lineterminator='\n')
    writer.writerow(['rank', 'akb_names', 'akb_count'])
    for item in soup.select('#main_area > div.frameFix > div > ul > li'):
        # "第N位" -> N
        rank = int(
            item.select_one('p.result_rank').get_text(strip=True).strip('第位'))
        # "12,345票" -> 12345
        count = int(
            item.select_one('p.result_count').get_text(
                strip=True).rstrip('票').replace(',', ''))
        name = item.select_one('h4.result_name').get_text(strip=True)
        writer.writerow([rank, name, count])
# Scraping the 2017 general-election data (Tokyo)
# Scrape the Asahi Shimbun results page for the 2017 House of Representatives
# election (Tokyo districts) and write one row per candidate to
# hr2017_tokyo.csv.
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup

url = 'http://www.asahi.com/senkyo/senkyo2017/kaihyo/A13.html'
# Context manager closes the HTTP response even if read() raises.
with urlopen(url) as resp:
    html = resp.read()
soup = BeautifulSoup(html, 'html.parser')

# Explicit utf-8 keeps Japanese names/party labels intact on any platform.
with open('hr2017_tokyo.csv', 'w', encoding='utf-8') as fw:
    writer = csv.writer(fw, dialect='excel', lineterminator='\n')
    writer.writerow(
        ['num', 'name', 'age', 'count', 'party', 'status', 'previous'])
    for num, row in enumerate(
            soup.select('div.areabox > table > tbody > tr'), start=1):
        # Family name and given name are separate <span>s; joined below.
        sei = row.select_one('td.namae > div > span.sei').get_text(strip=True)
        mei = row.select_one('td.namae > div > span.mei').get_text(strip=True)
        # "(NN)" -> NN
        age = int(
            row.select_one('td.namae > div > span.age').get_text(
                strip=True).strip('()'))
        # First text node of the cell is the count, e.g. "12,345".
        count = int(
            row.select_one('td.num > div').contents[0].strip().replace(',', ''))
        party = row.select_one('td.party > div').get_text(strip=True)
        status = row.select_one('td.status > div').get_text(strip=True)
        # "N回" -> N (number of times previously elected)
        previous = int(
            row.select_one('td.tosenkaisu > div').get_text(
                strip=True).rstrip('回'))
        writer.writerow(
            [num, sei + ' ' + mei, age, count, party, status, previous])