Python 3では動作するのですが、Python 2ではCSV保存の箇所でエラーが出て、原因がわかりません。
"""Scrape a jockey's race results from netkeiba (pages 1-2) and save them to horse.csv.

The header row is taken from the first page only; data rows are collected
from every page.
"""
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv

base_url = 'http://db.netkeiba.com/?pid=jockey_detail&id=00663&page={0}'

data = []
for num, i in enumerate([1, 2]):
    url = base_url.format(i)
    html = urlopen(url).read()
    soup = BeautifulSoup(html, 'html5lib')
    # Add the header only once, from the first page (num == 0).
    if not num:
        header = [x.get_text().strip()
                  for x in soup.select('#contents_liquid > table > thead > tr > th')]
        data.append(header)
    tr = soup.select('#contents_liquid > table > tbody > tr')
    td = [[x.get_text().strip() for x in y.select('td')] for y in tr]
    data.extend(td)

# newline='' is the documented way to open a file for csv.writer: it stops
# the text layer from translating '\n', which would otherwise produce
# '\r\r\n' / blank lines on Windows. Encoding is pinned so the output does
# not depend on the platform default.
with open('horse.csv', 'w', newline='', encoding='utf-8') as fw:
    writer = csv.writer(fw, lineterminator='\n')
    writer.writerows(data)
ヘッダーとデータを両方取得しておき、1ページ目のみヘッダーを追加する方法です。
"""Scrape a jockey's race results from netkeiba (pages 1-2) and save them to horse.csv.

Variant using find()/find_all(): every <tr> (including the header row) is
captured; the header row is kept only for the first page and sliced off
(td[1:]) for subsequent pages.
"""
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv

base_url = 'http://db.netkeiba.com/?pid=jockey_detail&id=00663&page={0}'

data = []
for num, i in enumerate([1, 2]):
    url = base_url.format(i)
    html = urlopen(url).read()
    soup = BeautifulSoup(html, 'html5lib')
    tr = soup.find('div', {'id': 'contents_liquid'}).find_all('tr')
    # Each row yields its cells; ['th', 'td'] so the header row is captured too.
    td = [[x.get_text().strip() for x in y.find_all(['th', 'td'])] for y in tr]
    if num:
        # Pages after the first: drop the duplicated header row.
        data.extend(td[1:])
    else:
        data.extend(td)

# newline='' prevents newline translation by the text layer (required for
# csv.writer per the csv module docs); encoding pinned for portability.
with open('horse.csv', 'w', newline='', encoding='utf-8') as fw:
    writer = csv.writer(fw, lineterminator='\n')
    writer.writerows(data)
"""Scrape a horse's pedigree table from netkeiba and save it to blood.csv.

Each <td> of the pedigree table becomes one single-column CSV row.
"""
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv

url = 'http://db.netkeiba.com/horse/1994103997/'
html = urlopen(url).read()
soup = BeautifulSoup(html, 'html5lib')

# One-element row per cell, preserving the table's cell order.
td = [[i.get_text().strip()]
      for i in soup.select('#db_main_box > div.db_main_deta > div > '
                           'div.db_prof_area_02 > div > dl > dd > table > '
                           'tbody > tr > td')]

# newline='' is required when handing a file to csv.writer (csv module docs);
# encoding pinned so output is platform-independent.
with open('blood.csv', 'w', newline='', encoding='utf-8') as fw:
    writer = csv.writer(fw, lineterminator='\n')
    writer.writerows(td)
"""Scrape a race entry table (shutuba) from netkeiba and save it to race.csv.

The first three rows of the table are headers/decoration and are skipped;
user-specific columns (your mark, favorite-horse registration, your memo)
are removed from each row before writing.
"""
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv

url = 'http://race.netkeiba.com/?pid=race_old&id=c201604020801'
html = urlopen(url).read()
soup = BeautifulSoup(html, 'html5lib')
tr = soup.select('#shutuba > diary_snap > table > tbody > tr')

# newline='' prevents newline translation by the text layer (csv module docs);
# encoding pinned for portability.
with open('race.csv', 'w', newline='', encoding='utf-8') as fw:
    writer = csv.writer(fw, lineterminator='\n')
    for y in tr[3:]:
        td = [x.get_text().strip() for x in y.select('td')]
        # Remove the user's own mark column.
        del td[2]
        # Remove the trailing favorite-registration / memo columns.
        del td[-2:]
        writer.writerow(td)
"""Scrape one race's header info from netkeiba and save a summary row to race.csv.

The race title and the '/'-separated condition string (e.g. distance /
weather / track condition / start time) are parsed; each segment is split
once on ':' into a key/value pair.
"""
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv

url = 'http://db.netkeiba.com/race/201508050411/'
html = urlopen(url).read()
soup = BeautifulSoup(html, 'html5lib')

race = soup.select_one(
    '#main > div > div > div > diary_snap > div > div > dl > dd > h1'
).get_text().strip()
span = soup.select_one(
    '#main > div > div > div > diary_snap > div > div > dl > dd > p > '
    'diary_snap_cut > span'
).get_text().strip()

# Split each '/'-separated segment once on ':' -> [key, value]
# (the first segment has no ':', so data[0] is a one-element list).
data = [[j.strip() for j in i.split(':', 1)] for i in span.split('/')]

# newline='' is required for csv.writer (csv module docs); encoding pinned.
with open('race.csv', 'w', newline='', encoding='utf-8') as fw:
    writer = csv.writer(fw, lineterminator='\n')
    # data[0][0]: the keyless first segment; data[1..3][1]: values only.
    writer.writerow([race, data[0][0], data[1][1], data[2][1], data[3][1]])