また新しいお題がでていたので
ja.stackoverflow.com
# Scrape the start list ("shutuba") table of one netkeiba race page and
# dump the text of every cell to race.csv — one CSV row per table <tr>.
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv

url = 'http://race.netkeiba.com/?pid=race&id=c201605050211&mode=shutuba'

# Use the response as a context manager so the HTTP socket is closed
# promptly instead of being leaked until garbage collection.
with urlopen(url) as resp:
    html = resp.read()

soup = BeautifulSoup(html, 'html5lib')
rows = soup.select('#shutuba > table > tbody > tr')

# get_text("\n", True) joins each cell's text fragments with newlines,
# stripping surrounding whitespace and dropping empty fragments — a
# one-call replacement for a manual splitlines/strip/filter/join chain.
table = [[cell.get_text("\n", True) for cell in row.find_all(['th', 'td'])]
         for row in rows]

with open('race.csv', 'wt', encoding='utf-8') as fw:
    writer = csv.writer(fw, lineterminator='\n')
    writer.writerows(table)
# Scrape the 7th column of the netkeiba start-list table and write one CSV
# row per horse to name.csv: the main name from span.h_name, followed by
# the parenthesized names from span.txt_smaller with the parentheses removed
# (presumably sire/dam names — verify against the page).
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv

url = 'http://race.netkeiba.com/?pid=race&id=c201605050211&mode=shutuba'

# Close the HTTP response deterministically instead of leaking the socket.
with urlopen(url) as resp:
    html = resp.read()

soup = BeautifulSoup(html, 'html5lib')

with open('name.csv', 'wt', encoding='utf-8') as fw:
    writer = csv.writer(fw, lineterminator='\n')
    # [1:] skips the header row; each remaining <tr> is one entry.
    for tr in soup.select('#shutuba > table > tbody > tr')[1:]:
        td = tr.select_one('td:nth-of-type(7)')
        names = [td.select_one('span.h_name > a').get_text().strip()]
        # Whitespace-split the smaller text, stripping the surrounding
        # '(' / ')' characters from each token.
        names.extend(token.strip('()')
                     for token in td.select_one('span.txt_smaller')
                                    .get_text().split())
        writer.writerow(names)