また新しいお題がでていたので
ja.stackoverflow.com
ja.stackoverflow.com
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv
# Download the race entry page and dump every row of its table to race.csv.
url = 'http://race.netkeiba.com/?pid=race&id=c201605050211&mode=shutuba'

# Read the body inside a context manager so the HTTP response (and its
# underlying socket) is closed as soon as the download finishes.
with urlopen(url) as response:
    html = response.read()

soup = BeautifulSoup(html, 'html5lib')

# All rows of the table nested directly under the element with id "shutuba".
tr = soup.select('#shutuba > table > tbody > tr')

# One list per row, one string per header/data cell. Text fragments inside a
# cell are stripped and joined with "\n", so a cell may span several lines.
td = [
    [cell.get_text("\n", True) for cell in row.find_all(['th', 'td'])]
    for row in tr
]

with open('race.csv', 'wt', encoding='utf-8') as fw:
    writer = csv.writer(fw, lineterminator='\n')
    writer.writerows(td)
ja.stackoverflow.com
from urllib.request import urlopen
from bs4 import BeautifulSoup
import csv
# Download the race entry page and write one CSV row per table row to
# name.csv, built from the contents of the row's 7th <td> cell.
url = 'http://race.netkeiba.com/?pid=race&id=c201605050211&mode=shutuba'

# Read the body inside a context manager so the HTTP response (and its
# underlying socket) is closed as soon as the download finishes.
with urlopen(url) as response:
    html = response.read()

soup = BeautifulSoup(html, 'html5lib')

with open('name.csv', 'wt', encoding='utf-8') as fw:
    writer = csv.writer(fw, lineterminator='\n')

    # [1:] skips the first row of the table (presumably the header row --
    # verify against the page markup).
    for tr in soup.select('#shutuba > table > tbody > tr')[1:]:
        td = tr.select_one('td:nth-of-type(7)')

        # First column: the link text inside span.h_name.
        main_name = td.select_one('span.h_name > a').get_text().strip()

        # Remaining columns: the whitespace-separated tokens of
        # span.txt_smaller, each with surrounding ASCII parentheses removed.
        extra_names = [
            token.strip('()')
            for token in td.select_one('span.txt_smaller').get_text().split()
        ]

        names = [main_name, *extra_names]
        writer.writerow(names)
imabari.hateblo.jp