TSV保存
# Fetch the Mini Loto results page with headless Firefox and append one
# tab-separated row per result table to 'minoloto.tsv'.
import csv

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

# Positions, within the flattened th/td texts of one result table, of the
# cells that are written out as a TSV row.
ROW_CELL_INDEXES = (1, 3, 5, 6, 7, 8, 9, 10)

options = Options()
# Passing the command-line flag works on every Selenium release, unlike
# set_headless(), which was deprecated and later removed.
options.add_argument('-headless')

driver = webdriver.Firefox(options=options)
try:
    driver.get(
        'https://www.mizuhobank.co.jp/retail/takarakuji/loto/miniloto/index.html')

    # Browser interaction: grab the rendered page.
    html = driver.page_source
finally:
    # Always shut the browser down, even if loading the page failed.
    driver.quit()

soup = BeautifulSoup(html, 'html.parser')

# Append mode: every run adds its rows after whatever the file already holds.
with open('minoloto.tsv', 'a') as fw:
    writer = csv.writer(fw, dialect='excel-tab', lineterminator='\n')

    for table in soup.select(
            '#mainCol > article > section > section > section > div > div.sp-none > table'
    ):
        # Flatten every header and data cell of the table into a list of texts.
        result = [cell.get_text(strip=True)
                  for cell in table.find_all(['th', 'td'])]
        print(result)

        # Keep only the selected cells, dropping any surrounding parentheses.
        row = [result[i].strip('()') for i in ROW_CELL_INDEXES]
        writer.writerow(row)
昨日はできたのに、今日実行するとKasperskyのネット決済保護機能が働いてスクレイピングできず、びっくりした。
取得
# Fetch the Mini Loto results page with headless Firefox and parse each
# result table into a dictionary of typed values.
import datetime
import locale  # no longer needed for the parsing below; kept in case later code uses it

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

# (dictionary key, index of the "lot" cell) for each prize tier; the matching
# "prize" cell is the one immediately after the "lot" cell.
PRIZE_TIERS = (
    ('prize_1st', 12),
    ('prize_2nd', 15),
    ('prize_3rd', 18),
    ('prize_4th', 21),
)


def _to_int(text, unit):
    """Return *text* as an int after dropping trailing *unit* chars and commas.

    This replaces a locale-dependent ``locale.atoi`` call so the script does
    not require a Windows-specific locale name or change process-wide locale
    settings.

    Raises:
        ValueError: if what remains is not a valid integer.
    """
    return int(text.rstrip(unit).replace(',', ''))


options = Options()
# Passing the command-line flag works on every Selenium release, unlike the
# ``headless`` property, which newer Selenium 4 releases deprecate and remove.
options.add_argument('-headless')

driver = webdriver.Firefox(options=options)
try:
    driver.get(
        'https://www.mizuhobank.co.jp/retail/takarakuji/loto/miniloto/index.html')

    # Browser interaction: grab the rendered page.
    html = driver.page_source
finally:
    # Always shut the browser down, even if loading the page failed.
    driver.quit()

soup = BeautifulSoup(html, 'html.parser')

for table in soup.select(
        '#mainCol > article > section > section > section > div > div.sp-none > table'
):
    # Flatten every header and data cell of the table into a list of texts.
    result = [cell.get_text(strip=True)
              for cell in table.find_all(['th', 'td'])]

    loto_list = {
        'count': int(result[1].strip('第回')),
        'date': datetime.datetime.strptime(result[3], '%Y年%m月%d日'),
        'win_num': result[5:10],
        'bou_num': result[10].strip('()'),
    }

    # Insert the tiers in order so the dictionary keeps its original layout.
    for tier_key, lot_index in PRIZE_TIERS:
        loto_list[tier_key] = {
            'lot': _to_int(result[lot_index], '口'),
            'prize': _to_int(result[lot_index + 1], '円'),
        }

    loto_list['sales_amount'] = _to_int(result[24], '円')

    print(loto_list['count'], loto_list['date'])
    print(loto_list['win_num'], loto_list['bou_num'])