colspan is a pain, so grab the cells directly
```python
import requests
from bs4 import BeautifulSoup

headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
}


def get_table(url):
    r = requests.get(url, headers=headers)
    if r.status_code == requests.codes.ok:
        soup = BeautifulSoup(r.content, 'html5lib')
        tables = soup.find_all('table', class_='datatable')
        return tables
    # Return an empty list on failure so the caller's extend() doesn't crash.
    return []


def scraping(tables, num, area):
    # This week (index shifted by colspan in this row)
    this_week = float(tables[4].select_one(
        'tr:nth-of-type(3) > td:nth-of-type({})'.format(num + 1)).get_text(
            strip=True))
    # Previous week
    last_week = float(tables[4].select_one(
        'tr:nth-of-type(5) > td:nth-of-type({})'.format(num)).get_text(
            strip=True))
    # Warning / advisory level
    alarm = tables[4].select_one(
        'tr:nth-of-type(4) > td:nth-of-type({})'.format(num - 1)).get_text(
            strip=True)
    # Week-over-week difference
    comp_week = this_week - last_week
    if comp_week > 0:
        sign = '↑'
    elif comp_week < 0:
        sign = '↓'
    else:
        sign = ''
    # Reported influenza cases
    this_count = tables[0].select_one(
        'tr:nth-of-type(2) > td:nth-of-type({})'.format(num)).get_text(
            strip=True)
    # Rapid test results (type A / type B / unknown)
    temp = []
    inful_a = tables[0].select_one(
        'tr:nth-of-type(4) > td:nth-of-type({})'.format(num + 1)).get_text(
            strip=True)
    inful_b = tables[0].select_one(
        'tr:nth-of-type(5) > td:nth-of-type({})'.format(num)).get_text(
            strip=True)
    inful_n = tables[0].select_one(
        'tr:nth-of-type(6) > td:nth-of-type({})'.format(num)).get_text(
            strip=True)
    if inful_a:
        temp.append('A型:{}人'.format(inful_a))
    if inful_b:
        temp.append('B型:{}人'.format(inful_b))
    if inful_n:
        temp.append('不明:{}人'.format(inful_n))
    inful_kata = '、'.join(temp)
    result = '({0}){1}\n定点当たり:{2:.1f}人{3}(前週比:{4:+.1f}人)\n患者報告数:{5}人({6})'.format(
        area, alarm, this_week, sign, comp_week, this_count, inful_kata)
    return result


# Column index per area:
# 02: Ehime Prefecture  03: Shikokuchuo  04: Saijo       05: Imabari
# 06: Matsuyama City    07: Chuyo        08: Yawatahama  09: Uwajima
if __name__ == '__main__':
    tables = get_table(
        'https://www.pref.ehime.jp/h25115/kanjyo/topics/influ1819/tb_flu1819.html'
    )
    tables.extend(
        get_table(
            'https://www.pref.ehime.jp/h25115/kanjyo/topics/influ1819/index1819.html'
        ))
    # print(len(tables))
    # Proceed only if both pages parsed into the expected five tables.
    if len(tables) == 5:
        temp = []
        temp.append('インフルエンザ患者報告数')
        temp.append(scraping(tables, 2, '愛媛県'))
        temp.append(scraping(tables, 5, '今治'))
        temp.append('https://www.pref.ehime.jp/h25115/kanjyo/index.html')
        result = '\n\n'.join(temp)
        print(result)
```
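For reference, the shifted `nth-of-type` indices above (`num + 1`, `num - 1`) are the "grab directly" workaround for cells that span several columns. An alternative is to expand every `colspan` into repeated cells first, so each row gets the same logical width and a single index works everywhere. Below is a minimal sketch of that idea; `expand_colspan` is a hypothetical helper of my own, and rowspan is deliberately not handled:

```python
from bs4 import BeautifulSoup


def expand_colspan(table):
    """Flatten a <table> into a 2D list, repeating cells to fill colspan."""
    grid = []
    for tr in table.find_all('tr'):
        row = []
        for cell in tr.find_all(['th', 'td']):
            text = cell.get_text(strip=True)
            # Repeat the cell once per spanned column so indices line up.
            row.extend([text] * int(cell.get('colspan', 1)))
        grid.append(row)
    return grid


html = '''<table class="datatable">
<tr><td colspan="2">A</td><td>B</td></tr>
<tr><td>1</td><td>2</td><td>3</td></tr>
</table>'''
table = BeautifulSoup(html, 'html5lib').find('table')
print(expand_colspan(table))  # [['A', 'A', 'B'], ['1', '2', '3']]
```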
Quick-report version
```python
import requests
from bs4 import BeautifulSoup

url = 'https://www.pref.ehime.jp/h25115/kanjyo/index.html'
headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
}


def scraping(tables, num, area):
    # Sentinel counts: this week vs. previous week
    # This week (index shifted by colspan in this row)
    this_week = float(tables[1].select_one(
        'tr:nth-of-type(3) > td:nth-of-type({})'.format(num + 1)).get_text(
            strip=True))
    # Previous week
    last_week = float(tables[1].select_one(
        'tr:nth-of-type(5) > td:nth-of-type({})'.format(num)).get_text(
            strip=True))
    # print(this_week, last_week)
    # Week-over-week difference
    comp_week = this_week - last_week
    if this_week > last_week:
        sign = '↑'
    elif this_week < last_week:
        sign = '↓'
    else:
        sign = ''
    # Reported influenza cases: grab the area's whole column in one pass
    data = [
        i.get_text(strip=True)
        for i in tables[0].select('tr > td:nth-of-type({})'.format(num))
    ]
    temp = []
    if data[3]:
        temp.append('A型:{}人'.format(data[3]))
    if data[4]:
        temp.append('B型:{}人'.format(data[4]))
    if data[5]:
        temp.append('不明:{}人'.format(data[5]))
    inful_kata = '、'.join(temp)
    result = '({0}){1[1]}\n定点当たり:{1[0]}人{4}(前週比:{3:+.1f}人)\n患者報告数:{1[2]}人({2})'.format(
        area, data, inful_kata, comp_week, sign)
    return result


# Column index per area:
# 02: Ehime Prefecture  03: Shikokuchuo  04: Saijo       05: Imabari
# 06: Matsuyama City    07: Chuyo        08: Yawatahama  09: Uwajima
r = requests.get(url, headers=headers)
if r.status_code == requests.codes.ok:
    soup = BeautifulSoup(r.content, 'html5lib')
    tables = soup.select('table.datatable')
    temp = []
    temp.append('インフルエンザ患者報告数')
    temp.append(scraping(tables, 2, '愛媛県'))
    temp.append(scraping(tables, 5, '今治'))
    temp.append('https://www.pref.ehime.jp/h25115/kanjyo/index.html')
    result = '\n\n'.join(temp)
    print(result)
```
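What makes this version shorter is the single `select('tr > td:nth-of-type(n)')` call: it returns the n-th `<td>` of every row in document order, i.e. one whole column per call, as long as no colspan shifts that row's indices. A tiny self-contained demonstration of the pattern (made-up table, not the prefecture's actual markup):

```python
from bs4 import BeautifulSoup

html = '''<table>
<tr><td>area</td><td>愛媛県</td></tr>
<tr><td>this week</td><td>12.3</td></tr>
<tr><td>alert</td><td>注意報</td></tr>
</table>'''
soup = BeautifulSoup(html, 'html5lib')
# One selector pulls the 2nd <td> of every row: the whole column at once.
column = [td.get_text(strip=True) for td in soup.select('tr > td:nth-of-type(2)')]
print(column)  # ['愛媛県', '12.3', '注意報']
```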