# 愛媛県のインフルエンザ患者報告数をスクレイピング
# (Scrapes influenza patient report counts for Ehime Prefecture.)
#
# colspan がめんどくさいので直取り — cells are addressed by fixed row/column
# positions because handling colspan/rowspan generically is a pain.

import requests
from bs4 import BeautifulSoup

# Spoof a desktop IE11 user agent so the prefecture site serves the normal page.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'}


def get_table(url):
    """Fetch *url* and return every ``<table class="datatable">`` element.

    Returns an empty list on a non-OK HTTP response instead of the
    original implicit ``None`` — callers do ``tables.extend(get_table(...))``,
    which would raise ``TypeError`` when extending with ``None``.
    """
    r = requests.get(url, headers=headers)

    if r.status_code == requests.codes.ok:
        soup = BeautifulSoup(r.content, 'html5lib')
        return soup.find_all('table', class_='datatable')

    # Fetch failed: keep the return type list-like so extend() stays safe.
    return []


def scraping(tables, num, area):
    """Build a one-area influenza report string from the scraped tables.

    ``tables[4]`` holds per-sentinel averages and alert levels, ``tables[0]``
    the raw patient counts; ``num`` is the area's column index.  The
    row-dependent +1/-1 column offsets compensate for rowspan/colspan
    cells in the source HTML — do not "simplify" them.
    """

    def cell(table, row, col):
        # Stripped text of one <td>, addressed by (row, column) position.
        selector = 'tr:nth-of-type({}) > td:nth-of-type({})'.format(row, col)
        return table.select_one(selector).get_text(strip=True)

    # Per-sentinel averages: this week vs. the week before.
    this_week = float(cell(tables[4], 3, num + 1))
    last_week = float(cell(tables[4], 5, num))

    # Alert / warning label for the area.
    alarm = cell(tables[4], 4, num - 1)

    # Week-over-week difference and its direction marker.
    comp_week = this_week - last_week
    sign = '↑' if comp_week > 0 else ('↓' if comp_week < 0 else '')

    # Reported patient count.
    this_count = cell(tables[0], 2, num)

    # Rapid-test breakdown: type A / type B / type unknown.
    inful_a = cell(tables[0], 4, num + 1)
    inful_b = cell(tables[0], 5, num)
    inful_n = cell(tables[0], 6, num)

    parts = []
    if inful_a:
        parts.append('A型:{}人'.format(inful_a))
    if inful_b:
        parts.append('B型:{}人'.format(inful_b))
    if inful_n:
        parts.append('不明:{}人'.format(inful_n))
    inful_kata = '、'.join(parts)

    return '({0}){1}\n定点当たり:{2:.1f}人{3}(前週比:{4:+.1f}人)\n患者報告数:{5}人({6})'.format(
        area, alarm, this_week, sign, comp_week, this_count, inful_kata)


# 02:愛媛県
# 03:四国中央
# 04:西条
# 05:今治
# 06:松山市
# 07:中予
# 08:八幡浜
# 09:宇和島

if __name__ == '__main__':

    # The weekly figures are split across two pages; concatenate their tables.
    tables = get_table(
        'https://www.pref.ehime.jp/h25115/kanjyo/topics/influ1819/tb_flu1819.html'
    )
    tables.extend(
        get_table(
            'https://www.pref.ehime.jp/h25115/kanjyo/topics/influ1819/index1819.html'
        ))

    # Both pages together are expected to yield exactly five datatables.
    if len(tables) == 5:

        sections = [
            'インフルエンザ患者報告数',
            scraping(tables, 2, '愛媛県'),
            scraping(tables, 5, '今治'),
            'https://www.pref.ehime.jp/h25115/kanjyo/index.html',
        ]

        result = '\n\n'.join(sections)

        print(result)

# 速報 (flash report) — standalone variant that scrapes the summary page only.

import requests
from bs4 import BeautifulSoup

# Summary page carrying both the sentinel averages and the per-area counts.
url = 'https://www.pref.ehime.jp/h25115/kanjyo/index.html'

# Spoof a desktop IE11 user agent so the prefecture site serves the normal page.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'}


def scraping(tables, num, area):
    """Build a one-area influenza report string from the summary page.

    ``tables[1]`` holds the this-week/last-week sentinel averages,
    ``tables[0]`` the per-area column of counts; ``num`` is the area's
    column index.  The row-dependent +1 column offset compensates for
    rowspan/colspan cells in the source HTML.
    """

    def cell(table, row, col):
        # Stripped text of one <td>, addressed by (row, column) position.
        selector = 'tr:nth-of-type({}) > td:nth-of-type({})'.format(row, col)
        return table.select_one(selector).get_text(strip=True)

    # Per-sentinel averages: this week vs. the week before.
    this_week = float(cell(tables[1], 3, num + 1))
    last_week = float(cell(tables[1], 5, num))

    # Week-over-week difference and its direction marker.
    comp_week = this_week - last_week
    sign = '↑' if this_week > last_week else ('↓' if this_week < last_week else '')

    # Whole column of per-area figures (alert label, averages, counts,
    # then the rapid-test breakdown at indices 3..5).
    data = [
        td.get_text(strip=True)
        for td in tables[0].select('tr > td:nth-of-type({})'.format(num))
    ]

    # Rapid-test breakdown: type A / type B / type unknown (skip blanks).
    breakdown = (('A型', data[3]), ('B型', data[4]), ('不明', data[5]))
    inful_kata = '、'.join(
        '{}:{}人'.format(label, count) for label, count in breakdown if count)

    return '({0}){1[1]}\n定点当たり:{1[0]}人{4}(前週比:{3:+.1f}人)\n患者報告数:{1[2]}人({2})'.format(
        area, data, inful_kata, comp_week, sign)


# 02:愛媛県
# 03:四国中央
# 04:西条
# 05:今治
# 06:松山市
# 07:中予
# 08:八幡浜
# 09:宇和島

# Fetch the summary page and print reports for the prefecture and Imabari.
r = requests.get(url, headers=headers)

if r.status_code == requests.codes.ok:

    soup = BeautifulSoup(r.content, 'html5lib')
    tables = soup.select('table.datatable')

    sections = [
        'インフルエンザ患者報告数',
        scraping(tables, 2, '愛媛県'),
        scraping(tables, 5, '今治'),
        'https://www.pref.ehime.jp/h25115/kanjyo/index.html',
    ]

    result = '\n\n'.join(sections)

    print(result)