# 玉川ダムの貯水率をスクレイピング (Scrape the reservoir storage rate of Tamagawa Dam)
#
# 2021/04/18現在利用できません (Not usable as of 2021/04/18)

from urllib.request import urlopen
from bs4 import BeautifulSoup
import datetime
import csv

# GRP query value selecting the dam:
#   USR004 = Tamagawa Dam (玉川ダム), USR005 = Utena Dam (台ダム)
grp = 'USR004'

# KTM query value selecting the sampling interval:
#   1 = every hour, 2 = every 30 minutes, 3 = every 10 minutes
ktm = 1

# Reference time: 8 minutes before the current local time
# (presumably to give the site time to publish the latest reading — confirm)
now = datetime.datetime.now() - datetime.timedelta(minutes=8)

# Interval length in minutes for the chosen KTM code; any code other than
# 2 or 3 uses 60
n = {2: 30, 3: 10}.get(ktm, 60)

# Step the reference time back to a minute value that is a multiple of n
# (seconds are left as-is; only minute resolution is used afterwards)
cut_time = now - datetime.timedelta(minutes=now.minute % n)

# Build the request URL: DT is the rounded timestamp (YYYYMMDDHHMM),
# GRP the dam code and KTM the sampling-interval code.
url = (
    'http://183.176.244.72/cgi/170_USER_010_01.cgi'
    '?GID=170_USER_010&UI=U777&SI=00000&MNU=1&LO=88&BTY=IE6X&NDT=1'
    '&SK=0000000&DT={0}&GRP={1}&TPG=1&PG=1&KTM={2}'
).format(cut_time.strftime('%Y%m%d%H%M'), grp, ktm)

# Show the URL so it can be checked by hand
print(url)

# Fetch the page. The context manager closes the HTTP response even if
# reading fails, and the timeout keeps the script from hanging forever
# when the server does not answer.
with urlopen(url, timeout=30) as response:
    html = response.read()

# html5lib parses like a browser (it inserts <tbody> elements, which the
# CSS selectors applied to this document rely on)
soup = BeautifulSoup(html, 'html5lib')

# Output rows collected so far, and the last non-empty date cell seen
result = []
day = ''

# Walk the rows of the 7th top-level table; each row contains nested tables
for trs in soup.select('body > table:nth-of-type(7) > tbody > tr'):

    # Extract cell text in the order:
    # nested table -> rows of that table -> cells of each row
    dam = [[[cell.get_text(strip=True) for cell in tr.select('td')]
            for tr in tbody.select('tr')]
           for tbody in trs.select('td > table > tbody')]

    # Transpose: group the i-th row of every nested table together
    # (zip stops at the shortest nested table)
    for row_parts in zip(*dam):

        # Flatten the per-table cell lists into one output row
        data = [text for part in row_parts for text in part]

        # A row without any cells has no date column to fill in;
        # skip it instead of failing with IndexError on data[0]
        if not data:
            continue

        # Fill in a missing date: remember the latest non-empty first
        # column and reuse it for rows where that column is blank
        if data[0]:
            day = data[0]
        else:
            data[0] = day

        result.append(data)

# Write the collected rows as tab-separated values.
# newline='' lets the csv writer control line endings itself, so the
# explicit '\n' terminator is not rewritten to '\r\n' on Windows.
# encoding='utf-8' makes the output independent of the platform's locale
# encoding, so Japanese cell text cannot trigger UnicodeEncodeError.
with open('result.tsv', 'w', newline='', encoding='utf-8') as fw:
    writer = csv.writer(fw, dialect='excel-tab', lineterminator='\n')
    writer.writerows(result)