# 2021/04/18現在利用できません (NOTE: as of 2021-04-18 this is no longer usable)
"""Print the latest reading and a trend arrow for dam and river gauges.

Each reading is scraped from a page on i.river.go.jp; see ``scraping`` for
how a page is reduced to a ``(value, arrow)`` pair.
"""
import re

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}

# Seconds to wait for the server before giving up, so that a scheduled run
# cannot hang indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30


def _numeric_rows(text, pattern):
    """Return the rows of *text* whose second field parses as a number.

    Everything matching *pattern* (the subtitle / unit header) is removed
    first. Each returned row is ``[number, *fields]`` where ``fields`` are
    the whitespace-separated tokens of the line and ``number`` is
    ``float(fields[1])``.
    """
    data = re.sub(pattern, "", text)
    rows = []
    for line in data.splitlines():
        fields = line.split()
        try:
            num = float(fields[1])
        except (IndexError, ValueError):
            # Fewer than two fields, or a non-numeric second field:
            # not a data row, skip it.
            continue
        rows.append([num] + fields)
    return rows


def _value_and_arrow(rows):
    """Reduce rows from ``_numeric_rows`` to a ``(value, arrow)`` pair.

    The first row is the base (presumably the most recent reading -- confirm
    against the page layout). It is compared with the first following row
    that is not identical to it.
    """
    # No data at all.
    if not rows:
        return "欠損", "-"

    base, *rest = rows
    for row in rest:
        # Skip exact duplicates of the base row.
        if row == base:
            continue
        # Build the arrow from the two numeric values.
        if base[0] > row[0]:
            arrow = "↑"
        elif base[0] < row[0]:
            arrow = "↓"
        else:
            arrow = "→"
        return base[2], arrow

    # No earlier row to compare against.
    return base[2], "-"


def scraping(url, pattern):
    """Fetch *url* and return ``(value, arrow)`` for its first data row.

    Args:
        url: Page to download.
        pattern: Regular expression matching the subtitle / unit header
            text that must be stripped before the rows are parsed.

    Returns:
        A tuple ``(value, arrow)``. ``value`` is the second field of the
        first numeric row, as the string found on the page. ``arrow`` is
        "↑" when that value is greater than the comparison row's value,
        "↓" when smaller, "→" when equal, and "-" when there is no row to
        compare with. ``("欠損", "-")`` is returned when the response
        status is not 200, the ``<a name="contents">`` anchor is absent,
        or no numeric row is found.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    if r.status_code != 200:
        return "欠損", "-"

    soup = BeautifulSoup(r.content, "html5lib")
    contents = soup.find("a", {"name": "contents"})
    if contents is None:
        # Page layout is not what we expect; report as missing rather
        # than crash on the attribute access below.
        return "欠損", "-"

    # Split at horizontal rules and keep the last section.
    html = contents.prettify().split("<hr/>")

    # Re-parse the fragment so the parser repairs it, then take its text.
    text = BeautifulSoup(html[-1], "html5lib").get_text("\n", strip=True)

    return _value_and_arrow(_numeric_rows(text, pattern))


def main():
    """Scrape every configured gauge and print its label, value and arrow."""
    flow_header = r"■\d{1,2}時間履歴\n単位:m3/s\n増減\n"
    volume_header = r"■\d{1,2}時間履歴\n単位:千m3\n"
    level_header = r"■\d{1,2}時間履歴\n単位:m\n増減\n"

    # (heading, ((label, url, header pattern), ...)) in print order.
    groups = (
        # Dam: inflow, discharge, storage volume.
        (
            "玉川ダム",
            (
                (
                    "流入量",
                    "http://i.river.go.jp/_-p01-_/p/ktm1801030/?mtm=10&swd=&prf=3801&twn=3801202&rvr=&den=0972900700006",
                    flow_header,
                ),
                (
                    "放流量",
                    "http://i.river.go.jp/_-p01-_/p/ktm1801040/?mtm=10&swd=&prf=3801&twn=3801202&rvr=&den=0972900700006",
                    flow_header,
                ),
                (
                    "貯水量",
                    "http://i.river.go.jp/_-p01-_/p/ktm1801060/?mtm=10&swd=&prf=3801&twn=3801202&rvr=&den=0972900700006",
                    volume_header,
                ),
            ),
        ),
        # River observation points.
        (
            "\n蒼社川",
            (
                (
                    "中通",
                    "http://i.river.go.jp/_-p01-_/p/ktm1201020/?mtm=10&swd=&prf=3801&twn=3801202&rvr=&den=0972900400024",
                    level_header,
                ),
                (
                    "高野",
                    "http://i.river.go.jp/_-p01-_/p/ktm1201020/?mtm=10&swd=&prf=3801&twn=3801202&rvr=&den=0972900400021",
                    level_header,
                ),
                (
                    "片山",
                    "http://i.river.go.jp/_-p01-_/p/ktm1201020/?mtm=10&swd=&prf=3801&twn=3801202&rvr=&den=0972900400025",
                    level_header,
                ),
            ),
        ),
    )

    for heading, stations in groups:
        print(heading)
        for label, url, pattern in stations:
            value, arrow = scraping(url, pattern)
            print(label, value, arrow)


if __name__ == "__main__":
    main()
# Cron setup: run `crontab -e` and add the following entry:
# 8,18,28,38,48,58 * * * * python3 /home/imabari/workspace/dam-river_twit.py