import datetime
import re
from urllib.parse import urljoin
import requests
import twitter
from bs4 import BeautifulSoup
def scraping(html):
    """Parse today's on-duty hospital table from the Ehime emergency-medical
    portal result page and post a summary to Twitter.

    Parameters
    ----------
    html : bytes | str
        Raw HTML of the search-result page (one table per date).

    Side effects: posts one or two tweets via the ``twitter`` API client.
    Returns None.
    """
    soup = BeautifulSoup(html, "html.parser")
    tables = soup.find_all(
        "table", class_="comTblGyoumuCommon", summary="検索結果一覧を表示しています。"
    )
    today = datetime.date.today()
    for table in tables:
        # First cell holds "YYYY年MM月DD日 (曜日)"; only process today's table.
        date, week = table.td.get_text(strip=True).split()
        day = datetime.datetime.strptime(date, "%Y年%m月%d日")
        if day.date() != today:
            continue
        result = []
        # Carried-over name/address for rows that omit them (rowspan layout).
        dprev = ["今治市医師会市民病院", "今治市別宮町7-1-40"]
        # BUG FIX: bs4 attribute values are strings (see the "1" comparison
        # below), so the id filter must be a list of strings, not ints —
        # with [1, 2, 3] no rows match.
        for trs in table.find_all("tr", id=["1", "2", "3"]):
            row_id = trs.get("id")  # renamed: avoid shadowing builtin `id`
            data = list(trs.stripped_strings)
            if row_id == "1" and data[0] == "今治市":
                del data[0]
            # Drop up to two embedded "TEL..." cell pairs (phone + number).
            for _ in range(2):
                if len(data) > 4 and data[2].startswith("TEL"):
                    del data[2:4]
            if row_id == "2":
                # Row type 2 omits name/address; reuse the previous row's.
                data = dprev[:2] + data
            dprev = data
            hospital = dict(zip(["name", "address", "subject"], data[0:4]))
            hospital["class"] = 8  # default sort bucket
            # Flatten "HH:MM〜HH:MM" ranges; output uses an ASCII tilde.
            t = [j for i in data[3:] for j in i.split("〜")]
            hospital["time"] = "~".join([t[0], t[-1]])
            if len(t) == 4 and t[1] != t[2]:
                # Two disjoint time ranges: show each on its own line.
                hospital["time"] = "\n".join(["~".join(t[:2]), "~".join(t[2:])])
            # Sort buckets: surgery < internal medicine < pediatrics; the
            # "not specified" subject sorts first and its label is dropped.
            if "外科" in hospital["subject"]:
                hospital["class"] = 1
            elif "内科" in hospital["subject"]:
                hospital["class"] = 2
            elif hospital["subject"] == "小児科":
                hospital["class"] = 4
            elif hospital["subject"] == "指定なし":
                hospital["class"] = 0
                hospital["subject"] = ""
            # Island districts are grouped last under a common label.
            match = re.search("(吉海町|宮窪町|伯方町|上浦町|大三島町|関前)", hospital["address"])
            if match:
                hospital["class"] = 9
                hospital["subject"] = "島嶼部"
            if hospital["subject"]:
                hospital["subject"] = f'【{hospital["subject"]}】'
            hospital["text"] = "\n".join(
                [hospital["subject"], hospital["name"], hospital["time"]]
            ).strip()
            result.append(hospital)
        result.sort(key=lambda x: (x["class"], x["time"]))
        twit_date = f"{date} {week}"
        # Mainland (class < 9) and island (class > 8) entries tweet separately
        # when the combined text exceeds the 140-character limit.
        twit_riku = "\n\n".join(
            [i["text"] for i in result if i["class"] < 9]
        ).strip()
        twit_sima = "\n\n".join(
            [i["text"] for i in result if i["class"] > 8]
        ).strip()
        twit_all = "\n\n".join([twit_date, twit_riku, twit_sima]).strip()
        # TODO(review): credentials are blank here — load them from
        # environment/config rather than hard-coding.
        api = twitter.Api(
            consumer_key="",
            consumer_secret="",
            access_token_key="",
            access_token_secret="",
        )
        if len(twit_all) < 140:
            api.PostUpdate(twit_all)
        else:
            # BUG FIX: the original joined `twit_date + twit_sima` — string
            # concatenation fed to str.join iterates CHARACTERS, inserting
            # "\n\n" between every character. Join a list of parts instead.
            api.PostUpdate("\n\n".join([twit_date, twit_sima]).strip())
            api.PostUpdate("\n\n".join([twit_date, twit_riku]).strip())
        break
if __name__ == "__main__":
base_url = "http://www.qq.pref.ehime.jp/qq38/WP0805/RP080501BL.do"
payload = {
"blockCd[3]": "",
"forward_next": "",
"torinBlockDetailInfo.torinBlockDetail[0].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[1].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[2].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[3].blockCheckFlg": "1",
"torinBlockDetailInfo.torinBlockDetail[4].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[5].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[6].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[7].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[8].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[9].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[10].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[11].blockCheckFlg": "0",
"torinBlockDetailInfo.torinBlockDetail[12].blockCheckFlg": "0",
}
with requests.Session() as s:
r = s.get(base_url)
soup = BeautifulSoup(r.content, "html.parser")
token = soup.find(
"input", attrs={"name": "org.apache.struts.taglib.html.TOKEN"}
).get("value")
payload["org.apache.struts.taglib.html.TOKEN"] = token
url = urljoin(
base_url, soup.find("form", attrs={"name": "wp0805Form"}).get("action")
)
r = s.post(url, data=payload)
scraping(r.content)