import json
import requests
from bs4 import BeautifulSoup
def get_title(table, css):
    """Return the stripped text of every cell in the rows selected by *css*.

    Args:
        table: Element exposing ``select()``; it is queried with the CSS
            selector ``tbody > tr{css} > td``.
        css: Selector suffix appended directly to ``tr`` (for example a
            ``.class`` or a pseudo-class).

    Returns:
        A list of strings, one per matched ``td``, in selection order.
    """
    selector = f"tbody > tr{css} > td"
    titles = []
    for cell in table.select(selector):
        titles.append(cell.get_text(strip=True))
    return titles
def get_data(table, css):
    """Return the cells of the rows selected by *css* as integers.

    Args:
        table: Element exposing ``select()``; it is queried with the CSS
            selector ``tbody > tr{css} > td``.
        css: Selector suffix appended directly to ``tr``.

    Returns:
        A list of ints, one per matched ``td``.  A cell whose stripped text
        is empty counts as ``0``.

    Raises:
        ValueError: If a non-empty cell does not parse with ``int()``.
    """
    # Lazy generator: each cell is read and converted before the next one is
    # touched, so a bad cell fails at the same point as a plain loop would.
    texts = (cell.get_text(strip=True) for cell in table.select(f"tbody > tr{css} > td"))
    return [int(text) if text else 0 for text in texts]
def get_web(number):
    """Scrape the TAKKEN record for a licence number and save it as JSON.

    Posts ``number`` to the ``ksGaiyo.do`` summary page and parses the company
    details and licence counts.  It then issues one follow-up request per date
    link found in ``table.re_summ_4`` and, when the office-tab image is
    present, one request to ``ksEigyo.do`` for the office list.  The collected
    dictionary is written to ``<company>.json`` in the current working
    directory.

    Args:
        number: Licence number sent as the ``sv_licenseNo`` query parameter.

    Returns:
        None.  Nothing is written when the summary request does not return
        HTTP 200, when the summary table yields fewer strings than the nine
        that are read positionally, or when the company name is empty.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        IndexError, ValueError: May still propagate if a follow-up page does
            not have the table/cell layout that is unpacked positionally.
    """
    url = "http://etsuran.mlit.go.jp/TAKKEN/ksGaiyo.do"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
    }
    params = {"CMD": "", "sv_licenseNo": number, "caller": "KS"}
    # requests never times out on its own; bound every call so a stalled
    # server cannot hang the script forever.
    timeout = 30

    with requests.Session() as s:
        r = s.post(url, headers=headers, params=params, timeout=timeout)
        if r.status_code != requests.codes.ok:
            return

        soup = BeautifulSoup(r.content, "html5lib")
        tds = [
            t
            for td in soup.select("table.re_summ > tbody > tr > td")
            for t in td.stripped_strings
        ]
        # Indices 0..8 are read below.  A page without a full summary table
        # (presumably what an unknown number yields -- TODO confirm) is
        # skipped quietly, like a non-200 response, instead of raising
        # IndexError.
        if len(tds) < 9:
            return

        result = {}
        result["auth"], result["lic_num"] = tds[0].split(None, 1)
        # The reading (yomi) precedes the name itself in the cell order.
        result["company"] = tds[2]
        result["company_yomi"] = tds[1]
        result["name"] = tds[4]
        result["name_yomi"] = tds[3]
        result["postal_code"] = tds[5]
        result["address"] = tds[6] + tds[7]
        result["tel"] = tds[8]

        tables = soup.select("table.re_summ_3")
        result["const"] = get_title(tables[0], ".re_summ_ev")
        result["license"] = get_data(tables[0], ".re_summ_odd")

        # One request per date link; dates whose request fails are left out.
        params["CMD"] = "init"
        permits = []
        for link in soup.select("table.re_summ_4 > tbody > tr > td > a"):
            date = link.get_text(strip=True)
            params["licenseDay"] = date
            d_res = s.post(url, headers=headers, params=params, timeout=timeout)
            if d_res.status_code != requests.codes.ok:
                continue
            d_soup = BeautifulSoup(d_res.content, "html5lib")
            # The second ``re_summ_3`` table of the per-date page is the one read.
            permits.append(
                {date: get_data(d_soup.select("table.re_summ_3")[1], ".re_summ_odd")}
            )
        result["license_day"] = permits

        # The office list is fetched only when the office-tab image is present.
        if soup.find("img", src="/TAKKEN/images/btn_tab_office_off.png"):
            params["licenseDay"] = ""
            # NOTE(review): this endpoint uses https while the summary
            # endpoint above uses http -- confirm which scheme is intended.
            o_res = s.post(
                "https://etsuran.mlit.go.jp/TAKKEN/ksEigyo.do",
                headers=headers,
                params=params,
                timeout=timeout,
            )
            o_list = []
            # Check the status like the other requests so an error page is
            # never parsed as data; "office" is still set (empty) on failure.
            if o_res.status_code == requests.codes.ok:
                o_soup = BeautifulSoup(o_res.content, "html5lib")
                # The first row is skipped (presumably the header row).
                for row in o_soup.select("table.re_office > tbody > tr")[1:]:
                    office = {}
                    o_tds = row.find_all("td", recursive=False)
                    office["id"] = o_tds[0].get_text(strip=True)
                    office["name"], office["tel"] = o_tds[1].stripped_strings
                    office["postal_code"], office["address"] = o_tds[2].stripped_strings
                    office["license"] = get_data(
                        o_tds[3].select_one("table.re_office3"), ":nth-of-type(2)"
                    )
                    o_list.append(office)
            result["office"] = o_list

    if result["company"]:
        filename = result["company"] + ".json"
        # json.dump escapes non-ASCII by default, so the bytes written are the
        # same as before; the encoding is only made explicit.
        with open(filename, "w", encoding="utf-8") as fw:
            json.dump(result, fw)
if __name__ == "__main__":
    # Prompt for the licence number; only a string of exactly eight decimal
    # characters is looked up, anything else prints the error message.
    number = input("許可番号:")
    if not number.isdecimal() or len(number) != 8:
        print("エラー")
    else:
        get_web(number)