import re

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Query the Toyo University syllabus search endpoint and parse the result page.
url = "https://g-sys.toyo.ac.jp/syllabus/result"
headers = {
    # Desktop IE11 UA string; the site may serve different markup to
    # unrecognized clients.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
}
payload = {
    "year": "2020",
    "course": "0",
    "faculty": "1F000-2017",  # faculty/curriculum code for the search form
    "department": "",
    "course_name": "",
    "instructor": "",
    "language": "",
    "keyword1": "",
    "condition1": "",
    # NOTE(review): "conjuntion" looks misspelled, but if the site's form
    # field really uses this spelling it must stay — confirm against the
    # search page's HTML before "fixing" it.
    "conjuntion": "",
    "keyword2": "",
    "condition2": "",
    "perPage": "1000",  # request up to 1000 results on one page
}
# timeout prevents the script from hanging forever on a stalled connection
r = requests.post(url, headers=headers, data=payload, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, "html.parser")
# Extract one row per <tr> of the result table: each cell's text, with the
# cell that holds the syllabus buttons replaced by the JP/EN course codes
# parsed out of the buttons' onclick handlers.
_CODE_RE = re.compile(r"\d+")  # raw string: "\d" is an invalid escape otherwise


def _button_code(cell, css_class):
    """Return the second number found in the onclick of the button with
    *css_class* inside *cell*, or "" when the button (or number) is absent."""
    btn = cell.find("input", class_=css_class)
    if btn is None:
        return ""
    nums = _CODE_RE.findall(btn.get("onclick") or "")
    # assumes the course code is the second number in the handler — TODO confirm
    return nums[1] if len(nums) > 1 else ""


data = []
for tr in soup.find("table", id="result_table").find_all("tr"):
    tds = []
    for td in tr.find_all(["th", "td"]):
        tds.append(td.get_text("\n", strip=True).replace(" / ", "\n"))
        if td.name == "td":
            jp = _button_code(td, "btn_syllabus_jp")
            en = _button_code(td, "btn_syllabus_en")
            # Only replace the cell text when it actually contains syllabus
            # buttons; unconditionally overwriting would blank every other
            # data cell with just "\n".
            if jp or en:
                tds[-1] = f"{jp}\n{en}"
    data.append(tds)
# Build a DataFrame from the scraped rows, then split every multi-line cell
# ("\n"-joined values) into its own set of columns before exporting.
df_temp = pd.DataFrame(data)

# Each scraped column may hold several "\n"-separated values; expand them so
# every value lands in a separate CSV column.
expanded = [df_temp[col].str.split("\n", expand=True) for col in df_temp.columns]
df = pd.concat(expanded, axis=1)

# utf_8_sig writes a BOM so Excel opens the Japanese text correctly.
df.to_csv("data.csv", header=False, index=False, encoding="utf_8_sig")