"""Scrape Toyo University's syllabus search results into data.csv.

POSTs one search request (year 2020, faculty 1F000-2017, up to 1000 rows),
parses the HTML result table, pulls the JP/EN syllabus ids out of the
detail-button onclick handlers, and writes a flattened CSV.
"""
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

url = "https://g-sys.toyo.ac.jp/syllabus/result"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
}
# Search form fields; key names (including "conjuntion") must match the
# server-side form exactly, so they are kept verbatim.
payload = {
    "year": "2020",
    "course": "0",
    "faculty": "1F000-2017",
    "department": "",
    "course_name": "",
    "instructor": "",
    "language": "",
    "keyword1": "",
    "condition1": "",
    "conjuntion": "",
    "keyword2": "",
    "condition2": "",
    "perPage": "1000",
}

# timeout so a stalled server cannot hang the script indefinitely
r = requests.post(url, headers=headers, data=payload, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, "html.parser")

# Hoisted out of the loop: the same pattern is applied to every button.
digits_re = re.compile(r"\d+")

data = []
for tr in soup.find("table", id="result_table").find_all("tr"):
    tds = []
    for td in tr.find_all(["th", "td"]):
        # Newline-join multi-line cell text; " / " also separates values
        # within one cell, so normalize it to a newline too.
        tds.append(td.get_text("\n", strip=True).replace(" / ", "\n"))
    # After the loop, `td` is the row's last cell.  Header rows end in a
    # <th>; data rows end in a <td> holding the JP/EN detail buttons.
    # NOTE(review): reconstructed from a whitespace-mangled source — this
    # guard is assumed to apply once per row, not per cell; confirm.
    if td.name == "td":
        syllabus_jp = td.find("input", class_="btn_syllabus_jp")
        # onclick looks like handler(<year>, <id>, ...); index [1] takes
        # the second number, the syllabus id.
        jp = digits_re.findall(syllabus_jp.get("onclick"))[1] if syllabus_jp else ""
        syllabus_en = td.find("input", class_="btn_syllabus_en")
        en = digits_re.findall(syllabus_en.get("onclick"))[1] if syllabus_en else ""
        tds[-1] = f"{jp}\n{en}"
    data.append(tds)

df_temp = pd.DataFrame(data)

# Each cell may hold several newline-joined values; split every column
# into sub-columns so the CSV gets one value per field.
dfs = []
for col in df_temp.columns:
    dfs.append(df_temp[col].str.split("\n", expand=True))
df = pd.concat(dfs, axis=1)

# BOM-prefixed UTF-8 so Excel opens the Japanese text correctly.
df.to_csv("data.csv", header=None, index=None, encoding="utf_8_sig")