from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
from playwright.sync_api import sync_playwright
def fetch_html(url, id, parser="html.parser"):
    """Open *url* in Chromium, click through its pages, and return the parsed HTML.

    The page is loaded with Playwright and the element matching the text
    ``次へ`` ("Next") is clicked repeatedly until a click attempt times out.
    The final page markup is then written to ``{id}.html`` (UTF-8, relative
    to the current working directory) and parsed with BeautifulSoup.

    Args:
        url: Address passed to ``page.goto``.
        id: Value interpolated into the output file name ``{id}.html``.
            (The name shadows the builtin; it is kept so existing keyword
            callers continue to work.)
        parser: Parser name handed to ``BeautifulSoup``.

    Returns:
        The ``BeautifulSoup`` object built from the final page markup.
    """
    # NOTE(review): BeautifulSoup is not imported in the visible part of this
    # file — confirm the import exists elsewhere.
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(url)
            # Keep clicking the "next" control; a timeout on the click is
            # treated as "no further page" and ends the loop.
            while True:
                try:
                    page.locator("text=次へ").click(timeout=10000)
                except PlaywrightTimeoutError:
                    break
            # Take a single snapshot so the saved file and the parsed tree
            # are guaranteed to come from the same markup.
            html = page.content()
        finally:
            # Close the browser even when navigation or clicking raises.
            browser.close()

    with open(f"{id}.html", "w", encoding="utf-8") as f:
        f.write(html)
    return BeautifulSoup(html, parser)