# Scrape the Kumamoto prefecture open-data page and save the href of every
# CSV download link found in the COVID-19 table as a single row in
# kumamoto.csv.
import csv
import re

from bs4 import BeautifulSoup
import requests

url = "https://www.pref.kumamoto.jp/kiji_22038.html"

# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url, timeout=30)
r.raise_for_status()

soup = BeautifulSoup(r.content, "html.parser")

# Locate the section by its <h3> heading; the table rows are searched from the
# heading's parent element.
heading = soup.find("h3", string=re.compile("新型コロナウイルス感染症"))
if heading is None:
    raise RuntimeError(f"expected <h3> heading not found on {url}")
tags = heading.parent.find_all("tr")

# Links are identified by the CSV icon image they wrap. The dot is escaped so
# that only a literal "csv.gif" suffix matches.
csv_icon = re.compile(r"csv\.gif$")

result = []
for tag in tags[1:]:  # the first row is skipped, as in the original script
    img = tag.find("img", src=csv_icon)
    if img is None:
        # Row has no CSV icon; skip it instead of crashing on None.
        continue
    link = img.find_parent("a")
    if link is None:
        # Icon is not wrapped in a link, so there is no href to record.
        continue
    result.append(link.get("href"))

# newline="" is what the csv module requires so that it controls line endings
# itself (otherwise Windows output gets \r\r\n).
with open("kumamoto.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(result)
# Shell variant: fetch the page, use XPath to select the href of the link that
# wraps an image in the 3rd table row / 4th cell, strip the attribute quoting
# with cut, then decode the "&amp;" entities that xmllint leaves in the
# serialised attribute value back to plain "&".
!curl -s 'https://www.pref.kumamoto.jp/kiji_22038.html' | xmllint --xpath '//table/tbody/tr[3]/td[4]//ul/li/a/img/../@href' --html - | cut -d\" -f 2 | sed -e 's/&amp;/\&/g'