!apt install poppler-utils poppler-data import requests from bs4 import BeautifulSoup from urllib.parse import urljoin import os import re headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko", } url = "https://www.mhlw.go.jp/stf/seisakunitsuite/bunya/0000121431_00086.html" r = requests.get(url, headers=headers) r.raise_for_status() soup = BeautifulSoup(r.content, "html5lib") for i in soup.find("div", class_="l-contentMain").find_all( "a", string=re.compile(r"^新型コロナウイルス感染症の現在の状況と厚生労働省の対応について") ): href = i.find_next("a", string="PDF版") if href: link = urljoin(url, href.get("href")) filename = os.path.basename(link) !wget $link !pdfinfo $filename
どうも同じ人が Microsoft® Word 2016 で作成しているようです。時刻を見ると UTC なので、+9 時間して日本時間(JST)に直すと、深夜に及んでいるものも…… (T ^ T)
Saving to: ‘000599916.pdf’ CreationDate: Tue Feb 25 15:15:06 2020 UTC ModDate: Tue Feb 25 15:15:06 2020 UTC Saving to: ‘000603636.pdf’ CreationDate: Wed Mar 4 05:09:46 2020 UTC ModDate: Wed Mar 4 05:09:46 2020 UTC Saving to: ‘000602410.pdf’ CreationDate: Mon Mar 2 14:29:19 2020 UTC ModDate: Mon Mar 2 14:29:19 2020 UTC Saving to: ‘000602411.pdf’ CreationDate: Mon Mar 2 14:34:23 2020 UTC ModDate: Mon Mar 2 14:34:23 2020 UTC Saving to: ‘000602412.pdf’ CreationDate: Mon Mar 2 14:28:01 2020 UTC ModDate: Mon Mar 2 14:28:01 2020 UTC Saving to: ‘000600358.pdf’ CreationDate: Wed Feb 26 13:30:23 2020 UTC ModDate: Wed Feb 26 13:30:23 2020 UTC Saving to: ‘000599916.pdf’ CreationDate: Tue Feb 25 15:15:06 2020 UTC ModDate: Tue Feb 25 15:15:06 2020 UTC Saving to: ‘000599296.pdf’ CreationDate: Fri Feb 21 15:40:31 2020 UTC ModDate: Fri Feb 21 15:40:31 2020 UTC Saving to: ‘000598726.pdf’ CreationDate: Thu Feb 20 12:32:16 2020 UTC ModDate: Thu Feb 20 12:32:16 2020 UTC Saving to: ‘000598731.pdf’ CreationDate: Thu Feb 20 12:41:08 2020 UTC ModDate: Thu Feb 20 12:41:08 2020 UTC Saving to: ‘000597478.pdf’ CreationDate: Tue Feb 18 09:06:52 2020 UTC ModDate: Tue Feb 18 09:06:52 2020 UTC Saving to: ‘000596893.pdf’ CreationDate: Mon Feb 17 10:37:33 2020 UTC ModDate: Mon Feb 17 10:37:33 2020 UTC Saving to: ‘000596241.pdf’ CreationDate: Fri Feb 14 13:32:51 2020 UTC ModDate: Fri Feb 14 13:32:51 2020 UTC Saving to: ‘000594994.pdf’ CreationDate: Wed Feb 12 12:55:09 2020 UTC ModDate: Wed Feb 12 12:55:09 2020 UTC