feed43で10進数の数値文字参照(&#…; 形式)を通常の文字に変換する方法はないかな
beautifulsoupだと変換できるんだけど
# Fetch the document list page, save a prettified copy of the parsed HTML to
# test.html, and print the text and absolute URL of every <a target="_blank">.
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

url = 'http://ehime.force.com/PUB_VF_Detail_Docs'

# Browser-like User-Agent string.
# NOTE(review): presumably the site needs a browser UA - confirm.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
}

# requests never times out by default; bound the wait so an unresponsive
# server cannot hang the script forever.
TIMEOUT_SECONDS = 10

r = requests.get(url, headers=headers, timeout=TIMEOUT_SECONDS)

if r.status_code == requests.codes.ok:

    # Parse the raw bytes so the parser handles encoding detection itself.
    soup = BeautifulSoup(r.content, 'html5lib')

    # Keep a readable dump of the parsed document for inspection.
    with open('test.html', mode='w', encoding='utf-8') as fw:
        fw.write(soup.prettify())

    for link in soup.find_all('a', target='_blank'):

        # Remove the <i> tag (left disabled in the original):
        # link.i.extract()

        print(link.get_text(strip=True))

        # href values may be relative; resolve them against the page URL.
        print(urljoin(url, link.get('href')))
else:
    # Report the failure instead of exiting silently with no output.
    print(f'Request failed with HTTP status {r.status_code}: {url}', file=sys.stderr)
勝手に変換してくれた(出力例:災害対策本部・災害警戒本部関係情報)