digitalnagasaki.hatenablog.com
# Download the three sample TEI XML files into the current directory; the
# Python script below picks them up with the glob "soseki_letter*.xml".
# -O keeps the remote file name. --fail makes curl exit non-zero on an HTTP
# error instead of saving the server's error page as a bogus .xml file that
# the XML parser would later choke on.
curl --fail -O "https://www.dhii.jp/dh/tei/soseki_letter_19000908.xml"
curl --fail -O "https://www.dhii.jp/dh/tei/soseki_letter_19001008.xml"
curl --fail -O "https://www.dhii.jp/dh/tei/soseki_letter_19001022.xml"
import html
import pathlib

import folium
import pandas as pd
from lxml import etree

# Namespace prefix mapping used by every XPath query below.
ns = {"tei": "http://www.tei-c.org/ns/1.0"}


def _location_record(location):
    """Build one map record from a ``tei:location`` element.

    Args:
        location: An lxml element matched by ``//tei:location``.

    Returns:
        A dict with ``title`` (the ``tei:placeName`` text, falling back to
        ``tei:address/tei:region``), ``geo`` (the raw ``tei:geo`` text) and
        ``lat``/``lon`` floats parsed from the two whitespace-separated
        tokens in ``geo``. Returns ``None`` when the element has no usable
        title or coordinates, since it cannot be placed on the map.
    """
    name = location.xpath("./tei:placeName/text()", namespaces=ns)
    region = location.xpath("./tei:address/tei:region/text()", namespaces=ns)
    geo = location.xpath("./tei:geo/text()", namespaces=ns)

    titles = name or region
    if not titles or not geo:
        return None

    try:
        # Raises ValueError both for a wrong token count (unpacking) and for
        # tokens that are not numbers (float()).
        lat, lon = (float(value) for value in geo[0].split())
    except ValueError:
        return None

    # str() detaches the values from lxml's result objects.
    return {"title": str(titles[0]), "geo": str(geo[0]), "lat": lat, "lon": lon}


data = []
# Sorted so that the row kept by drop_duplicates() below does not depend on
# the file-system's listing order.
for p in sorted(pathlib.Path(".").glob("soseki_letter*.xml")):
    tree = etree.parse(p)
    root = tree.getroot()
    for location in root.xpath("//tei:location", namespaces=ns):
        record = _location_record(location)
        if record is not None:
            data.append(record)

# Declaring the columns keeps the frame well-formed even when nothing matched.
df = pd.DataFrame(data, columns=["title", "geo", "lat", "lon"])

# Remove duplicates: keep the first row for each distinct coordinate string.
df = df.drop_duplicates(subset="geo")

# Bare expression: displays the table when run as a notebook cell.
df

# Named letter_map rather than map so the builtin map() is not shadowed.
letter_map = folium.Map(
    location=[35.5, 138.5],
    zoom_start=2,
)

for title, lat, lon in zip(df["title"], df["lat"], df["lon"]):
    folium.Marker(
        location=[lat, lon],
        # The popup content is HTML, so the title text must be escaped.
        popup=folium.Popup(f"<p>{html.escape(title)}</p>", max_width=300),
    ).add_to(letter_map)

# Bare expression: renders the map when run as a notebook cell.
letter_map