TEI/XMLファイルから抜き出した地理情報を地図上にマッピング(lxmlで抽出)

digitalnagasaki.hatenablog.com

curl -O "https://www.dhii.jp/dh/tei/soseki_letter_19000908.xml"
curl -O "https://www.dhii.jp/dh/tei/soseki_letter_19001008.xml"
curl -O "https://www.dhii.jp/dh/tei/soseki_letter_19001022.xml"
import html
import pathlib

import folium
import pandas as pd
from lxml import etree

# Prefix-to-URI map passed to every XPath query below.
ns = {"tei": "http://www.tei-c.org/ns/1.0"}


def extract_locations(root):
    """Return one ``{"title", "geo"}`` record per usable ``<location>``.

    Args:
        root: Parsed document root (any object exposing lxml's
            ``xpath(query, namespaces=...)`` method).

    Returns:
        A list of dicts in document order.  ``title`` is the first
        ``placeName`` text, falling back to the first ``address/region``
        text; ``geo`` is the first ``geo`` text, left unparsed.

    Locations that lack a ``geo`` value, or that have neither a place name
    nor a region, are skipped: they cannot be placed or labelled on the map,
    and indexing their empty XPath result would raise ``IndexError``.
    """
    records = []

    for loc in root.xpath("//tei:location", namespaces=ns):
        geo = loc.xpath("./tei:geo/text()", namespaces=ns)
        # Prefer the place name; fall back to the region of the address.
        label = loc.xpath("./tei:placeName/text()", namespaces=ns) or loc.xpath(
            "./tei:address/tei:region/text()", namespaces=ns
        )

        if not geo or not label:
            continue

        # Store plain ``str`` values rather than the XPath result objects.
        records.append({"title": str(label[0]), "geo": str(geo[0])})

    return records


# Records gathered from every downloaded letter in the working directory.
data = []

for p in pathlib.Path(".").glob("soseki_letter*.xml"):
    data.extend(extract_locations(etree.parse(p).getroot()))


# Name the columns explicitly so the frame still has "title" and "geo" when
# no location was extracted; otherwise the steps below fail with KeyError.
df = pd.DataFrame(data, columns=["title", "geo"])

# Remove duplicates: keep only the first row for each distinct "geo" string.
df.drop_duplicates(subset="geo", inplace=True)

# Split the whitespace-separated "geo" string into numeric lat / lon columns.
if df.empty:
    # Splitting an empty column yields no columns to assign from, so create
    # the two float columns directly.
    df["lat"] = pd.Series(dtype=float)
    df["lon"] = pd.Series(dtype=float)
else:
    df[["lat", "lon"]] = df["geo"].str.split(expand=True).astype(float)

# Bare expression: displays the frame when run as a notebook cell.
df

# NOTE: this name shadows the builtin ``map``; it is kept so that existing
# references to the module-level ``map`` object keep working.
map = folium.Map(
    location=[35.5, 138.5],
    zoom_start=2,
)

# Add one marker per row, with the place title shown in its popup.
for row in df.itertuples(index=False):

    # The title comes from the XML text, so escape it before embedding it in
    # the popup's HTML; a raw "<" or "&" would otherwise be read as markup.
    label = html.escape(str(row.title))

    folium.Marker(
        location=[row.lat, row.lon],
        popup=folium.Popup(f"<p>{label}</p>", max_width=300),
    ).add_to(map)

# Bare expression: renders the map when run as a notebook cell.
map