愛媛県の不審者情報の場所情報をpygeonlpでジオコーディング(地名から緯度経度を取得)

geonlp.ex.nii.ac.jp

環境設定

# System packages: MeCab development files, the UTF-8 IPA dictionary and Boost.
# NOTE(review): presumably the build prerequisites of pygeonlp - confirm against its install guide.
!apt install libmecab-dev mecab-ipadic-utf8 libboost-all-dev
# GDAL development files; presumably needed to build the `gdal` Python package installed below.
!apt install libgdal-dev

# Python packages: pygeonlp itself, plus gdal and jageocoder.
# NOTE(review): gdal and jageocoder look like optional pygeonlp extras - confirm they are required here.
!pip install pygeonlp
!pip install gdal
!pip install jageocoder

# Install the dictionary data that jageocoder needs.
!python -m jageocoder install-dictionary

ジオコーディング(場所の文字列 → 緯度経度)

import pandas as pd
import pygeonlp.api as api


def get_latlon(s):
    """Geoparse a place description and return the coordinates of its first feature.

    Only the first feature returned by ``api.geoparse`` is consulted; its
    ``geometry["coordinates"]`` pair is returned unchanged.

    Args:
        s: Free-text place description. Anything that is not a non-blank
            string (``None``, ``NaN``, ``""``) is treated as "no location".

    Returns:
        A two-element ``pandas.Series``. Both elements are ``None`` when the
        input is blank or missing, when the parser yields no features, or
        when the first feature carries no geometry.
    """
    # Missing cells and blank strings cannot be parsed; skip the parser entirely.
    if not isinstance(s, str) or not s.strip():
        return pd.Series([None, None])

    features = api.geoparse(s)

    # An empty parse result must not be indexed.
    if not features:
        return pd.Series([None, None])

    geometry = features[0].get("geometry")

    if not geometry:
        return pd.Series([None, None])

    return pd.Series(geometry.get("coordinates", [None, None]))

# Place-name dictionaries (original heading: "address dictionary")

# Set up pygeonlp's basic dictionary database under the local "mydic/" directory.
# NOTE(review): presumably downloads dictionary data on first run - confirm against the pygeonlp docs.
api.setup_basic_database(db_dir="mydic/")

# Initialise the pygeonlp API against the same directory ("mydic/" and "mydic" name the same path).
api.init(db_dir="mydic")

# Suspicious-person reports

# Read the first table on the Ehime Prefectural Police page whose text matches
# "概 要", using its first row as the header. Missing cells become empty
# strings and every cell is cast to str.
df0 = (
    pd.read_html(
        "http://www.police.pref.ehime.jp/fushinsha.htm", match="概 要", header=0
    )[0]
    .fillna("")
    .astype(str)
)

# NFKC-normalise the summary text.
# NOTE(review): presumably so full-width punctuation on the page matches the
# ASCII ":" and "()" used below - confirm against the live page.
df0["概 要"] = df0["概 要"].str.normalize("NFKC")

# Each summary is a "◆"-separated record; split it into one column per field.
df1 = df0["概 要"].str.split("◆", expand=True)

df1 = df1.rename(
    columns={0: "管轄署", 1: "種別", 2: "日時", 3: "場所", 4: "状況", 5: "特徴"}
)

# Strip surrounding whitespace BEFORE removing the labels: the label pattern is
# anchored at the start of the string, so leading whitespace left over from the
# split would otherwise keep the label in place. Every column produced by
# str.split holds text (or None), so all of them are stripped.
for col in df1.columns:
    df1[col] = df1[col].str.strip()

# Drop the leading "label:" from each field, together with any whitespace that
# follows the colon.
df1 = df1.replace(r"^(種別|日時|場所|状況|特徴):\s*", "", regex=True)

# Remove the parentheses around the station name.
df1["管轄署"] = df1["管轄署"].str.strip("()")

df1

# Geoparse every location string; the two values returned by get_latlon are
# stored as "lon" and "lat", in that order.
df1[["lon", "lat"]] = df1["場所"].apply(get_latlon)

df1

# Rows without coordinates (only lat/lon are checked, so a row that merely
# lacks another field is not listed here).
df1[df1[["lat", "lon"]].isnull().any(axis=1)]

# Drop rows without coordinates
df2 = df1.dropna(subset=["lat", "lon"])

地図

import html

import folium
from folium.plugins import MarkerCluster

# NOTE(review): `map` shadows the builtin of the same name. The name is kept
# because it is a module-level variable that other cells may reference.
map = folium.Map(
    location=[34.06604300, 132.99765800],
    tiles="https://cyberjapandata.gsi.go.jp/xyz/pale/{z}/{x}/{y}.png",
    attr='&copy; <a href="https://maps.gsi.go.jp/development/ichiran.html">国土地理院</a>',
    zoom_start=10,
)

marker_cluster = MarkerCluster()

# Report fields shown in each popup, one paragraph per field, in this order.
popup_columns = ["管轄署", "種別", "日時", "場所", "状況", "特徴"]

for _, r in df2.iterrows():

    paragraphs = []

    for col in popup_columns:
        value = r[col]
        # The text was scraped from a web page, so escape it before embedding
        # it in popup HTML. Missing fields render as an empty paragraph rather
        # than the literal "None".
        text = "" if pd.isna(value) else html.escape(str(value))
        paragraphs.append(f"<p>{text}</p>")

    folium.Marker(
        location=[r.lat, r.lon],
        popup=folium.Popup(
            "".join(paragraphs),
            max_width=300,
        ),
    ).add_to(marker_cluster)

marker_cluster.add_to(map)

map

map.save("ehime.html")