不審者マップ

qiita.com

愛媛県の不審者マップ https://imabari.github.io/fushinsha_map/

import html
import pathlib

import folium
import pandas as pd
import requests

GEO_URL = "https://raw.githubusercontent.com/geolonia/japanese-addresses/master/data/latest.csv"


def fetch_file(url, dir=".", timeout=30):
    """Download *url* and save it under *dir*.

    The file is named after the last path component of the URL, and the
    target directory is created if it does not exist yet.

    Args:
        url: Address of the file to download.
        dir: Directory in which the file is stored (default: current
            directory).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        ``pathlib.Path`` of the saved file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    p = pathlib.Path(dir, pathlib.PurePath(url).name)
    p.parent.mkdir(parents=True, exist_ok=True)

    p.write_bytes(r.content)
    return p


if __name__ == "__main__":
    # Build an interactive map of the suspicious-person reports published by
    # the Ehime Prefectural Police. Each report is matched to a town-level
    # address in the downloaded address table to obtain coordinates.
    # Outputs: map/ehime.csv (all reports), map/nan.csv (reports without
    # coordinates) and map/index.html (the folium map).

    # Translation table: ASCII digits -> kanji numerals.
    # NOTE(review): presumably the address table writes block numbers in
    # kanji, so the scraped locations are converted to match -- confirm.
    kanji = str.maketrans("1234567890", "一二三四五六七八九〇")

    p_geo = fetch_file(GEO_URL, "src")

    df_geo = pd.read_csv(p_geo)

    # Keep only Ehime Prefecture
    df_geo_ehime = df_geo[df_geo["都道府県名"] == "愛媛県"].copy()

    # Merge key: municipality name + town name
    df_geo_ehime["address"] = df_geo_ehime["市区町村名"] + df_geo_ehime["大字町丁目名"]

    # A left merge repeats an incident once per matching address row, so keep
    # a single row (and therefore a single coordinate) per address.
    df_geo_ehime = df_geo_ehime.drop_duplicates(subset="address")

    # Scrape the suspicious-person table from the Ehime police site
    df_tmp = (
        pd.read_html(
            "http://www.police.pref.ehime.jp/fushinsha.htm", match="概 要", header=0
        )[0]
        .fillna("")
        .astype(str)
    )

    # Normalise the summary text (NFKC folds full-width characters)
    df_tmp["概 要"] = df_tmp["概 要"].str.normalize("NFKC")

    # Split each summary into its five fields
    df = df_tmp["概 要"].str.extract("(.+)◆.+:(.+)◆.+:(.+)◆.+:(.+)◆.+:(.+)")

    # Name the extracted columns
    df.rename(columns={0: "管轄署", 1: "種別", 2: "日時", 3: "場所", 4: "状況"}, inplace=True)

    # Summaries that do not match the pattern come back as all-NaN rows;
    # drop them so the string operations below never see a non-string value.
    df = df.dropna()

    # Strip surrounding whitespace from every field
    for col in df.columns:
        df[col] = df[col].str.strip()

    # Remove the parentheses around the police station name
    df["管轄署"] = df["管轄署"].str.strip("()")

    # Correct a misspelled place name
    df["場所"] = df["場所"].str.replace("常磐", "常盤", regex=False)

    # Convert digits in the location to kanji numerals
    df["場所"] = df["場所"].str.translate(kanji)

    # Cut the location down to the town name by removing the place-type
    # suffix and everything after it
    df["住所"] = df["場所"].str.replace(
        "(路上|施設|店舗|付近|一般住宅|住宅|アパート|マンション|公園|屋外|緑地|駐輪場|駐車場|河川敷|児童).*", "", regex=True
    )

    # Remove trailing "甲", "乙", "丙" and "の" characters
    df["address"] = df["住所"].str.rstrip("甲乙丙の")

    # "北新田" is not in the address table, so fall back to "新田"
    df["address"] = df["address"].str.replace("西条市新田字北新田", "西条市新田", regex=False)

    # Running index of reports sharing an address; used further down to
    # shift markers sideways so they do not sit exactly on top of each other
    df["count"] = df.groupby("address").cumcount()

    # Marker colour per incident type
    color_map = {
        "のぞき・盗撮": "pink",
        "身体露出": "orange",
        "ちかん": "gray",
        "不審者": "purple",
        "声かけ": "green",
        "暴行": "red",
        "つきまとい": "blue",
        "写真撮影": "lightred",
        "建造物侵入": "darkred",
        "住居侵入": "darkred",
        "のぞき": "pink",
        "動画撮影": "lightred",
    }

    # Series.map yields NaN for types missing from the table (Series.replace
    # would leave the type name in place), so the black fallback really applies.
    df["color"] = df["種別"].map(color_map).fillna("black")

    # Colour names available for markers; kept as a reference when extending
    # color_map above
    colors = {
        "lightred",
        "darkred",
        "darkblue",
        "pink",
        "gray",
        "green",
        "orange",
        "purple",
        "lightgray",
        "blue",
        "beige",
        "cadetblue",
        "darkgreen",
        "darkpurple",
        "lightblue",
        "black",
        "lightgreen",
        "red",
        "white",
    }

    # Attach latitude/longitude by address
    df_ehime = df.merge(df_geo_ehime, how="left", on="address")

    p_csv = pathlib.Path("map", "ehime.csv")
    p_csv.parent.mkdir(parents=True, exist_ok=True)

    df_ehime.to_csv(p_csv, encoding="utf_8_sig")

    # Reports that could not be geocoded. Only the coordinate columns are
    # checked, so a gap in some unrelated column of the address table does
    # not discard a report that does have coordinates.
    missing_coords = df_ehime[["緯度", "経度"]].isnull().any(axis=1)

    # Save them for inspection
    p_nan = pathlib.Path("map", "nan.csv")
    df_ehime[missing_coords].to_csv(p_nan, encoding="utf_8_sig")

    # Keep only the reports that have coordinates
    df_ehime = df_ehime[~missing_coords]

    # Named fmap rather than map to avoid shadowing the builtin
    fmap = folium.Map(location=[34.06604300, 132.99765800], zoom_start=10)

    popup_fields = ["管轄署", "種別", "日時", "場所", "状況"]

    for _, r in df_ehime.iterrows():
        # The text comes from a scraped web page, so escape it before
        # embedding it in the popup HTML.
        popup_html = "".join(
            f"<p>{html.escape(str(r[field]))}</p>" for field in popup_fields
        )

        folium.Marker(
            # Shift each additional report at the same address slightly east
            location=[r["緯度"], r["経度"] + r["count"] * 0.0002],
            popup=folium.Popup(
                popup_html,
                max_width=300,
                min_width=150,
            ),
            icon=folium.Icon(color=r["color"]),
        ).add_to(fmap)

    p_map = pathlib.Path("map", "index.html")

    fmap.save(str(p_map))