楽天モバイルの基地局のPCIをMLSと比較

eNB-LCIDとPCIの数が同じじゃないとエラーがでます

!pip install geopandas
!pip install rtree
!pip install pygeos

!wget https://www.esrij.com/cgi-bin/wp/wp-content/uploads/2012/10/japan_ver83.zip

import pandas as pd
import geopandas as gpd

japan = gpd.read_file("japan_ver83.zip!japan_ver83")

# 設定

# スプレッドシートのURL
csv_url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vRE1NoYtNw1FmjRQ8wcdPkcE0Ryeoc2mfFkCQPHjzwL5CpwNKkLXnBl_F7c0LZjrtbLtRLH55ZVi6gQ/pub?gid=0&single=true&output=csv"


def enblcid_split(df0):

    df1 = (
        df0["eNB-LCID"]
        .str.split("-", expand=True)
        .rename(columns={0: "eNB", 1: "LCID"})
    )
    df2 = df0.join(df1)

    df2["LCID"] = df2["LCID"].str.split(",")
    df2["PCI"] = df2["PCI"].str.split(",")

    df3 = df2.loc[:, ["eNB-LCID", "LCID", "PCI"]].copy()

    df4 = df3.set_index("eNB-LCID").apply(pd.Series.explode).reset_index()

    df4["eNB"] = df4["eNB-LCID"].str.split("-", expand=True)[0]

    df4["id"] = df4["eNB"].astype(str) + "-" + df4["LCID"].astype(str)

    return df4


# マイデータ

df0 = pd.read_csv(csv_url, index_col=0).dropna(how="all")

df1 = enblcid_split(df0.dropna(subset=["eNB-LCID"]))
df1

df0.drop("PCI", axis=1, inplace=True)

# MLSデータ

# 都道府県名で抽出
pref = japan[japan["KEN"] == "愛媛県"]

df2 = pd.read_json("https://cellmap.rukihena.com/mls44011.json")
df2

pt_df = gpd.GeoDataFrame(
    df2, geometry=gpd.points_from_xy(df2.lon, df2.lat), crs="EPSG:6668"
)
pt_df

spj = gpd.sjoin(pt_df, pref, op="within")
spj

# eNB-LCID

# cellからeNB-LCIDを作成
spj["id"] = (
    spj["cell"].apply(lambda x: divmod(x, 256)).apply(lambda s: f"{s[0]}-{s[1]}")
)

spj.rename(columns={"unit": "PCI"}, inplace=True)

df3 = spj.loc[:, ["id", "PCI"]].copy().reset_index(drop=True)

df3["PCI"] = df3["PCI"].astype(str)

df3

df2.dtypes

df3.dtypes

df4 = pd.merge(df1, df3, on="id")

df4[df4["PCI_x"] == df4["PCI_y"]]

df5 = df4[df4["PCI_x"] != df4["PCI_y"]]
df5

df6 = pd.merge(df0, df5, on="eNB-LCID")

df6["sector"] = df6["sector"].astype(int)

df6.to_csv("pci.csv", encoding="utf_8_sig")

df6.groupby(by="eNB-LCID")["LCID"].apply(list).apply(lambda x: ",".join(map(str, x)))

from google.colab import files

files.download("pci.csv")