# docs.google.com
import copy
import re
import pandas as pd
def checkdigit(n: int) -> int:
    """Append a modulus-11 check digit to the integer code *n*.

    The five lowest decimal digits of ``n`` are weighted 2, 3, 4, 5 and 6,
    starting from the units digit, and summed.  The check digit is
    ``(11 - total % 11) % 10``.  Any digits above the fifth do not take part
    in the check-digit calculation.

    Args:
        n: Integer code without a check digit.

    Returns:
        ``n * 10 + check_digit``, i.e. ``n`` with the check digit appended as
        the new units digit.
    """
    # Integers are immutable, so rebinding a local name is enough; no copy
    # of ``n`` is required to keep the caller's value intact.
    remaining = n
    total = 0
    for weight in range(2, 7):
        remaining, digit = divmod(remaining, 10)
        total += weight * digit
    # ``11 - total % 11`` lies in 1..11; the trailing ``% 10`` keeps only its
    # last digit, so 10 becomes 0 and 11 becomes 1.
    return n * 10 + (11 - total % 11) % 10
# Code table: the first three columns of the Excel workbook hosted on
# soumu.go.jp.  The two name headers contain an embedded newline, so they are
# renamed to plain column names.
df_code = pd.read_excel(
    "https://www.soumu.go.jp/main_content/000618153.xls", usecols=[0, 1, 2]
)
df_code.rename(
    columns={"都道府県名\n(漢字)": "都道府県名", "市区町村名\n(漢字)": "市区町村名"},
    inplace=True,
)
# Drop every row that has a missing cell, then renumber the index from 0.
df_code.dropna(inplace=True)
df_code.reset_index(inplace=True, drop=True)

# Postal data: ken_all.zip from post.japanpost.jp, read as cp932 without a
# header row.  Only columns 0, 6 and 7 are kept.
df = pd.read_csv(
    "https://www.post.japanpost.jp/zipcode/dl/oogaki/zip/ken_all.zip",
    encoding="cp932",
    header=None,
    usecols=[0, 6, 7],
    names=["コード", "都道府県名", "市区町村名"],
)
# Collapse repeated (code, prefecture, municipality) rows to one each.
df.drop_duplicates(keep="first", inplace=True)
# Append the check digit so the code can be used as the merge key below.
df["団体コード"] = df["コード"].apply(checkdigit)
df.drop(columns="コード", inplace=True)

# Left join keeps every row of the code table; name columns coming from the
# postal data get the suffix "郵便".  Rows without a postal match end up with
# empty strings instead of NaN.
df1 = pd.merge(df_code, df, on=["団体コード"], how="left", suffixes=("", "郵便"))
df1.fillna("", inplace=True)

# Two hand-written spelling fixes so that the municipality name from the code
# table is a suffix of the postal name for these rows.
df1["市区町村名"] = df1["市区町村名"].mask(
    (df1["都道府県名"] == "高知県") & (df1["市区町村名"] == "梼原町"), "檮原町"
)
df1["市区町村名郵便"] = df1["市区町村名郵便"].mask(
    (df1["都道府県名"] == "福岡県") & (df1["市区町村名郵便"] == "糟屋郡須惠町"), "糟屋郡須恵町"
)

# Strip the municipality name from the end of the postal name; what is left
# is stored as "郡名".  The name is escaped so that it is always matched
# literally, never interpreted as a regular-expression pattern.
df1["郡名"] = [
    re.sub(re.escape(city) + "$", "", postal_city)
    for city, postal_city in zip(df1["市区町村名"], df1["市区町村名郵便"])
]

# Sanity check: rows where nothing was stripped although a postal name exists,
# i.e. the postal name does not end with the municipality name.  A bare
# expression is silently discarded when run as a script, so print explicitly.
df_unmatched = df1[(df1["郡名"] == df1["市区町村名郵便"]) & (df1["市区町村名郵便"] != "")]
if not df_unmatched.empty:
    print(df_unmatched)

# Export the selected columns as UTF-8 with BOM, without the index column.
df2 = df1.loc[:, ["団体コード", "都道府県名", "郡名", "市区町村名"]].copy()
df2.to_csv("code.csv", index=False, encoding="utf_8_sig")