別のExcelファイルからデータラングリングしてみた
確かに都道府県が入っているから楽でした
!wget "https://www.e-stat.go.jp/stat-search/file-download?statInfId=000031693271&fileKind=0" -O data.xlsx
import pandas as pd data = [] for i in range(1, 8): df1_tmp = pd.read_excel( "data.xlsx", sheet_name=f"生命表{i}", header=None, na_values="…" ) row, _ = df1_tmp.shape for n in range(0, row, 49): city = df1_tmp.iat[n + 1, 1] male = df1_tmp.iat[n + 5, 6] female = df1_tmp.iat[n + 27, 6] data.append([city, male, female]) df = pd.DataFrame(data) df.columns = ["市区町村", "男", "女"] df_city = df["市区町村"].str.split(r"\s", expand=True) df_city.columns = ["都道府県", "市郡", "区町村"] df1 = pd.concat([df_city, df], axis=1).drop("市区町村", axis=1) df1["集計"] = 1 df1.loc[df1["市郡"].isnull() & df1["区町村"].isnull(), "集計"] = 0 df1.loc[ (df1.duplicated(subset=["都道府県", "市郡"], keep=False)) & (df1["区町村"].isnull()), "集計" ] = -1 df1.loc[df1["都道府県"] == "全国", "集計"] = -1 df1.loc[df1["市郡"] == "東京都区部", "集計"] = -1 df1[df1["集計"] == -1] df2 = df1[df1["集計"] > 0].copy().drop("集計", axis=1).reset_index(drop=True) df2 = df2.loc[:, ["都道府県", "市郡", "区町村", "男", "女"]] df2.to_csv("data.csv")