exploratory.io
import pandas as pd
df1 = pd.read_excel(
"https://www.e-stat.go.jp/stat-search/file-download?statInfId=000031797656&fileKind=0",
sheet_name=1,
header=None,
)
df1.head(10)
df2 = df1.loc[df1.notnull().sum(axis=1) > 3, df1.notnull().sum() > 2].copy()
df2.to_csv("data.tsv", sep="\t")
df2.iloc[0:2, 0] = "大分類"
df2.iloc[0:2, 1] = "中分類"
df2.iloc[0:2, 2] = "小分類"
df2.iat[3, 0] = "刑法犯総数"
df2.iloc[3:, 1][df2.iloc[3:, 0].notnull()] = "計"
df2.iloc[3:, 0].fillna(method="ffill", inplace=True)
df2.iloc[3:, 1].fillna(method="ffill", inplace=True)
df2.iloc[3:, 2].fillna("-", inplace=True)
df2
df2.drop([6], inplace=True)
df2.drop([12, 13, 16, 17, 20, 23, 24, 27, 28], axis=1, inplace=True)
df2.iloc[:, 1] = df2.iloc[:, 1].str.replace("うち)", "")
df2.iloc[:, 2] = df2.iloc[:, 2].str.replace("うち)", "")
df2.iloc[0] = df2.iloc[0].str.replace("うち)", "検挙人員_")
df2.iloc[0].fillna(method="ffill", inplace=True)
df2.iloc[1] = df2.iloc[1].replace({"平成30年": "2018年", "平成29年": "2017年"})
df2.T.to_csv("data.csv", index=None, header=None)
df3 = pd.read_csv("data.csv", index_col=[0, 1], header=[0, 1, 2], na_values="-")
df3.drop("検挙率", level=0, inplace=True)
df3.loc[:, ("粗暴犯", "傷害", "その他")] = (
df3.loc[:, ("粗暴犯", "傷害", "-")] - df3.loc[:, ("粗暴犯", "傷害", "傷害致死")]
)
df3.loc[:, ("知能犯", "偽造", "その他")] = (
df3.loc[:, ("知能犯", "偽造", "-")]
- df3.loc[:, ("知能犯", "偽造", "通貨偽造")]
- df3.loc[:, ("知能犯", "偽造", "文書偽造")]
- df3.loc[:, ("知能犯", "偽造", "支払用カード偽造")]
- df3.loc[:, ("知能犯", "偽造", "有価証券偽造")]
)
df3.loc[:, ("知能犯", "汚職", "その他")] = (
df3.loc[:, ("知能犯", "汚職", "-")] - df3.loc[:, ("知能犯", "汚職", "賄賂")]
)
df3.loc[:, ("風俗犯", "わいせつ", "その他")] = (
df3.loc[:, ("風俗犯", "わいせつ", "-")]
- df3.loc[:, ("風俗犯", "わいせつ", "強制わいせつ")]
- df3.loc[:, ("風俗犯", "わいせつ", "公然わいせつ")]
)
df3.loc[:, ("その他の刑法犯", "その他", "-")] = (
df3.loc[:, ("その他の刑法犯", "計", "-")]
- df3.loc[:, ("その他の刑法犯", "占有離脱物横領", "-")]
- df3.loc[:, ("その他の刑法犯", "公務執行妨害", "-")]
- df3.loc[:, ("その他の刑法犯", "住居侵入", "-")]
- df3.loc[:, ("その他の刑法犯", "逮捕監禁", "-")]
- df3.loc[:, ("その他の刑法犯", "略取誘拐・人身売買", "-")]
- df3.loc[:, ("その他の刑法犯", "盗品", "-")]
- df3.loc[:, ("その他の刑法犯", "器物損壊等", "-")]
)
df4 = (
df3.T.reset_index()
.set_index(["大分類", "中分類", "小分類"])
.sort_index()
.fillna(0)
.astype(int)
)
df5 = df4["検挙件数"] / df4["認知件数"] * 100
df5.columns = pd.MultiIndex.from_product([["検挙率"], df5.columns])
df6 = df4["検挙人員"] - df4["検挙人員_少年"]
df6.columns = pd.MultiIndex.from_product([["検挙人員_成人"], df6.columns])
df7 = pd.concat([df4, df5, df6], axis=1).sort_index(axis=1)
df7.fillna(0, inplace=True)
df7
df7.loc[pd.IndexSlice[:, "計", :], :]