www.npa.go.jp
github.com
各種コード表
!wget "https://www.npa.go.jp/publications/statistics/koutsuu/opendata/2020/codebook_2020.pdf" -O data.pdf
import camelot
import pandas as pd
tables = camelot.read_pdf(
"data.pdf", pages="all", split_text=True, strip_text=" \n", line_scale=40
)
name = ""
dfs = []
for i, table in enumerate(tables):
temp = table.df
if temp.iat[0, 0] == "項目名":
name = temp.iat[0, 1]
if temp.iat[0, 0] == "コード":
df = temp.rename(columns=temp.iloc[0]).drop(temp.index[0])
df["項目名"] = name
dfs.append(df)
df0 = pd.concat(dfs)
df0.to_csv("code.tsv", sep="\t")
本票
!wget "https://docs.google.com/spreadsheets/d/e/2PACX-1vQeA2o-4Ayu-LBkMRl1AJEd5yNZ9DsCeb1hFECiyGgGUJuwj_M2oJVdbvNv659jxBFxZRCuR5K2mxUF/pub?gid=1779754181&single=true&output=csv" -O pref.csv
!wget "https://docs.google.com/spreadsheets/d/e/2PACX-1vRPcWUgfi848Fj0EwsrN5zSb-gpa2I5paLPQ_GvFotLYnMwAlmvz7tNyh2SGqHdNvaClee__ogp2tN5/pub?gid=0&single=true&output=csv" -O code.csv
import pandas as pd
pd.set_option("display.max_columns", None)
df_honhyo2020 = pd.read_csv(
"https://www.npa.go.jp/publications/statistics/koutsuu/opendata/2020/honhyo_2020.csv",
dtype=str,
encoding="cp932",
)
df_honhyo2019 = pd.read_csv(
"https://www.npa.go.jp/publications/statistics/koutsuu/opendata/2019/honhyo_2019.csv",
dtype=str,
encoding="cp932",
)
df_pref = pd.read_csv("pref.csv", dtype=str)
df_code = pd.read_csv("code.csv", dtype=str)
df_city = pd.read_csv(
"https://www.soumu.go.jp/main_content/000608358.csv", encoding="cp932", dtype=str
)
df0 = pd.concat([df_honhyo2020, df_honhyo2019]).reset_index(drop=True)
df0.shape
df0.head()
df0.describe()
df1 = df0.copy()
def conv2date(df):
df.set_axis(["year", "month", "day", "hour", "minute"], axis=1, inplace=True)
return pd.to_datetime(df)
df1["accident_date"] = conv2date(
df1[["発生日時 年", "発生日時 月", "発生日時 日", "発生日時 時", "発生日時 分"]]
)
df1 = df1.join(df_pref.set_index("都道府県コード"), on="都道府県コード")
df1["tiiki-code"] = df1["ken-code"].str.cat(df1["市区町村コード"])
df1 = df1.join(
df_city.set_index(["ken-code", "tiiki-code"]), on=["ken-code", "tiiki-code"]
)
df1.head()
df1["road_type"] = pd.cut(
df1["路線コード"].str[:-1].astype(int),
[
1,
1000,
1500,
2000,
3000,
4000,
5000,
5500,
6000,
7000,
8000,
8500,
9000,
9500,
9900,
10000,
],
labels=[
"一般国道(国道番号)",
"主要地方道-都道府県道",
"主要地方道-市道",
"一般都道府県道",
"一般市町村道",
"高速自動車国道",
"自動車専用道-指定",
"自動車専用道-その他",
"道路運送法上の道路",
"農(免)道",
"林道",
"港湾道",
"私道",
"その他",
"一般の交通の用に供するその他の道路",
],
right=False,
)
df1["road_type"].value_counts()
df1["road_bypass"] = "バイパス区間"
df1["road_bypass"].mask(
df1["路線コード"].str[-1].astype(int) == 0, "現道区間又は包括路線", inplace=True
)
df1["road_bypass"].value_counts()
df_code["コード"] = df_code["コード"].fillna("")
df_code
for (key, name), df in df_code.groupby(["項目名", "name"]):
d = df.set_index("コード")["区分"].to_dict()
df1[name] = df1[key].replace(d)
df1["death_flag"] = (df1["accident_type"] == "死亡").astype(int)
df1
df1["accident_type"].value_counts()
df1["road_updown_type"].value_counts()
df1["day_night_type"].value_counts()
df1["weather_type"].value_counts()
df1["terrain_type"].value_counts()
df1["road_condition_type"].value_counts()
df1["road_shape_type"].value_counts()
df1["roundabout_diameter_type"].value_counts()
df1["traffic_lights_type"].value_counts()
df1["pause_sign_type_a"].value_counts()
df1["pause_sign_type_b"].value_counts()
df1["pause_display_type_a"].value_counts()
df1["pause_display_type_b"].value_counts()
df1["road_width_type"].value_counts()
df1["road_alignment_type"].value_counts()
df1["clash_point_type"].value_counts()
df1["zone_regulation_type"].value_counts()
df1["divider_type"].value_counts()
df1["pedestrian_road_division_type"].value_counts()
df1["accident_vehicle_type"].value_counts()
df1["age_type_a"].value_counts()
df1["age_type_b"].value_counts()
df1["parties_type_a"].value_counts()
df1["parties_type_b"].value_counts()
df1["use_type_a"].value_counts()
df1["use_type_b"].value_counts()
df1["vehicle_shape_type_a"].value_counts()
df1["vehicle_shape_type_b"].value_counts()
df1["speed_regulation_type_a"].value_counts()
df1["speed_regulation_type_b"].value_counts()
df1["collision_site_type_a"].value_counts()
df1["collision_site_type_b"].value_counts()
df1["airbag_equipment_type_a"].value_counts()
df1["airbag_equipment_type_b"].value_counts()
df1["side_airbag_equipment_type_a"].value_counts()
df1["side_airbag_equipment_type_b"].value_counts()
df1["personal_injury_type_a"].value_counts()
df1["personal_injury_type_b"].value_counts()
df1["weekday_type"].value_counts()
df1["holiday_type"].value_counts()