# Data source: https://perze.jp/nakano/ (lost-pet SOS reports, Nakano ward)
# Write-up / reference: https://nakanocitizens.hatenablog.jp
import json
import re
import pandas as pd
import requests
# Spoof a desktop browser UA — the site may serve different markup to
# unknown clients.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
}
url = "https://perze.jp/nakano/"
# timeout prevents the script from hanging forever if the host stops
# responding (requests has no default timeout).
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()  # fail fast on 4xx/5xx instead of parsing an error page
html = r.text
# Each SOS report is embedded in the page as a JavaScript call
# addSOS(arg0, arg1, ...); — extract the argument list of every call and
# flatten it into one flat dict per report.
result = []
# Raw string: "addSOS\(" in a non-raw literal is an invalid escape
# sequence (DeprecationWarning, SyntaxError in future Pythons).
# Wrapping the comma-separated JS argument list in [...] makes it
# parseable as a JSON array.
for args in re.findall(r"addSOS\((.+?)\);", html):
    data = json.loads("[" + args + "]")
    # Fixed positional arguments of addSOS().
    d = {
        "sosID": data[0],
        "category": data[1],
        "status": data[2],
        "datetime": data[3],
        "public": data[4],
        "comment": data[5],
    }
    # data[6]: list of {"key": ..., "value": ...} attribute pairs
    # (Japanese field names such as 発見日 / 種類 / 内容).
    for pair in data[6]:
        d[pair.get("key")] = pair.get("value")
    # data[7]: list of {"key": ..., "latLng": ...} location entries —
    # presumably latLng is a {"lat": ..., "lng": ...} dict (see the
    # lat/lng split further down).
    for pair in data[7]:
        d[pair.get("key")] = pair.get("latLng")
    result.append(d)
# Assemble the reports into a DataFrame indexed and sorted by SOS id,
# renaming the Japanese attribute columns to English.
df0 = (
    pd.DataFrame(result)
    .set_index("sosID")
    .sort_index()
    .rename(columns={"発見日": "date", "種類": "type", "内容": "text"})
)
# 発見場所 ("discovery location") holds {"lat": ..., "lng": ...} dicts.
# .str.get extracts each coordinate by key explicitly — unlike
# apply(pd.Series) it does not depend on dict key order and is much
# faster; missing keys become NaN instead of raising.
df0["lat"] = df0["発見場所"].str.get("lat")
df0["lng"] = df0["発見場所"].str.get("lng")
df0.drop("発見場所", axis=1, inplace=True)
# Fix the column order for the exported CSV; reindex tolerates missing
# columns (they come out as all-NaN) rather than raising.
column_order = [
    "status",
    "datetime",
    "category",
    "type",
    "lat",
    "lng",
    "comment",
    "public",
    "date",
    "text",
]
df1 = df0.reindex(columns=column_order)
df1.to_csv("nakano.csv")