twitter.com
Tweepy V2
import tweepy
# App-only credential for the Twitter API v2; fill in before running.
bearer_token = ""
client = tweepy.Client(bearer_token)

# Look up the account by screen name; `.data` holds the user object.
user = client.get_user(username="ehime_covid19").data
user.id

# Walk the account's timeline page by page (max_results=100 per request)
# and stop after 500 tweets in total.
timeline = tweepy.Paginator(
    client.get_users_tweets,
    id=user.id,
    tweet_fields=["created_at"],
    max_results=100,
)

# Keep only the `.data` payload of each tweet object.
tweets = [tweet.data for tweet in timeline.flatten(limit=500)]
tweets
Pandas
import pandas as pd
# Build one row per tweet, derive the report date, the positive-case count
# and the alert level from the tweet text, then export the result as TSV.
df0 = pd.DataFrame(tweets)

# Work in Japan time so that the year derived below follows the local calendar.
df0["created_at"] = pd.to_datetime(df0["created_at"]).dt.tz_convert("Asia/Tokyo")
df0.sort_values("created_at", inplace=True)
df0.reset_index(drop=True, inplace=True)

# NFKC folds compatibility characters (e.g. full-width digits) into their
# canonical forms, so the ASCII-based patterns below can match them too.
df0["text"] = df0["text"].str.normalize("NFKC")

# Alert level label mentioned in the tweet; tweets that do not mention one
# inherit the most recent earlier label (forward fill in chronological order).
df0["level"] = (
    df0["text"]
    .str.extract(r"(感染縮小期|感染警戒期|感染対策期)", expand=False)
    .ffill()
)

# First "<month>月<day>日" occurrence in the text, kept verbatim.
df0["日付"] = df0["text"].str.extract(r"(\d{1,2}月\d{1,2}日)", expand=False)

# Split the date into numeric month/day parts. Tweets without a date are
# dropped here (instead of crashing the integer cast) and end up with NaT
# in df0["date"] through index alignment.
df_date = (
    df0["日付"]
    .str.extract(r"(\d{1,2})月(\d{1,2})日")
    .dropna()
    .astype(int)
    .rename(columns={0: "month", 1: "day"})
)

# The text carries no year: use the year of the day before the tweet was
# posted, so a tweet posted on Jan 1 is attributed to the previous year.
df_date["year"] = (df0["created_at"] - pd.Timedelta(days=1)).dt.year
df0["date"] = pd.to_datetime(df_date)

# Number of positives from "陽性<N>名"; thousands separators are removed so
# the column is numeric rather than a string such as "1,234".
count_text = df0["text"].str.extract(r"陽性([0-9,]+)名", expand=False)
df0["count"] = pd.to_numeric(
    count_text.str.replace(",", "", regex=False), errors="coerce"
)

# Tweets stating that no positives were confirmed count as zero. Assign the
# result back: an in-place mask on the selected column is a chained update
# and does not reach df0 under pandas Copy-on-Write.
df0["count"] = df0["count"].mask(
    df0["text"].str.contains("陽性は確認されませんでした", na=False), 0
)

# Tweets from which no count could be derived (shown for manual inspection).
df0[df0["count"].isna()]

# Keep only rows with a count and store the counts as integers.
df1 = df0.dropna(subset=["count"]).astype({"count": int})
df2 = df1.reindex(columns=["date", "count", "level"])
df2.to_csv("ehime.tsv", sep="\t", index=False)
altair
import altair as alt
# Bar chart of the count per date, built step by step from df2.
chart = alt.Chart(df2).mark_bar()
chart = chart.encode(x="date", y="count", tooltip=["date", "count"])
chart = chart.properties(width=800)
chart = chart.interactive()
chart

# Write the chart to a standalone HTML file.
chart.save("view.html")