# Source: qiita.com
import pandas as pd
import tweepy
# Twitter API v2 bearer token. Left as an empty placeholder here —
# presumably to be filled in with a real credential before running.
bearer_token = ""
client = tweepy.Client(bearer_token)

# Count recent tweets matching the query (retweets excluded), bucketed per hour.
res = client.get_recent_tweets_count("雪だるま -is:retweet", granularity="hour")

# One row per bucket; convert the "start"/"end" columns to pandas datetimes.
df_count = pd.DataFrame(res.data).assign(
    start=lambda d: pd.to_datetime(d["start"]),
    end=lambda d: pd.to_datetime(d["end"]),
)
df_count.dtypes

# Bar chart of tweet volume over time, then a look at the latest 20 buckets.
df_count.plot(kind="bar", x="start", y="tweet_count", figsize=(20, 5))
df_count.tail(20)
# Select a single hourly bucket by its row label. The label 160 is hard-coded —
# presumably chosen by inspecting the counts table; adjust for other data.
se = df_count.loc[160]

# ISO-8601 strings bounding that bucket, used as the search time window.
start_time, end_time = (se[key].isoformat() for key in ("start", "end"))
# Page through the recent-search results for the selected time window and
# collect one merged (tweet + author) DataFrame per page in `dfs`.
dfs = []
next_token = None  # pagination cursor; None requests the first page
total = 0  # running number of tweets reported by the API so far
max_count = 1000  # stop requesting pages once this many tweets were seen
while True:
    tweets = client.search_recent_tweets(
        "雪だるま -is:retweet",
        start_time=start_time,
        end_time=end_time,
        expansions=["author_id", "referenced_tweets.id"],
        tweet_fields=["created_at", "referenced_tweets"],
        user_fields=["verified"],
        max_results=100,
        next_token=next_token,
    )
    # A page without matches has no tweet data and no "users" include;
    # skip it instead of failing on the lookup / merge below.
    if tweets.data:
        df_data = pd.DataFrame(tweets.data)
        # Rename the user's "id" so it lines up with the tweet's "author_id".
        df_user = pd.DataFrame(tweets.includes["users"]).rename(columns={"id": "author_id"})
        dfs.append(pd.merge(df_data, df_user, on="author_id"))
    n = tweets.meta.get("result_count", 0)
    next_token = tweets.meta.get("next_token")
    total += n
    # Stop when there are no further pages or enough tweets were collected.
    # The check runs after adding the page, so `total` may exceed `max_count`.
    if not next_token or total >= max_count:
        break
def _tweet_url(d):
    """Build the status URL for one row from its "username" and "id" values."""
    return f'https://twitter.com/{d["username"]}/status/{d["id"]}'


# Stack the per-page frames into a single table.
df0 = pd.concat(dfs)
df0.shape

# Show timestamps in Japan time and add a direct link to every tweet.
df0["created_at"] = df0["created_at"].dt.tz_convert("Asia/Tokyo")
df0["url"] = df0.apply(_tweet_url, axis=1)
df0.dtypes

# Keep at most `max_count` rows, ordered by creation time, with a fresh index.
df1 = (
    df0.head(max_count)
    .sort_values(by="created_at")
    .reset_index(drop=True)
)
df1