# Scrape CAMPFIRE project backers (CAMPFIREの支援者をスクレイピング)

import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Target CAMPFIRE project ID whose backers we scrape.
p_number = 12345

# --- Scraping ---------------------------------------------------------------
# Walks every page of https://camp-fire.jp/projects/<id>/backers via the
# rel="next" pagination link and collects one dict per backer into `result`.

url = f"https://camp-fire.jp/projects/{p_number}/backers"
link = url

result = []

# Reuse one HTTP session so paginated requests share a keep-alive connection.
session = requests.Session()

while True:

    print(link)

    # timeout: without it a stalled server would hang the script forever.
    r = session.get(link, timeout=30)
    r.raise_for_status()

    soup = BeautifulSoup(r.content, "html.parser")

    for i in soup.select("li.clearfix"):

        tag = i.select_one("div.body > ul.date > li > a")
        count_tag = i.select_one("div.body > ul.date > li > small > strong")
        time_tag = i.select_one("div.body > ul.date > li.rfloat > span.time")
        message_tag = i.select_one("div.body > p.readmore")

        # Skip entries whose markup does not match the expected structure
        # (e.g. anonymous backers). Previously a single missing tag crashed
        # the whole multi-page scrape with AttributeError on None.
        if tag is None or count_tag is None or time_tag is None or message_tag is None:
            continue

        result.append(
            {
                "project": p_number,
                "name": tag.get_text(strip=True),
                "profile": tag.get("href"),
                # e.g. "3件" -> "3" (strip the Japanese counter suffix)
                "count": count_tag.get_text(strip=True).rstrip("件"),
                "datetime": time_tag.get_text(strip=True),
                "message": message_tag.get_text(strip=True),
            }
        )

    # Pagination: follow the rel="next" link until it disappears.
    next_page = soup.select_one('div.pagination > div.clearfix > span > a[rel="next"]')

    if next_page:
        link = urljoin(url, next_page.get("href"))
        time.sleep(1)  # be polite: pause between page fetches
    else:
        break

# --- Data cleansing ---------------------------------------------------------
# Turn the scraped backer dicts into a DataFrame, derive user_id and an
# absolute profile URL, and export to "<project>.csv".

import pandas as pd

df = pd.DataFrame(result)

# Derive the bare user id by stripping only a LEADING "/profile/" prefix.
# Anchoring the pattern avoids mangling hrefs that merely contain the text
# elsewhere in the path (the old unanchored literal replace did not).
df["user_id"] = df["profile"].str.replace("^/profile/", "", regex=True)
# Turn the site-relative profile path into an absolute URL.
df["profile"] = df["profile"].str.replace("^/", "https://camp-fire.jp/", regex=True)

# index=False: the default RangeIndex is meaningless here and would add a
# spurious unnamed first column to the CSV.
df.to_csv(f"{p_number}.csv", index=False)