Scraping the list of stores that accept the Matsuyama City premium gift certificates (松山市プレミアム付商品券)

Target site: premium-gift.jp
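
The store list at https://premium-gift.jp/matsuyama/use_store is paginated; each page is selected with the query parameters events=page and an incrementing id. The script below walks the pages, pulls each store's name, category, postal code, address, phone number, and link out of the store cards with BeautifulSoup, and writes one CSV row per store.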

import csv
import time
import urllib.parse

import requests
from bs4 import BeautifulSoup

url = 'https://premium-gift.jp/matsuyama/use_store'

# Send a browser-like User-Agent; some sites reject the default requests one
headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
}

# Reuse one session so the connection is kept alive across page requests
s = requests.Session()

# newline='' lets the csv module control line endings itself;
# UTF-8 keeps the Japanese text portable across platforms
with open('result.csv', 'w', newline='', encoding='utf-8') as fw:
    writer = csv.writer(fw, dialect='excel', lineterminator='\n')

    # Write the header row
    writer.writerow(["ID", "Industry", "Store name", "Postal code",
                     "Address", "Phone number", "URL"])

    # The list spanned 89 pages when this was written; see the sketch after
    # the script for a variant that stops automatically instead
    for n in range(1, 90):

        # "events=page" with an incrementing id selects each result page;
        # the empty store/addr/industry fields apply no search filter
        params = {
            "events": "page",
            "id": n,
            "store": "",
            "addr": "",
            "industry": ""
        }

        r = s.get(url, headers=headers, params=params)

        print(r.url)

        if r.status_code == requests.codes.ok:

            soup = BeautifulSoup(r.content, 'html5lib')

            for i in soup.select("div.store-card__item"):

                # Store name
                title = i.select_one("h3.store-card__title").get_text(
                    strip=True)

                # Industry / business category
                tag = i.select_one("p.store-card__tag").get_text(strip=True)

                # The three <td> cells: postal code + address, phone number, URL
                temp, tel, link = [
                    td.get_text(strip=True) for td in i.select(
                        "table.store-card__table > tbody > tr > td")
                ]

                # Split the postal code off the front of the address
                postal, *address = temp.split()

                # Extract the store ID from the detail-page link's query string
                store_id = urllib.parse.parse_qs(
                    urllib.parse.urlparse(
                        i.select_one("a.store-card__button").get(
                            "href")).query)["id"][0]

                data = [store_id, tag, title, postal, ' '.join(address), tel, link]

                writer.writerow(data)

            # Be polite: pause between page requests
            time.sleep(1)
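
Hardcoding range(1, 90) silently misses stores once the list grows past 89 pages. As a minimal alternative sketch, assuming (unverified against the live site) that a page past the end simply renders no store cards, the loop below keeps requesting until an empty page comes back:

n = 1
while True:
    r = s.get(url, headers=headers,
              params={"events": "page", "id": n,
                      "store": "", "addr": "", "industry": ""})
    if r.status_code != requests.codes.ok:
        break

    soup = BeautifulSoup(r.content, 'html5lib')
    cards = soup.select("div.store-card__item")
    if not cards:
        # assumption: an out-of-range page contains zero store-card items
        break

    # ... parse each card and write a row, exactly as in the loop above ...

    n += 1
    time.sleep(1)

The same session, headers, and selectors as the main script are reused here; only the stopping condition changes.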