某サイトで、スクレイピング対策として数字が画像化されている買取金額をスクレイピングする

買取金額表示用の数字画像を取得
OCRで画像から数字に変換
切り抜き位置から数字番号取得
数字に変換

CSSをパースするのは初めてだった

pip install easyocr
pip install cssutils

import pathlib
from urllib.parse import urljoin

import cssutils
import easyocr
import requests
from bs4 import BeautifulSoup

# スクレイピング

# Request headers: identify as a desktop browser via the User-Agent string.
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
headers = {"User-Agent": _USER_AGENT}


def fetch_soup(url, parser="html5lib", *, timeout=30):
    """Download *url* and return it parsed as a BeautifulSoup document.

    The request is sent with the module-level ``headers``.

    Args:
        url: Address of the page to fetch.
        parser: Name of the BeautifulSoup parser backend to use.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The ``BeautifulSoup`` tree built from the raw response bytes.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    # Without an explicit timeout requests waits indefinitely on a stalled
    # server, which would hang the whole script.
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()

    return BeautifulSoup(r.content, parser)


def fetch_file(url, fn, *, timeout=30):
    """Download *url* and save the response body to the path *fn*.

    Missing parent directories of *fn* are created first. The request is
    sent with the module-level ``headers`` so it matches ``fetch_soup``.

    Args:
        url: Address of the file to download.
        fn: Destination path (string or path-like).
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A ``pathlib.Path`` pointing at the written file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    p = pathlib.Path(fn)
    p.parent.mkdir(parents=True, exist_ok=True)

    # Same headers as the page request, so both requests present the same
    # User-Agent; the timeout keeps a stalled server from hanging the script.
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()

    p.write_bytes(r.content)

    return p

# No URL is given here; set the address of the target page before running.
url = ""

soup = fetch_soup(url)

# Save the prettified HTML to a file.
with pathlib.Path("test.html").open(mode="w", encoding="utf-8") as out:
    out.write(soup.prettify())

# encrypt-num: find the sprite image referenced by the page's inline CSS
# and download it.

style_tag = soup.select_one("div.topic > style")
if style_tag is None:
    raise RuntimeError("no <style> element found under div.topic")

css = style_tag.get_text(strip=True)

sheet = cssutils.parseString(css)

img_url = ""

for rule in sheet:
    if rule.type != rule.STYLE_RULE:
        continue
    # `prop` rather than `property`, so the builtin is not shadowed.
    for prop in rule.style:
        if prop.name == "background-image":
            # Drop the url( ... ) wrapper, then any whitespace or quotes
            # left around the address itself.
            unwrapped = prop.value.replace("url(", "").replace(")", "")
            img_url = unwrapped.strip().strip("'\"")
            # This only leaves the current rule: a later rule that also
            # has a background-image still overrides the value.
            break

if not img_url:
    raise RuntimeError("no background-image URL found in the page CSS")

link = urljoin(url, img_url)
print(link)

p = fetch_file(link, "encrypt-num.png")


# Read the digits out of the downloaded sprite image (CPU only).
reader = easyocr.Reader(["en"], gpu=False)
texts = reader.readtext("encrypt-num.png", detail=0, allowlist="0123456789")
if not texts:
    # Fail with a clear message instead of an IndexError on texts[0].
    raise RuntimeError("OCR recognised no digits in encrypt-num.png")

# Lookup string: the first recognised text followed by a comma.
# NOTE(review): presumably the sprite's glyph after the digits is a
# thousands separator that the digit-only allowlist cannot read; confirm.
result = texts[0] + ","

print(result)

# Horizontal offsets are divided by this value to get an index into `result`.
# NOTE(review): presumably the pixel width of one glyph in the sprite; confirm.
GLYPH_WIDTH_PX = 10


def _glyph_index(style):
    """Return the index into `result` encoded in an inline style string.

    Args:
        style: Value of a ``style`` attribute containing a
            ``background-position`` declaration such as ``-20px``.

    Returns:
        The unsigned horizontal offset divided by ``GLYPH_WIDTH_PX``.

    Raises:
        ValueError: If there is no ``background-position`` or its first
            component is not an integer pixel value.
    """
    position = cssutils.parseStyle(style)["background-position"]
    if not position:
        raise ValueError("no background-position in style {!r}".format(style))
    # Only the first (horizontal) component is used, so a value such as
    # "-20px 0px" parses as well; strip() removes the sign and the unit.
    offset = int(position.split()[0].strip("-px"))
    return offset // GLYPH_WIDTH_PX


# Print each product title followed by its decoded price(s).
for tag in soup.select("div.item.item-thumbnail.item-product-list"):

    title = tag.select_one("h4.item-title").get_text(strip=True)
    print(title)

    for price in tag.select("div.item-price.encrypt-price"):

        # One index per glyph <span>, in document order.
        number = [_glyph_index(i.get("style")) for i in price.select("span.encrypt-num")]

        print("".join(result[j] for j in number))

    print("-" * 20)