Scrapy Tutorial — Scrapy 1.3.0 documentation
Scrapy 1.2 ドキュメント — Scrapy 1.2.2 ドキュメント
# インストール
pip install scrapy
conda install -c conda-forge scrapy=1.3.0

# プロジェクト作成
scrapy startproject ehime_np
cd ehime_np
items.py編集
import scrapy


class EhimeNpItem(scrapy.Item):
    """Container for one scraped entry, exposing a title and a URL field."""

    # Text of the entry's title.
    title = scrapy.Field()

    # Address the entry links to.
    url = scrapy.Field()
# ひながた作成
scrapy genspider ehime_news www.ehime-np.co.jp

# テスト
scrapy shell https://www.ehime-np.co.jp/online/news/ehime/list/
response.css('#js_contents > section > main > article > dl > dd > a > span.imgtextlink_rightimg__text--tit::text').extract_first()
# -*- coding: utf-8 -*-
import scrapy

from ehime_np.items import EhimeNpItem


class EhimeNewsSpider(scrapy.Spider):
    """Spider that collects title/URL pairs from the news list page.

    It starts from the single listing URL in ``start_urls`` and yields one
    ``EhimeNpItem`` per ``dd`` row that contains a link.
    """

    name = "ehime_news"
    allowed_domains = ["www.ehime-np.co.jp"]
    start_urls = (
        'https://www.ehime-np.co.jp/online/news/ehime/list/',
    )

    def parse(self, response):
        """Yield an ``EhimeNpItem`` for every linked row of the listing.

        Args:
            response: The downloaded listing page.

        Yields:
            EhimeNpItem: ``title`` holds the text of the row's title span
            (``None`` when the span is absent) and ``url`` holds the row's
            link resolved against the response URL.
        """
        rows = response.css("#js_contents > section > main > article > dl > dd")
        for sel in rows:
            href = sel.css("a::attr('href')").extract_first()
            if not href:
                # Without a link, urljoin() would fall back to the listing
                # page's own URL and produce a misleading item; skip the row.
                continue

            article = EhimeNpItem()
            article['title'] = sel.css(
                "a > span.imgtextlink_rightimg__text--tit::text"
            ).extract_first()
            # The href may be relative, so resolve it against the page URL.
            article['url'] = response.urljoin(href)
            yield article
scrapy crawl ehime_news -o test.csv
scrapy parse --spider=ehime_news http://www.ehime-np.co.jp/online/news/ehime/list