PythonでRSSの新着チェック(feedparser/sqlite3)

rss_cron.pyを実行すると5分おきにrss_print.pyが実行される。

rss_cron.py

import os
import subprocess
import sys

from apscheduler.schedulers.blocking import BlockingScheduler

# Blocking scheduler: sched.start() below does not return while jobs are running.
sched = BlockingScheduler()


@sched.scheduled_job('interval', minutes=5)
def timed_job():
    """Run rss_print.py once in a child process.

    Registered on ``sched`` as an interval job that fires every 5 minutes.
    The script path is relative, so it is resolved against the current
    working directory of the scheduler process.
    """
    # sys.executable keeps the child on the same interpreter (and therefore
    # the same installed packages) as the scheduler, and the argument list
    # avoids going through a shell.
    result = subprocess.run([sys.executable, 'rss_print.py'], check=False)

    # Keep the scheduler alive on failure, but do not hide it.
    if result.returncode != 0:
        print(
            'rss_print.py exited with status {}'.format(result.returncode),
            file=sys.stderr,
        )


sched.start()

rss_print.py

import datetime
import sqlite3

import feedparser
import pytz


class SqliteDB:
    """Track RSS entries in a SQLite table and print the ones not seen before.

    On construction the ``RSSEntries`` table is created when it does not
    exist yet. ``db_flag`` records whether this instance created the table;
    while it is true, entries are stored but not printed, so the very first
    run only seeds the database.

    Instances can be used as context managers; the connection is closed on
    exit.
    """

    # User-Agent string assigned to feedparser before each fetch.
    USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'

    # Time zone in which publication timestamps are stored.
    TIMEZONE_NAME = 'Asia/Tokyo'

    def __init__(self, db_path='rss.sqlite'):
        """Open the database and create the entries table when missing.

        Args:
            db_path: Path of the SQLite database file. Defaults to
                ``rss.sqlite`` relative to the current working directory.
        """
        self.db_connect = sqlite3.connect(db_path)
        self.db_connect.row_factory = sqlite3.Row
        self.db_cursor = self.db_connect.cursor()

        self.db_cursor.execute(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='RSSEntries';"
        )
        # True only when this instance had to create the table.
        self.db_flag = self.db_cursor.fetchone()[0] == 0

        if self.db_flag:
            self.db_cursor.execute(
                'CREATE TABLE RSSEntries (entry_id INTEGER PRIMARY KEY AUTOINCREMENT, title, link, published)'
            )
            self.db_connect.commit()

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection; exceptions from the body are not suppressed."""
        self.close()
        return False

    def execute(self, url):
        """Fetch the feed at *url*, store unseen entries and print them.

        An entry counts as seen when a row with the same link already
        exists. New entries are inserted and, unless the table was created
        by this instance (``db_flag``), printed as ``title link``.

        Args:
            url: Feed location handed to ``feedparser.parse``.
        """
        feedparser.USER_AGENT = self.USER_AGENT
        feed = feedparser.parse(url)

        for entry in feed.entries:
            link = entry.get('link')
            if not link:
                # Entries are de-duplicated by link; without one there is
                # nothing to key on, so skip instead of aborting the run.
                continue

            # The duplicate check applies on the first run as well, so a
            # link repeated within or across feeds is stored only once.
            if self._is_known(link):
                continue

            title = entry.get('title', '')
            self._insert(title, link, self._published_at(entry))

            # Do not print while seeding a freshly created table.
            if not self.db_flag:
                print(title, link)

    def _is_known(self, link):
        """Return True when a row with this link is already stored."""
        self.db_cursor.execute(
            'SELECT COUNT(*) from RSSEntries WHERE link=?', (link, ))
        return self.db_cursor.fetchone()[0] != 0

    def _insert(self, title, link, published):
        """Insert one entry row and commit it.

        *published* is a ``datetime``; it is bound as ``isoformat(' ')``
        text, which is the same text sqlite3's default datetime adapter
        (deprecated since Python 3.12) would have produced.
        """
        self.db_cursor.execute(
            'INSERT INTO RSSEntries (title, link, published) VALUES (?,?,?)',
            (title, link, published.isoformat(' ')))
        self.db_connect.commit()

    def _published_at(self, entry):
        """Return the entry's timestamp as an aware datetime in TIMEZONE_NAME.

        Uses ``published_parsed``, then ``updated_parsed``, and falls back
        to the current time when neither holds a parsed date.
        """
        local_tz = pytz.timezone(self.TIMEZONE_NAME)

        parsed = entry.get('published_parsed')
        if parsed is None:
            parsed = entry.get('updated_parsed')

        if parsed is None:
            return datetime.datetime.now(local_tz)

        # feedparser documents its *_parsed tuples as UTC, so attach UTC
        # first and convert, rather than relabelling the value as local time.
        utc_time = datetime.datetime(*parsed[:6], tzinfo=pytz.utc)
        return utc_time.astimezone(local_tz)

    def close(self):
        """Close the underlying SQLite connection."""
        self.db_connect.close()


# Script entry point: check both feeds against the same database.
if __name__ == '__main__':
    database = SqliteDB()
    try:
        for feed_url in (
            'http://imabari.jpn.org/sc/imabari-urge.cgi',
            'http://imabari.jpn.org/sc/imabari-shelter.cgi',
        ):
            database.execute(feed_url)
    finally:
        # Release the SQLite handle even if a fetch or insert raises.
        database.close()