読者です 読者をやめる 読者になる 読者になる

Google Apps ScriptでRSS1.0・RSS2.0・ATOMを取得(フィルタ付き)

Reader
Title
Url
RSS
って名前のシートを作って
「Title」と「Url」は正規表現でマッチすると登録されない
「RSS」のA列目は「RSS1・RSS2・ATOM」、B列目はRSSのURL
「Reader」は結果が表示される

// Shared script state used by every fetcher in this file.

// 1-based index of the next row to write in `sheet`. The fetchers and findURL()
// reset it to sheet.getLastRow() + 1 before they start working.
var row = 0;

// Worksheet that receives the fetched items; assigned by readerRSS() before any
// fetcher runs.
var sheet;

/**
 * Fetches an RSS 1.0 (RDF) feed and appends its items to the shared `sheet`.
 *
 * Every item accepted by both filterTitle() and filterUrl() becomes one row:
 * column 1 = title, column 2 = link, column 3 = dc:date. An element that is
 * missing from an item is written as an empty string instead of aborting the
 * run. The global `row` is left pointing at the next free row.
 *
 * @param {string} feedURL URL of the feed; a falsy value makes the call a no-op.
 */
function getRSS10(feedURL) {

  if (!feedURL) {
    return;
  }

  // getChild() yields null for an absent element, so guard before getText().
  function childText(parent, name, namespace) {
    var child = parent.getChild(name, namespace);
    return child ? child.getText() : '';
  }

  row = sheet.getLastRow() + 1;

  var xml = UrlFetchApp.fetch(feedURL).getContentText();
  var root = XmlService.parse(xml).getRootElement();
  var rss = XmlService.getNamespace('http://purl.org/rss/1.0/');
  var dc = XmlService.getNamespace('dc', 'http://purl.org/dc/elements/1.1/');

  // In RSS 1.0 the <item> elements are direct children of the root element.
  var items = root.getChildren('item', rss);
  var accepted = [];

  for (var i = 0; i < items.length; i++) {

    var title = childText(items[i], 'title', rss);
    var link = childText(items[i], 'link', rss);
    var pubdate = childText(items[i], 'date', dc);

    if (filterTitle(title) && filterUrl(link)) {
      accepted.push([title, link, pubdate]);
    }
  }

  // One batched write instead of three setValue() calls per item. A range
  // must have at least one row, so skip the write when nothing passed.
  if (accepted.length > 0) {
    sheet.getRange(row, 1, accepted.length, 3).setValues(accepted);
    row += accepted.length;
  }
}

/**
 * Fetches an RSS 2.0 feed and appends its items to the shared `sheet`.
 *
 * Every item accepted by both filterTitle() and filterUrl() becomes one row:
 * column 1 = title, column 2 = link, column 3 = publication date. The date is
 * reformatted in the 'JST' time zone as yyyy-MM-dd'T'HH:mm:ssXXX. A missing
 * pubDate is written as '' and an unparseable one is written unchanged, so a
 * single incomplete item no longer aborts the run. The global `row` is left
 * pointing at the next free row.
 *
 * @param {string} feedURL URL of the feed; a falsy value makes the call a no-op.
 */
function getRSS20(feedURL) {

  if (!feedURL) {
    return;
  }

  // getChild() yields null for an absent element, so guard before getText().
  function childText(parent, name) {
    var child = parent.getChild(name);
    return child ? child.getText() : '';
  }

  // Normalises a pubDate string; falls back to the raw text when Date cannot
  // parse it, because formatting an invalid Date would fail.
  function formatPubDate(text) {
    if (!text) {
      return '';
    }
    var parsed = new Date(text);
    if (isNaN(parsed.getTime())) {
      return text;
    }
    return Utilities.formatDate(parsed, 'JST', "yyyy-MM-dd'T'HH:mm:ssXXX");
  }

  row = sheet.getLastRow() + 1;

  var xml = UrlFetchApp.fetch(feedURL).getContentText();
  var root = XmlService.parse(xml).getRootElement();

  // A document without <channel> simply contributes no items.
  var channel = root.getChild('channel');
  var items = channel ? channel.getChildren('item') : [];
  var accepted = [];

  for (var i = 0; i < items.length; i++) {

    var title = childText(items[i], 'title');
    var link = childText(items[i], 'link');
    var pubdate = formatPubDate(childText(items[i], 'pubDate'));

    if (filterTitle(title) && filterUrl(link)) {
      accepted.push([title, link, pubdate]);
    }
  }

  // One batched write instead of three setValue() calls per item. A range
  // must have at least one row, so skip the write when nothing passed.
  if (accepted.length > 0) {
    sheet.getRange(row, 1, accepted.length, 3).setValues(accepted);
    row += accepted.length;
  }
}

/**
 * Fetches an Atom feed and appends its entries to the shared `sheet`.
 *
 * Every entry accepted by both filterTitle() and filterUrl() becomes one row:
 * column 1 = title, column 2 = link href, column 3 = updated timestamp.
 * The link is taken from the first <link> whose rel is "alternate" or absent
 * (an absent rel means "alternate" in Atom); if there is none, the first
 * <link> is used. Missing elements or attributes are written as '' instead of
 * aborting the run. The global `row` is left pointing at the next free row.
 *
 * @param {string} feedURL URL of the feed; a falsy value makes the call a no-op.
 */
function getATOM(feedURL) {

  if (!feedURL) {
    return;
  }

  // getChild() yields null for an absent element, so guard before getText().
  function childText(parent, name, namespace) {
    var child = parent.getChild(name, namespace);
    return child ? child.getText() : '';
  }

  // getAttribute() yields null for an absent attribute.
  function attributeValue(element, name) {
    var attribute = element.getAttribute(name);
    return attribute ? attribute.getValue() : null;
  }

  // Picks the entry's article URL rather than e.g. a rel="self" or
  // rel="replies" link that happens to come first.
  function entryHref(entry, namespace) {
    var links = entry.getChildren('link', namespace);
    var chosen = links.length > 0 ? links[0] : null;

    for (var j = 0; j < links.length; j++) {
      var rel = attributeValue(links[j], 'rel');
      if (rel === null || rel === 'alternate') {
        chosen = links[j];
        break;
      }
    }

    var href = chosen ? attributeValue(chosen, 'href') : null;
    return href === null ? '' : href;
  }

  row = sheet.getLastRow() + 1;

  var xml = UrlFetchApp.fetch(feedURL).getContentText();
  var root = XmlService.parse(xml).getRootElement();
  var atom = XmlService.getNamespace('http://www.w3.org/2005/Atom');

  var entries = root.getChildren('entry', atom);
  var accepted = [];

  for (var i = 0; i < entries.length; i++) {

    var title = childText(entries[i], 'title', atom);
    var link = entryHref(entries[i], atom);
    var updated = childText(entries[i], 'updated', atom);

    if (filterTitle(title) && filterUrl(link)) {
      accepted.push([title, link, updated]);
    }
  }

  // One batched write instead of three setValue() calls per entry. A range
  // must have at least one row, so skip the write when nothing passed.
  if (accepted.length > 0) {
    sheet.getRange(row, 1, accepted.length, 3).setValues(accepted);
    row += accepted.length;
  }
}

/**
 * Decides whether an item title may be registered.
 *
 * Column A of the "Title" sheet holds one regular expression per row. The
 * title is rejected as soon as any of them matches. Blank cells are ignored:
 * an empty pattern would match every title and silently reject all items.
 *
 * @param {string} title Item title to check.
 * @return {boolean} false when a filter pattern matches, true otherwise.
 */
function filterTitle(title) {
  var Tsheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Title");
  var Trow = Tsheet.getLastRow();

  // getLastRow() is 0 for an empty sheet: no filters, everything passes.
  if (!Trow) {
    return true;
  }

  var filters = Tsheet.getRange(1, 1, Trow).getValues();

  for (var i = 0; i < filters.length; i++) {
    // getValues() returns one array per row; the pattern is its only cell.
    var cell = filters[i][0];
    var pattern = cell == null ? '' : String(cell);

    if (pattern === '') {
      continue;
    }

    if (new RegExp(pattern).test(title)) {
      return false;
    }
  }
  return true;
}

/**
 * Decides whether an item URL may be registered.
 *
 * Column A of the "Url" sheet holds one regular expression per row. The URL
 * is rejected as soon as any of them matches. Blank cells are ignored: an
 * empty pattern would match every URL and silently reject all items.
 *
 * @param {string} url Item URL to check.
 * @return {boolean} false when a filter pattern matches, true otherwise.
 */
function filterUrl(url) {
  var Usheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Url");
  var Urow = Usheet.getLastRow();

  // getLastRow() is 0 for an empty sheet: no filters, everything passes.
  if (!Urow) {
    return true;
  }

  var filters = Usheet.getRange(1, 1, Urow).getValues();

  for (var i = 0; i < filters.length; i++) {
    // getValues() returns one array per row; the pattern is its only cell.
    var cell = filters[i][0];
    var pattern = cell == null ? '' : String(cell);

    if (pattern === '') {
      continue;
    }

    if (new RegExp(pattern).test(url)) {
      return false;
    }
  }
  return true;
}

/**
 * Reports whether `text` is absent from column B of the shared `sheet`.
 *
 * Side effect: resets the global `row` to sheet.getLastRow() + 1, and reads
 * that many rows of column B.
 *
 * @param {string} text Value to look for (column B holds the item links).
 * @return {boolean} false when some row of column B equals `text`, else true.
 */
function findURL(text) {

  var nextRow = sheet.getLastRow() + 1;
  row = nextRow;

  var columnB = sheet.getRange(1, 2, nextRow).getValues();

  // Each entry is a one-cell row array; the loose == coerces it to its
  // string form before comparing, exactly as the element-wise loop did.
  var alreadyListed = columnB.some(function (cells) {
    return cells == text;
  });

  return !alreadyListed;
}


/**
 * Entry point: rebuilds the "Reader" sheet from the feeds listed in "RSS".
 *
 * Clears "Reader", then walks the rows of the "RSS" sheet. Column A names the
 * feed type ('RSS1', 'RSS2' or 'ATOM') and column B holds the feed URL; rows
 * with any other type are ignored. Finally the "Reader" sheet is sorted by
 * column 3 in descending order. When "RSS" is empty, nothing is fetched and
 * no sort is performed.
 */
function readerRSS() {
  var book = SpreadsheetApp.getActiveSpreadsheet();

  sheet = book.getSheetByName("Reader");
  sheet.clear();

  var feedSheet = book.getSheetByName("RSS");
  var feedCount = feedSheet.getLastRow();

  if (!feedCount) {
    return;
  }

  // Feed type (column A) -> function that fetches that kind of feed.
  var fetchers = {
    'RSS1': getRSS10,
    'RSS2': getRSS20,
    'ATOM': getATOM
  };

  var feeds = feedSheet.getRange(1, 1, feedCount, 2).getValues();
  Logger.log(feeds);

  feeds.forEach(function (feed) {
    var kind = feed[0];

    // Only exact string matches dispatch; inherited keys and non-string
    // cells are ignored.
    if (typeof kind === 'string' && Object.prototype.hasOwnProperty.call(fetchers, kind)) {
      var fetchFeed = fetchers[kind];
      fetchFeed(feed[1]);
    }
  });

  sheet.sort(3, false);
}

Pythonの癖で for...in を使っていましたが、遅いうえに順番が変わる場合があるらしいので、インデックスを使った通常の for ループに変更しました。

imabari.hateblo.jp
imabari.hateblo.jp