Initial import
authorAntonio Ospite <ospite@studenti.unina.it>
Sat, 6 Jul 2013 19:51:53 +0000 (21:51 +0200)
committerAntonio Ospite <ospite@studenti.unina.it>
Sun, 7 Jul 2013 09:03:48 +0000 (11:03 +0200)
README [new file with mode: 0644]
TODO [new file with mode: 0644]
tweeper.php [new file with mode: 0755]
twitter_user_timeline2rss.xsl [new file with mode: 0644]

diff --git a/README b/README
new file mode 100644 (file)
index 0000000..46a2fe5
--- /dev/null
+++ b/README
@@ -0,0 +1,40 @@
+Tweeper is a web scraper which extracts the most recent public tweets of
+a given user from their home page on Twitter.com and formats them in RSS, so
+the information can be conveniently accessed and collected by a feed reader.
+
+Since Jun 11th 2013 Twitter.com retired their API v1.0, so it's not possible
+to access a user timeline via RSS anymore, and it's also become mandatory to
+authenticate via OAuth to access this _public_ information in JSON format:
+
+  https://dev.twitter.com/discussions/16289
+  https://dev.twitter.com/discussions/11564
+
+Some services came up to overcome this "problem":
+
+  http://twss.55uk.net/
+  http://twitter-rss.com/ (now redirecting to google.com)
+
+However these solutions are still shady and let no control to the user about
+who collects informations about the visited user timelines.
+
+This is why Tweeper[1] was born, as an Open Source way to keep following your
+friends with a certain degree of anonymity, without having to tell Twitter.com
+whom you are friend to.
+
+[1] http://www.urbandictionary.com/define.php?term=TWEEPER&defid=3743173
+
+Tweeper can be used via web or as a command line program, for example as
+a filter in your feed reader.
+
+Example of use on the command line:
+
+  $ php tweeper.php NSACareers
+
+Example of use as a Liferea[2] filter:
+
+  $ liferea-add-feed  "|php .../path_to_tweeper/tweeper.php NSAcareers"
+
+[2] http://lzone.de/liferea/
+
+Tweeper is licensed under the GPLv3.
+Tweeper was written by Antonio Ospite http://ao2.it
diff --git a/TODO b/TODO
new file mode 100644 (file)
index 0000000..ebc5d3a
--- /dev/null
+++ b/TODO
@@ -0,0 +1 @@
+- write a better XSL stylesheet? I am not an XSL expert.
diff --git a/tweeper.php b/tweeper.php
new file mode 100755 (executable)
index 0000000..2e27a0d
--- /dev/null
@@ -0,0 +1,86 @@
+<?php
+/*
+ * tweeper - a Twitter to RSS web scraper
+ * 
+ * Copyright (C) 2013  Antonio Ospite <ospite@studenti.unina.it>
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+date_default_timezone_set('UTC');
+
+class Tweeper {
+
+  private $USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0";
+
+  public function __construct($stylesheet) {
+    $stylesheet_contents = $this->get_contents($stylesheet);
+
+    $xslDoc = new DOMDocument();
+    $xslDoc->loadXML($stylesheet_contents);
+
+    $this->xsltProcessor = new XSLTProcessor();
+    $this->xsltProcessor->importStylesheet($xslDoc);
+  }
+
+  private function get_contents($uri) {
+    # https://www.wjsams.com/c/docs/Wiki/Php-HowToSetUserAgentOnFileGetContents
+    $opts = array(
+      'http' => array(
+        'method' => "GET",
+        'header' => join(array(
+          "Accept-language: en\r\n",
+          "User-Agent: {$this->USER_AGENT}\r\n"
+        ))
+      )
+    );
+
+    $context = stream_context_create($opts);
+    $contents = file_get_contents($uri, false, $context);
+    return $contents;
+  }
+
+  public function tweep($uri) {
+    $html = $this->get_contents($uri);
+
+    $xmlDoc = new DOMDocument();
+    $xmlDoc->loadHTML($html);
+
+    $output = $this->xsltProcessor->transformToXML($xmlDoc);
+
+    if (FALSE === $output) {
+      trigger_error('XSL transformation failed.', E_USER_ERROR);
+      return NULL;
+    }
+    return $output;
+  }
+}
+
+if (isset($_GET['screen_name'])) {
+  $screen_name = $_GET['screen_name'];
+} else if (isset($argv[1])) {
+  $screen_name = $argv[1];
+} else {
+  if (isset($_SERVER['SCRIPT_NAME']))
+    $usage = htmlentities("{$_SERVER['SCRIPT_NAME']}?screen_name=<screen_name>");
+  else
+    $usage = "{$argv[0]} <screen_name>\n";
+
+  die("usage: $usage");
+}
+
+$tweeper = new Tweeper('twitter_user_timeline2rss.xsl');
+
+$src_uri = 'https://twitter.com/' . $screen_name;
+echo $tweeper->tweep($src_uri);
diff --git a/twitter_user_timeline2rss.xsl b/twitter_user_timeline2rss.xsl
new file mode 100644 (file)
index 0000000..79d1662
--- /dev/null
@@ -0,0 +1,77 @@
+<!--
+  Stylesheet to convert Twitter user timelines to RSS.
+
+  Copyright (C) 2013  Antonio Ospite <ospite@studenti.unina.it>
+
+  This file is part of tweeper.
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+-->
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+    <xsl:output method="xml" indent="yes"/>
+
+    <xsl:variable name="twitterBaseURL">
+        <xsl:text>https://twitter.com</xsl:text>
+    </xsl:variable>
+
+    <xsl:variable name="screen-name" select="//div[@class='profile-card-inner']/@data-screen-name"/>
+
+    <!-- TODO: make URLs hyperlinked (http://www.dpawson.co.uk/xsl/rev2/regex2.html) -->
+    <xsl:template name="tweet" match="//p[@class='js-tweet-text tweet-text']">
+        <xsl:value-of select="$screen-name"/>: <xsl:value-of select=".//p[@class='js-tweet-text tweet-text']"/>
+    </xsl:template>
+
+    <xsl:template match="//div[@id='timeline']//ol[@id='stream-items-id']//li[@data-item-type='tweet']">
+        <item>
+            <title>
+                <xsl:call-template name="tweet"/>
+            </title>
+            <link>
+                <xsl:value-of select="$twitterBaseURL"/><xsl:value-of select=".//a[@class='details with-icn js-details']/@href"/>
+            </link>
+            <pubDate>
+                <xsl:value-of select=".//small[@class='time']//span/@data-time"/>
+            </pubDate>
+            <description>
+                <xsl:call-template name="tweet"/>
+            </description>
+        </item>
+    </xsl:template>
+
+    <xsl:template match="/">
+
+        <rss version="2.0">
+            <channel>
+                <generator>Tweeper</generator>
+                <title>
+                    <xsl:text>Twitter / </xsl:text><xsl:value-of select="$screen-name"/>
+                </title>
+                <link>
+                    <xsl:value-of select="//link[@rel='canonical']/@href"/>
+                </link>
+                <description>
+                    <xsl:value-of select="//meta[@name='description']/@content"/>
+                </description>
+                <image>
+                    <url>
+                        <xsl:value-of select="//a[@class='profile-picture media-thumbnail']/@href"/>
+                    </url>
+                </image>
+                <xsl:apply-templates select="//div[@id='timeline']//ol[@id='stream-items-id']//li[@data-item-type='tweet']"/>
+            </channel>
+        </rss>
+    </xsl:template>
+</xsl:stylesheet>