From 4835a2e8d2816eeed44d2e5b9ffe62be7d4572eb Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sat, 6 Jul 2013 21:51:53 +0200 Subject: [PATCH 1/1] Initial import --- README | 40 ++++++++++++++++++++ TODO | 1 + tweeper.php | 86 +++++++++++++++++++++++++++++++++++++++++++ twitter_user_timeline2rss.xsl | 77 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 204 insertions(+) create mode 100644 README create mode 100644 TODO create mode 100755 tweeper.php create mode 100644 twitter_user_timeline2rss.xsl diff --git a/README b/README new file mode 100644 index 0000000..46a2fe5 --- /dev/null +++ b/README @@ -0,0 +1,40 @@ +Tweeper is a web scraper which extracts the most recent public tweets of +a given user from their home page on Twitter.com and formats them in RSS, so +the information can be conveniently accessed and collected by a feed reader. + +On Jun 11th 2013 Twitter.com retired their API v1.0, so it's not possible +to access a user timeline via RSS anymore, and it's also become mandatory to +authenticate via OAuth to access this _public_ information in JSON format: + + https://dev.twitter.com/discussions/16289 + https://dev.twitter.com/discussions/11564 + +Some services came up to overcome this "problem": + + http://twss.55uk.net/ + http://twitter-rss.com/ (now redirecting to google.com) + +However, these solutions are still shady and leave the user no control over +who collects information about the visited user timelines. + +This is why Tweeper[1] was born, as an Open Source way to keep following your +friends with a certain degree of anonymity, without having to tell Twitter.com +whom you are friends with. + +[1] http://www.urbandictionary.com/define.php?term=TWEEPER&defid=3743173 + +Tweeper can be used via web or as a command line program, for example as +a filter in your feed reader. 
+ +Example of use on the command line: + + $ php tweeper.php NSACareers + +Example of use as a Liferea[2] filter: + + $ liferea-add-feed "|php .../path_to_tweeper/tweeper.php NSAcareers" + +[2] http://lzone.de/liferea/ + +Tweeper is licensed under the GPLv3. +Tweeper was written by Antonio Ospite http://ao2.it diff --git a/TODO b/TODO new file mode 100644 index 0000000..ebc5d3a --- /dev/null +++ b/TODO @@ -0,0 +1 @@ +- write a better XSL stylesheet? I am not an XSL expert. diff --git a/tweeper.php b/tweeper.php new file mode 100755 index 0000000..2e27a0d --- /dev/null +++ b/tweeper.php @@ -0,0 +1,86 @@ +<?php +/* + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+date_default_timezone_set('UTC');
+
+/**
+ * Fetches an HTML page and converts it with an XSL stylesheet.
+ *
+ * The stylesheet is loaded once in the constructor; tweep() can then be
+ * called to transform a page.
+ */
+class Tweeper {
+
+  private $USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0";
+
+  /* Declared explicitly: dynamically created properties are deprecated
+   * since PHP 8.2. */
+  private $xsltProcessor;
+
+  /**
+   * Load the XSL stylesheet used by tweep().
+   *
+   * A stylesheet which cannot be read or parsed raises an E_USER_ERROR,
+   * the same way a failed transformation does in tweep().
+   *
+   * @param string $stylesheet path or URI of the XSL stylesheet
+   */
+  public function __construct($stylesheet) {
+    $stylesheet_contents = $this->get_contents($stylesheet);
+    if (FALSE === $stylesheet_contents || '' === $stylesheet_contents) {
+      trigger_error('Cannot read the XSL stylesheet.', E_USER_ERROR);
+      return;
+    }
+
+    $xslDoc = new DOMDocument();
+    if (!$xslDoc->loadXML($stylesheet_contents)) {
+      trigger_error('Cannot parse the XSL stylesheet.', E_USER_ERROR);
+      return;
+    }
+
+    $this->xsltProcessor = new XSLTProcessor();
+    $this->xsltProcessor->importStylesheet($xslDoc);
+  }
+
+  /**
+   * Read a local file or a remote resource.
+   *
+   * The request headers below are options of the 'http' stream wrapper:
+   * they set the language and the User-Agent sent with the request.
+   *
+   * @param string $uri path or URI to read
+   * @return string|false the contents, or FALSE when they cannot be read
+   */
+  private function get_contents($uri) {
+    # https://www.wjsams.com/c/docs/Wiki/Php-HowToSetUserAgentOnFileGetContents
+    $opts = array(
+      'http' => array(
+        'method' => "GET",
+        'header' => implode('', array(
+          "Accept-language: en\r\n",
+          "User-Agent: {$this->USER_AGENT}\r\n"
+        ))
+      )
+    );
+
+    $context = stream_context_create($opts);
+    return file_get_contents($uri, false, $context);
+  }
+
+  /**
+   * Fetch the HTML page at $uri and transform it with the stylesheet.
+   *
+   * @param string $uri address of the HTML page to convert
+   * @return string|null the transformed document, or NULL on failure
+   *                     (after an E_USER_ERROR has been raised)
+   */
+  public function tweep($uri) {
+    $html = $this->get_contents($uri);
+    # The message is kept generic on purpose: $uri may contain user input
+    # and error messages are not escaped when displayed.
+    if (FALSE === $html || '' === $html) {
+      trigger_error('Cannot retrieve the source page.', E_USER_ERROR);
+      return NULL;
+    }
+
+    # Real-world HTML is rarely well-formed: collect the parser warnings
+    # instead of letting them be printed in the middle of the output.
+    $previous = libxml_use_internal_errors(true);
+    $xmlDoc = new DOMDocument();
+    $loaded = $xmlDoc->loadHTML($html);
+    libxml_clear_errors();
+    libxml_use_internal_errors($previous);
+
+    if (!$loaded) {
+      trigger_error('Cannot parse the source page.', E_USER_ERROR);
+      return NULL;
+    }
+
+    $output = $this->xsltProcessor->transformToXML($xmlDoc);
+
+    if (FALSE === $output) {
+      trigger_error('XSL transformation failed.', E_USER_ERROR);
+      return NULL;
+    }
+    return $output;
+  }
+}
+
+# The screen name comes from the query string when used via web, or from
+# the first command line argument otherwise.
+if (isset($_GET['screen_name']) && is_string($_GET['screen_name'])) {
+  $screen_name = $_GET['screen_name'];
+} else if (isset($argv[1])) {
+  $screen_name = $argv[1];
+} else {
+  # $_SERVER['SCRIPT_NAME'] is set on the command line too, so it cannot
+  # tell the two environments apart: check the SAPI instead.
+  if ('cli' !== PHP_SAPI)
+    $usage = htmlentities("{$_SERVER['SCRIPT_NAME']}?screen_name=<screen_name>");
+  else
+    $usage = "{$argv[0]} <screen_name>\n";
+
+  die("usage: $usage");
+}
+
+# Look for the stylesheet next to this script rather than in the current
+# working directory, so the program also works when it is launched from
+# elsewhere (e.g. as a feed reader filter).
+$tweeper = new Tweeper(__DIR__ . '/twitter_user_timeline2rss.xsl');
+
+$src_uri = 'https://twitter.com/' . 
$screen_name; +echo $tweeper->tweep($src_uri); diff --git a/twitter_user_timeline2rss.xsl b/twitter_user_timeline2rss.xsl new file mode 100644 index 0000000..79d1662 --- /dev/null +++ b/twitter_user_timeline2rss.xsl @@ -0,0 +1,77 @@ + + + + + + + https://twitter.com + + + + + + + : + + + + + + <xsl:call-template name="tweet"/> + + + + + + + + + + + + + + + + + + Tweeper + + <xsl:text>Twitter / </xsl:text><xsl:value-of select="$screen-name"/> + + + + + + + + + + + + + + + + + -- 2.1.4