From 4835a2e8d2816eeed44d2e5b9ffe62be7d4572eb Mon Sep 17 00:00:00 2001 From: Antonio Ospite <ospite@studenti.unina.it> Date: Sat, 6 Jul 2013 21:51:53 +0200 Subject: [PATCH] Initial import --- README | 40 ++++++++++++++++++++ TODO | 1 + tweeper.php | 86 +++++++++++++++++++++++++++++++++++++++++++ twitter_user_timeline2rss.xsl | 77 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 204 insertions(+) create mode 100644 README create mode 100644 TODO create mode 100755 tweeper.php create mode 100644 twitter_user_timeline2rss.xsl diff --git a/README b/README new file mode 100644 index 0000000..46a2fe5 --- /dev/null +++ b/README @@ -0,0 +1,40 @@ +Tweeper is a web scraper which extracts the most recent public tweets of +a given user from their home page on Twitter.com and formats them in RSS, so +the information can be conveniently accessed and collected by a feed reader. + +On Jun 11th 2013 Twitter.com retired their API v1.0, so it's no longer possible +to access a user timeline via RSS, and it's also become mandatory to +authenticate via OAuth to access this _public_ information in JSON format: + + https://dev.twitter.com/discussions/16289 + https://dev.twitter.com/discussions/11564 + +Some services came up to overcome this "problem": + + http://twss.55uk.net/ + http://twitter-rss.com/ (now redirecting to google.com) + +However these solutions are still shady and leave the user no control over +who collects information about the visited user timelines. + +This is why Tweeper[1] was born, as an Open Source way to keep following your +friends with a certain degree of anonymity, without having to tell Twitter.com +who your friends are. + +[1] http://www.urbandictionary.com/define.php?term=TWEEPER&defid=3743173 + +Tweeper can be used via web or as a command line program, for example as +a filter in your feed reader. 
+ +Example of use on the command line: + + $ php tweeper.php NSACareers + +Example of use as a Liferea[2] filter: + + $ liferea-add-feed "|php .../path_to_tweeper/tweeper.php NSAcareers" + +[2] http://lzone.de/liferea/ + +Tweeper is licensed under the GPLv3. +Tweeper was written by Antonio Ospite http://ao2.it diff --git a/TODO b/TODO new file mode 100644 index 0000000..ebc5d3a --- /dev/null +++ b/TODO @@ -0,0 +1 @@ +- write a better XSL stylesheet? I am not an XSL expert. diff --git a/tweeper.php b/tweeper.php new file mode 100755 index 0000000..2e27a0d --- /dev/null +++ b/tweeper.php @@ -0,0 +1,86 @@ +<?php +/* + * tweeper - a Twitter to RSS web scraper + * + * Copyright (C) 2013 Antonio Ospite <ospite@studenti.unina.it> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+date_default_timezone_set('UTC');
+
+/**
+ * Converts a remote HTML page to RSS by applying an XSL stylesheet to it.
+ */
+class Tweeper {
+
+  private $USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0";
+
+  /**
+   * Processor holding the stylesheet imported by the constructor.
+   *
+   * Declared explicitly: creating properties on the fly is deprecated
+   * since PHP 8.2.
+   *
+   * @var XSLTProcessor
+   */
+  private $xsltProcessor;
+
+  /**
+   * Loads and imports the XSL stylesheet used by tweep().
+   *
+   * @param string $stylesheet
+   *   Path or URI of the stylesheet.
+   *
+   * @throws RuntimeException
+   *   If the stylesheet cannot be read, parsed or imported.
+   */
+  public function __construct($stylesheet) {
+    $stylesheet_contents = $this->get_contents($stylesheet);
+
+    $xslDoc = new DOMDocument();
+    if (!$xslDoc->loadXML($stylesheet_contents)) {
+      throw new RuntimeException("Cannot parse stylesheet '$stylesheet'.");
+    }
+
+    $this->xsltProcessor = new XSLTProcessor();
+    if (!$this->xsltProcessor->importStylesheet($xslDoc)) {
+      throw new RuntimeException("Cannot import stylesheet '$stylesheet'.");
+    }
+  }
+
+  /**
+   * Reads a resource, sending browser-like headers for HTTP(S) requests.
+   *
+   * @param string $uri
+   *   Local path or URI to read.
+   *
+   * @return string
+   *   The non-empty contents of the resource.
+   *
+   * @throws RuntimeException
+   *   If the resource cannot be read or is empty.
+   */
+  private function get_contents($uri) {
+    # https://www.wjsams.com/c/docs/Wiki/Php-HowToSetUserAgentOnFileGetContents
+    $opts = array(
+      'http' => array(
+        'method' => "GET",
+        'header' => implode('', array(
+          "Accept-language: en\r\n",
+          "User-Agent: {$this->USER_AGENT}\r\n"
+        ))
+      )
+    );
+
+    $context = stream_context_create($opts);
+    $contents = file_get_contents($uri, FALSE, $context);
+
+    // file_get_contents() returns FALSE on failure, and an empty document
+    // cannot be parsed, so neither value must reach the DOM loaders.
+    if (FALSE === $contents || '' === $contents) {
+      throw new RuntimeException("Cannot read '$uri'.");
+    }
+    return $contents;
+  }
+
+  /**
+   * Fetches an HTML page and transforms it with the loaded stylesheet.
+   *
+   * @param string $uri
+   *   URI of the page to convert.
+   *
+   * @return string
+   *   The result of the XSL transformation.
+   *
+   * @throws RuntimeException
+   *   If the page cannot be fetched or parsed, or the transformation fails.
+   */
+  public function tweep($uri) {
+    $html = $this->get_contents($uri);
+
+    $xmlDoc = new DOMDocument();
+
+    // Pages found in the wild are rarely valid for libxml; collect parser
+    // errors internally so that warnings do not end up in the generated feed.
+    $previous_setting = libxml_use_internal_errors(TRUE);
+    $loaded = $xmlDoc->loadHTML($html);
+    libxml_clear_errors();
+    libxml_use_internal_errors($previous_setting);
+
+    if (!$loaded) {
+      throw new RuntimeException("Cannot parse the HTML of '$uri'.");
+    }
+
+    $output = $this->xsltProcessor->transformToXML($xmlDoc);
+    if (!is_string($output)) {
+      throw new RuntimeException('XSL transformation failed.');
+    }
+    return $output;
+  }
+}
+
+// PHP_SAPI tells command line and web invocations apart reliably, while
+// $_SERVER['SCRIPT_NAME'] is set in both cases.
+$is_cli = (PHP_SAPI === 'cli');
+
+$screen_name = NULL;
+if (isset($_GET['screen_name']) && is_string($_GET['screen_name'])) {
+  // The is_string() check rejects requests like ?screen_name[]=foo.
+  $screen_name = $_GET['screen_name'];
+} else if (isset($argv[1])) {
+  $screen_name = $argv[1];
+}
+
+if (NULL === $screen_name || '' === $screen_name) {
+  if ($is_cli)
+    $usage = "{$argv[0]} <screen_name>\n";
+  else
+    $usage = htmlentities("{$_SERVER['SCRIPT_NAME']}?screen_name=<screen_name>");
+
+  die("usage: $usage");
+}
+
+// Encode the user-supplied name so it stays a single path segment.
+$src_uri = 'https://twitter.com/' . rawurlencode($screen_name);
+
+try {
+  // Resolve the stylesheet relative to this script, not to the current
+  // working directory, so the script also works when launched from
+  // elsewhere (e.g. as a feed reader filter).
+  $tweeper = new Tweeper(__DIR__ . '/twitter_user_timeline2rss.xsl');
+  $output = $tweeper->tweep($src_uri);
+} catch (RuntimeException $e) {
+  $message = "error: {$e->getMessage()}\n";
+  if ($is_cli) {
+    fwrite(STDERR, $message);
+  } else {
+    // Plain text keeps the message safe to display without HTML escaping.
+    header('Content-Type: text/plain; charset=utf-8', TRUE, 500);
+    echo $message;
+  }
+  exit(1);
+}
+
+if (!$is_cli) {
+  header('Content-Type: application/rss+xml');
+}
+echo $output;
diff --git a/twitter_user_timeline2rss.xsl b/twitter_user_timeline2rss.xsl
new file mode 100644
index 0000000..79d1662
--- /dev/null
+++ b/twitter_user_timeline2rss.xsl
@@ -0,0 +1,77 @@
+<!--
+  Stylesheet to convert Twitter user timelines to RSS.
+
+  Copyright (C) 2013 Antonio Ospite <ospite@studenti.unina.it>
+
+  This file is part of tweeper.
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program. If not, see <http://www.gnu.org/licenses/>.
+-->
+<xsl:stylesheet version="1.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <!-- Prefix prepended to the tweet permalink @href values in item links. -->
+  <xsl:variable name="twitterBaseURL">
+    <xsl:text>https://twitter.com</xsl:text>
+  </xsl:variable>
+
+  <!-- Screen name of the profile owner, read from the profile card. -->
+  <xsl:variable name="screen-name" select="//div[@class='profile-card-inner']/@data-screen-name"/>
+
+  <!--
+    Emits "screen-name: tweet text".
+
+    The text is selected with a path relative to the context node, so the
+    template is meant to be invoked with call-template while the context
+    node is an element containing the tweet paragraph, as the item
+    template below does.
+  -->
+  <!-- TODO: make URLs hyperlinked (http://www.dpawson.co.uk/xsl/rev2/regex2.html) -->
+  <xsl:template name="tweet" match="//p[@class='js-tweet-text tweet-text']">
+    <xsl:value-of select="$screen-name"/>: <xsl:value-of select=".//p[@class='js-tweet-text tweet-text']"/>
+  </xsl:template>
+
+  <!-- Turns each tweet of the timeline into an RSS item. -->
+  <xsl:template match="//div[@id='timeline']//ol[@id='stream-items-id']//li[@data-item-type='tweet']">
+    <item>
+      <title>
+        <xsl:call-template name="tweet"/>
+      </title>
+      <link>
+        <xsl:value-of select="$twitterBaseURL"/><xsl:value-of select=".//a[@class='details with-icn js-details']/@href"/>
+      </link>
+      <!--
+        NOTE(review): RSS 2.0 expects an RFC 822 date here; @data-time is
+        presumably a Unix timestamp, to be confirmed and converted if so.
+      -->
+      <pubDate>
+        <xsl:value-of select=".//small[@class='time']//span/@data-time"/>
+      </pubDate>
+      <description>
+        <xsl:call-template name="tweet"/>
+      </description>
+    </item>
+  </xsl:template>
+
+  <!-- Builds the RSS channel and appends one item per tweet. -->
+  <xsl:template match="/">
+
+    <rss version="2.0">
+      <channel>
+        <generator>Tweeper</generator>
+        <title>
+          <xsl:text>Twitter / </xsl:text><xsl:value-of select="$screen-name"/>
+        </title>
+        <link>
+          <xsl:value-of select="//link[@rel='canonical']/@href"/>
+        </link>
+        <description>
+          <xsl:value-of select="//meta[@name='description']/@content"/>
+        </description>
+        <!--
+          RSS 2.0 requires url, title and link inside image; title and link
+          repeat the values used for the channel.
+        -->
+        <image>
+          <url>
+            <xsl:value-of select="//a[@class='profile-picture media-thumbnail']/@href"/>
+          </url>
+          <title>
+            <xsl:text>Twitter / </xsl:text><xsl:value-of select="$screen-name"/>
+          </title>
+          <link>
+            <xsl:value-of select="//link[@rel='canonical']/@href"/>
+          </link>
+        </image>
+        <xsl:apply-templates select="//div[@id='timeline']//ol[@id='stream-items-id']//li[@data-item-type='tweet']"/>
+      </channel>
+    </rss>
+  </xsl:template>
+</xsl:stylesheet>
-- 
2.1.4