From: Antonio Ospite <ospite@studenti.unina.it> Date: Sat, 27 Jul 2013 14:51:38 +0000 (+0200) Subject: Add initial support for scraping Pump.io activity streams X-Git-Tag: v0.1~20 X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/5bf4c1bdaf068d58e3141c0015d1b9341965b43b?ds=inline;hp=7678d6017c992b1e1d210c9294644ac153d28cb9 Add initial support for scraping Pump.io activity streams Use symlinks to represent alternate sites with the same structure (i.e. same server software). Symlinks are handy and concise, an alternative way would be to introduce some equivalence mapping, like in the patch below, but I don't really like that: diff --git a/tweeper.php b/tweeper.php index a019684..eb12af2 100755 --- a/tweeper.php +++ b/tweeper.php @@ -101,9 +101,18 @@ $url = parse_url($src_url); if (FALSE === $url || empty($url["host"])) die("Invalid url: $url\n"); -$stylesheet = __DIR__ . "/rss_converter_" . $url["host"] . ".xsl"; +$equivalence_map = array( + "identi.ca" => "pump.io" +); + +if (array_key_exists($url["host"], $equivalence_map)) + $host = $equivalence_map[$url["host"]]; +else + $host = $url["host"]; + +$stylesheet = __DIR__ . "/rss_converter_" . $host . ".xsl"; if (FALSE === file_exists($stylesheet)) - die("Conversion to RSS not supported: {$url["host"]}\n"); + die("Conversion to RSS not supported: {$host}\n"); $tweeper = new Tweeper($stylesheet); echo $tweeper->tweep($src_url); --- diff --git a/rss_converter_identi.ca.xsl b/rss_converter_identi.ca.xsl new file mode 120000 index 0000000..d8042a1 --- /dev/null +++ b/rss_converter_identi.ca.xsl @@ -0,0 +1 @@ +rss_converter_pump.io.xsl \ No newline at end of file diff --git a/rss_converter_pump.io.xsl b/rss_converter_pump.io.xsl new file mode 100644 index 0000000..ef4b6e8 --- /dev/null +++ b/rss_converter_pump.io.xsl @@ -0,0 +1,75 @@ +<!-- + Stylesheet to convert Pump.io activity streams to RSS. + + Copyright (C) 2013 Antonio Ospite <ospite@studenti.unina.it> + + This file is part of tweeper. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +--> +<!-- To Evan, please reconsider publishing RSS output for _public_ contents --> +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:php="http://php.net/xsl" + xsl:extension-element-prefixes="php"> + + <xsl:output method="xml" indent="yes"/> + + <xsl:variable name="user-name" select="substring-after(//div[@id='profile-block']/@data-profile-id, ':')"/> + + <xsl:template match="//div[@id='user-content-activities']//ul[@id='major-stream']/li"> + <xsl:variable name="activity-text" select=".//div[@class='activity-content']"/> + <item> + <title> + <xsl:value-of select="concat($user-name, ': ', normalize-space($activity-text))"/> + </title> + <link> + <xsl:value-of select=".//p[@class='muted']/small/a/@href"/> + </link> + <pubDate> + <xsl:value-of select="php:functionString('str_to_gmdate', .//abbr[@class='easydate']/@title)"/> + </pubDate> + <description> + <xsl:value-of select="concat($user-name, ': ')"/> + <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text> + <xsl:copy-of select="$activity-text/node()"/> + <xsl:text disable-output-escaping="yes">]]></xsl:text> + </description> + </item> + </xsl:template> + + <xsl:template match="/"> + + <rss version="2.0"> + <channel> + <generator>Tweeper</generator> + <title> + <xsl:value-of select="concat(substring-after($user-name, '@'), ' / ', 
substring-before($user-name, '@'))"/> + </title> + <link> + <xsl:value-of select="concat('https://', substring-after($user-name, '@'), '/', substring-before($user-name, '@'))"/> + </link> + <description> + <xsl:value-of select="normalize-space(//h1[@class='media-header'])"/> + </description> + <image> + <url> + <xsl:value-of select="//div[@id='profile-block']/span/img[@class='img-rounded media-object']/@src"/> + </url> + </image> + <xsl:apply-templates select="//div[@id='user-content-activities']//ul[@id='major-stream']/li"/> + </channel> + </rss> + </xsl:template> +</xsl:stylesheet>