From: Antonio Ospite <ospite@studenti.unina.it>
Date: Sat, 27 Jul 2013 14:51:38 +0000 (+0200)
Subject: Add initial support for scraping Pump.io activity streams
X-Git-Tag: v0.1~20
X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/5bf4c1bdaf068d58e3141c0015d1b9341965b43b?ds=inline;hp=7678d6017c992b1e1d210c9294644ac153d28cb9

Add initial support for scraping Pump.io activity streams

Use symlinks to represent alternate sites with the same structure (i.e.
same server software).

Symlinks are handy and concise; an alternative would be to introduce an
equivalence mapping, as in the patch below, but I don't really like
that:

  diff --git a/tweeper.php b/tweeper.php
  index a019684..eb12af2 100755
  --- a/tweeper.php
  +++ b/tweeper.php
  @@ -101,9 +101,18 @@ $url = parse_url($src_url);
   if (FALSE === $url || empty($url["host"]))
     die("Invalid url: $url\n");

  -$stylesheet = __DIR__ . "/rss_converter_" . $url["host"] . ".xsl";
  +$equivalence_map = array(
  +  "identi.ca" => "pump.io"
  +);
  +
  +if (array_key_exists($url["host"], $equivalence_map))
  +  $host = $equivalence_map[$url["host"]];
  +else
  +  $host = $url["host"];
  +
  +$stylesheet = __DIR__ . "/rss_converter_" . $host . ".xsl";
   if (FALSE === file_exists($stylesheet))
  -  die("Conversion to RSS not supported: {$url["host"]}\n");
  +  die("Conversion to RSS not supported: {$host}\n");

   $tweeper = new Tweeper($stylesheet);
   echo $tweeper->tweep($src_url);
---

diff --git a/rss_converter_identi.ca.xsl b/rss_converter_identi.ca.xsl
new file mode 120000
index 0000000..d8042a1
--- /dev/null
+++ b/rss_converter_identi.ca.xsl
@@ -0,0 +1 @@
+rss_converter_pump.io.xsl
\ No newline at end of file
diff --git a/rss_converter_pump.io.xsl b/rss_converter_pump.io.xsl
new file mode 100644
index 0000000..ef4b6e8
--- /dev/null
+++ b/rss_converter_pump.io.xsl
@@ -0,0 +1,75 @@
+<!--
+  Stylesheet to convert Pump.io activity streams to RSS.
+
+  Copyright (C) 2013  Antonio Ospite <ospite@studenti.unina.it>
+
+  This file is part of tweeper.
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+-->
+<!-- To Evan, please reconsider publishing RSS output for _public_ contents -->
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:php="http://php.net/xsl"
+    exclude-result-prefixes="php">
+    <!-- The php prefix is only used for extension functions; excluding it keeps xmlns:php out of the generated RSS. -->
+    <xsl:output method="xml" indent="yes"/>
+    <!-- Part of the profile block's data-profile-id after its first ':'; the templates below split it at '@'. -->
+    <xsl:variable name="user-name" select="substring-after(//div[@id='profile-block']/@data-profile-id, ':')"/>
+
+    <xsl:template match="//div[@id='user-content-activities']//ul[@id='major-stream']/li">
+        <!-- Turn the current major-stream li into one RSS item; the description wraps the activity markup in a literal CDATA section. -->
+        <xsl:variable name="content" select=".//div[@class='activity-content']"/>
+        <item>
+            <title>
+                <xsl:value-of select="$user-name"/>
+                <xsl:text>: </xsl:text>
+                <xsl:value-of select="normalize-space($content)"/>
+            </title>
+            <link><xsl:value-of select=".//p[@class='muted']/small/a/@href"/></link>
+            <!-- str_to_gmdate is called through the PHP extension-function bridge; it is not defined in this stylesheet. -->
+            <pubDate><xsl:value-of select="php:functionString('str_to_gmdate', .//abbr[@class='easydate']/@title)"/></pubDate>
+            <description>
+                <xsl:value-of select="concat($user-name, ': ')"/>
+                <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+                <xsl:copy-of select="$content/node()"/>
+                <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+            </description>
+        </item>
+    </xsl:template>
+
+    <xsl:template match="/">
+        <!-- Build the RSS channel for the scraped profile page; $user-name is split at '@' into nick (before) and host (after). -->
+        <xsl:variable name="host" select="substring-after($user-name, '@')"/>
+        <xsl:variable name="nick" select="substring-before($user-name, '@')"/>
+        <xsl:variable name="feed-title" select="concat($host, ' / ', $nick)"/>
+        <xsl:variable name="profile-url" select="concat('https://', $host, '/', $nick)"/>
+        <rss version="2.0">
+            <channel>
+                <generator>Tweeper</generator>
+                <title><xsl:value-of select="$feed-title"/></title>
+                <link><xsl:value-of select="$profile-url"/></link>
+                <description><xsl:value-of select="normalize-space(//h1[@class='media-header'])"/></description>
+                <!-- RSS 2.0 requires url, title and link inside image; title and link mirror the channel's values. -->
+                <image>
+                    <url><xsl:value-of select="//div[@id='profile-block']/span/img[@class='img-rounded media-object']/@src"/></url>
+                    <title><xsl:value-of select="$feed-title"/></title>
+                    <link><xsl:value-of select="$profile-url"/></link>
+                </image>
+                <!-- One item per li of the major stream, rendered by the matching template. -->
+                <xsl:apply-templates select="//div[@id='user-content-activities']//ul[@id='major-stream']/li"/>
+            </channel>
+        </rss>
+    </xsl:template>
+</xsl:stylesheet>