Add a stylesheet for Howtoons.com
author Antonio Ospite <ao2@ao2.it>
Thu, 24 Apr 2014 11:46:33 +0000 (13:46 +0200)
committer Antonio Ospite <ao2@ao2.it>
Thu, 24 Apr 2014 11:47:38 +0000 (13:47 +0200)
rss_converter_howtoons.com.xsl [new file with mode: 0644]

diff --git a/rss_converter_howtoons.com.xsl b/rss_converter_howtoons.com.xsl
new file mode 100644 (file)
index 0000000..4067065
--- /dev/null
@@ -0,0 +1,89 @@
+<!--
+  Stylesheet to convert Howtoons.com to RSS.
+
+  Copyright (C) 2014  Antonio Ospite <ao2@ao2.it>
+
+  This file is part of tweeper.
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+-->
+
+<!--
+  The RSS feed link on http://howtoons.com is broken, so this stylesheet works
+  around it by building a feed from the site's HTML pages.
+
+  Howtoons uses WordPress, so this stylesheet may also serve as a base for
+  scraping other WordPress sites.
+-->
+
+<!--
+  Root of the stylesheet.
+
+  The "php" prefix is bound only so that XPath expressions can call PHP
+  functions via php:functionString(); no extension elements are used.
+  exclude-result-prefixes keeps the xmlns:php declaration out of the
+  generated feed.
+-->
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:php="http://php.net/xsl"
+    exclude-result-prefixes="php">
+
+    <xsl:output method="xml" indent="yes"/>
+
+    <!-- Site root, used as the channel link, the image link and as xml:base of the feed. -->
+    <xsl:variable name="BaseURL">
+        <xsl:text>http://howtoons.com</xsl:text>
+    </xsl:variable>
+
+    <!--
+      Emit one RSS <item> for a post container, i.e. a <div> whose id
+      contains "post-".
+
+      title and link come from the first anchor inside the div with class
+      "post-headline"; the publication date is parsed from the div with class
+      "post-byline"; the description is a copy of the <p> children of the div
+      whose class contains "post-bodycopy".
+    -->
+    <xsl:template match="div[contains(@id, 'post-')]">
+        <item>
+            <title>
+                <xsl:value-of select="normalize-space(.//div[@class='post-headline']//a)"/>
+            </title>
+            <link>
+                <xsl:value-of select=".//div[@class='post-headline']//a/@href"/>
+            </link>
+            <!--
+              The text before the first comma of the byline is taken as the
+              date. normalize-space() strips any indentation or newlines
+              around it so that the fixed offsets used below line up.
+            -->
+            <xsl:variable name="date" select="normalize-space(substring-before(.//div[@class='post-byline'], ','))"/>
+            <!-- pubDate is optional in RSS: skip it rather than emit a bogus date when nothing usable was found. -->
+            <xsl:if test="string-length($date) &gt;= 8">
+                <pubDate>
+                    <!-- date format is MM.DD.YY -->
+                    <xsl:variable name="month" select="substring($date, 1, 2)"/>
+                    <xsl:variable name="day" select="substring($date, 4, 2)"/>
+                    <xsl:variable name="year" select="substring($date, 7, 2)"/>
+                    <!-- Two-digit years are assumed to belong to the 2000s. -->
+                    <xsl:variable name="iso-date" select="concat('20', $year, '-', $month, '-', $day)"/>
+                    <!-- NOTE(review): Tweeper::str_to_gmdate is defined outside this file; presumably it formats the date for RSS. Confirm. -->
+                    <xsl:value-of select="php:functionString('Tweeper::str_to_gmdate', $iso-date)"/>
+                </pubDate>
+            </xsl:if>
+            <!--
+              The post markup is copied inside a hand-written CDATA section.
+              NOTE(review): this relies on the processor honouring
+              disable-output-escaping, and copied content that itself contains
+              the CDATA end marker would close the section early.
+            -->
+            <description>
+                <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+                <xsl:copy-of select=".//div[contains(@class, 'post-bodycopy')]/p"/>
+                <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+            </description>
+        </item>
+    </xsl:template>
+
+    <!--
+      Entry point: build the RSS 2.0 envelope (channel metadata and image),
+      then apply the post template to every post container in the document.
+    -->
+    <xsl:template match="/">
+
+        <rss version="2.0">
+            <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
+            <channel>
+                <generator>Tweeper</generator>
+                <title>
+                    <xsl:value-of select="normalize-space(//title)"/>
+                </title>
+                <link>
+                    <xsl:value-of select="$BaseURL"/>
+                </link>
+                <description>
+                    <xsl:text>The world's greatest D.I.Y. comic website! Tools of mass construction!</xsl:text>
+                </description>
+                <!-- RSS 2.0 requires url, title and link inside image; title and link mirror the channel values. -->
+                <image>
+                    <url>
+                        <xsl:text>http://www.howtoons.com/wp-content/themes/atahualpa/images/header/tuck1000.png</xsl:text>
+                    </url>
+                    <title>
+                        <xsl:value-of select="normalize-space(//title)"/>
+                    </title>
+                    <link>
+                        <xsl:value-of select="$BaseURL"/>
+                    </link>
+                </image>
+                <xsl:apply-templates select="//div[contains(@id, 'post-')]"/>
+            </channel>
+        </rss>
+    </xsl:template>
+</xsl:stylesheet>