<!--
  Stylesheet to convert Howtoons.com to RSS.

  Copyright (C) 2014 Antonio Ospite <ao2@ao2.it>

  This file is part of tweeper.

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
-->
<!--
  The RSS feed link on http://howtoons.com is broken, so this stylesheet works
  around it by building the feed from the site's HTML pages.

  Howtoons uses WordPress, so this stylesheet may also serve as a base for
  scraping other WordPress sites.
-->
<!--
  Stylesheet root element.

  The php prefix is bound to the PHP XSL extension namespace; this file uses
  it for the php:functionString() call that formats item dates.

  On xsl:stylesheet the attribute is spelled extension-element-prefixes
  (no xsl: prefix); the prefixed spelling is only meaningful on literal
  result elements. exclude-result-prefixes keeps the php namespace
  declaration out of the generated feed.
-->
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:php="http://php.net/xsl"
    extension-element-prefixes="php"
    exclude-result-prefixes="php">

<!-- Serialize the result tree as indented XML. -->
<xsl:output method="xml" indent="yes"/>
<!--
  Root URL of the scraped site. The root template uses it both as the
  xml:base of the feed and as the channel link.
-->
<xsl:variable name="BaseURL">
  <xsl:text>http://howtoons.com</xsl:text>
</xsl:variable>
<!--
  Convert one post container (a div whose id contains 'post-') into an
  RSS item.

  title:       whitespace-normalized text of the headline link
  link / guid: href of the headline link
  pubDate:     the part of the byline before the first comma, rewritten as an
               ISO date and passed to the PHP helper Tweeper::str_to_gmdate
  description: the post body paragraphs, copied as markup inside a CDATA
               section
-->
<xsl:template match="//div[contains(@id, 'post-')]">
  <xsl:variable name="item-permalink" select=".//div[@class='post-headline']//a/@href"/>
  <item>
    <title>
      <xsl:value-of select="normalize-space(.//div[@class='post-headline']//a)"/>
    </title>
    <link>
      <xsl:value-of select="$item-permalink"/>
    </link>
    <guid>
      <xsl:value-of select="$item-permalink"/>
    </guid>
    <pubDate>
      <xsl:variable name="date" select="substring-before(.//div[@class='post-byline'], ',')"/>
      <!-- date format is MM.DD.YY -->
      <xsl:variable name="month" select="substring($date, 1, 2)"/>
      <xsl:variable name="day" select="substring($date, 4, 2)"/>
      <xsl:variable name="year" select="substring($date, 7, 2)"/>
      <!-- The site only gives a two-digit year; the century is assumed to be 20xx. -->
      <xsl:variable name="iso-date" select="concat('20', $year, '-', $month, '-', $day)"/>
      <!-- NOTE(review): presumably returns a date string suitable for pubDate;
           confirm against the Tweeper::str_to_gmdate implementation. -->
      <xsl:value-of select="php:functionString('Tweeper::str_to_gmdate', $iso-date)"/>
    </pubDate>
    <description>
      <!--
        The CDATA markers are written as escaped text with output escaping
        disabled, so the serializer emits them literally around the copied
        paragraphs. Writing them unescaped here would open a real CDATA
        section in the stylesheet and turn the xsl:copy-of into plain text.
      -->
      <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
      <xsl:copy-of select=".//div[contains(@class, 'post-bodycopy')]/p"/>
      <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
    </description>
  </item>
</xsl:template>
<!--
  Entry point: build the RSS 2.0 document.

  The channel title is taken from the title element(s) of the source page
  (xsl:value-of uses the first one in document order); the channel link is
  the site root. One item is then generated for every div whose id contains
  'post-', via the matching template.
-->
<xsl:template match="/">
  <xsl:variable name="channel-title" select="//title"/>
  <xsl:variable name="channel-link" select="$BaseURL"/>

  <rss version="2.0">
    <!-- xsl:attribute must come before any child element of rss. -->
    <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
    <channel>
      <generator>Tweeper</generator>
      <title>
        <xsl:value-of select="$channel-title"/>
      </title>
      <link>
        <xsl:value-of select="$channel-link"/>
      </link>
      <description>
        <xsl:text>The world's greatest D.I.Y. comic website! Tools of mass construction!</xsl:text>
      </description>
      <!-- Channel image: title and link repeat the channel's own values. -->
      <image>
        <title>
          <xsl:value-of select="$channel-title"/>
        </title>
        <link>
          <xsl:value-of select="$channel-link"/>
        </link>
        <url>
          <xsl:text>http://www.howtoons.com/wp-content/themes/atahualpa/images/header/tuck1000.png</xsl:text>
        </url>
      </image>
      <xsl:apply-templates select="//div[contains(@id, 'post-')]"/>
    </channel>
  </rss>
</xsl:template>