4067065cd970dc123cbb371f745b242dfb6ef3fe
[tweeper.git] / rss_converter_howtoons.com.xsl
1 <!--
2   Stylesheet to convert Howtoons.com to RSS.
3
4   Copyright (C) 2014  Antonio Ospite <ao2@ao2.it>
5
6   This file is part of tweeper.
7
8   This program is free software: you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation, either version 3 of the License, or
11   (at your option) any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 -->
21
<!--
  The RSS feed link on http://howtoons.com is broken, so this stylesheet works
  around it by generating the feed from the site's HTML.

  Howtoons uses WordPress, so this stylesheet may also serve as a base for
  scraping other WordPress sites.
-->
28
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:php="http://php.net/xsl"
    exclude-result-prefixes="php">

    <!--
      Turns the HTML of the Howtoons front page into an RSS 2.0 document:
      the root template emits the channel, and every post container found
      in the page becomes one item.

      The php namespace is only used to call an extension function, so it is
      listed in exclude-result-prefixes to keep its declaration out of the
      generated feed.
    -->

    <xsl:output method="xml" indent="yes"/>

    <!-- Site root: used as the channel link, the image link and xml:base. -->
    <xsl:variable name="BaseURL" select="'http://howtoons.com'"/>

    <!--
      One RSS item per post container, i.e. per div whose id contains
      'post-'.
    -->
    <xsl:template match="div[contains(@id, 'post-')]">
        <!-- Anchor(s) inside the headline; the first one supplies title and link. -->
        <xsl:variable name="headline-link" select=".//div[@class='post-headline']//a"/>

        <!--
          The byline is expected to start with the date followed by a comma.
          Whitespace is normalized first so that indentation in the source
          HTML cannot shift the fixed positions used below.
        -->
        <xsl:variable name="date"
            select="substring-before(normalize-space(.//div[@class='post-byline']), ',')"/>
        <!-- date format is MM.DD.YY -->
        <xsl:variable name="month" select="substring($date, 1, 2)"/>
        <xsl:variable name="day" select="substring($date, 4, 2)"/>
        <xsl:variable name="year" select="substring($date, 7, 2)"/>
        <xsl:variable name="date-digits" select="concat($month, $day, $year)"/>

        <item>
            <title>
                <xsl:value-of select="normalize-space($headline-link)"/>
            </title>
            <link>
                <xsl:value-of select="$headline-link/@href"/>
            </link>
            <!--
              pubDate is optional in RSS 2.0: emit it only when all three
              fields are two-digit numbers, instead of handing a malformed
              date string to the conversion function.
            -->
            <xsl:if test="string-length($date-digits) = 6 and translate($date-digits, '0123456789', '') = ''">
                <pubDate>
                    <!-- Two-digit years are assumed to belong to the 2000s. -->
                    <xsl:variable name="iso-date" select="concat('20', $year, '-', $month, '-', $day)"/>
                    <xsl:value-of select="php:functionString('Tweeper::str_to_gmdate', $iso-date)"/>
                </pubDate>
            </xsl:if>
            <description>
                <!--
                  The copied paragraphs are serialized as markup between a
                  hand-written CDATA opener and closer. This relies on the
                  processor honouring disable-output-escaping, and on the
                  copied content not containing the CDATA end marker.
                -->
                <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
                <xsl:copy-of select=".//div[contains(@class, 'post-bodycopy')]/p"/>
                <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
            </description>
        </item>
    </xsl:template>

    <!-- Entry point: builds the channel and delegates each post to the item template. -->
    <xsl:template match="/">
        <!-- String value of the first title element in the page. -->
        <xsl:variable name="channel-title" select="string(//title)"/>

        <rss version="2.0" xml:base="{$BaseURL}">
            <channel>
                <generator>Tweeper</generator>
                <title>
                    <xsl:value-of select="$channel-title"/>
                </title>
                <link>
                    <xsl:value-of select="$BaseURL"/>
                </link>
                <description>
                    <xsl:text>The world's greatest D.I.Y. comic website! Tools of mass construction!</xsl:text>
                </description>
                <!--
                  RSS 2.0 requires url, title and link inside image; title
                  and link repeat the channel's own values.
                -->
                <image>
                    <url>
                        <xsl:text>http://www.howtoons.com/wp-content/themes/atahualpa/images/header/tuck1000.png</xsl:text>
                    </url>
                    <title>
                        <xsl:value-of select="$channel-title"/>
                    </title>
                    <link>
                        <xsl:value-of select="$BaseURL"/>
                    </link>
                </image>
                <xsl:apply-templates select="//div[contains(@id, 'post-')]"/>
            </channel>
        </rss>
    </xsl:template>
</xsl:stylesheet>