rss_converter_twitter.com.xsl: filter out promoted tweets
[tweeper.git] / src / rss_converter_twitter.com.xsl
1 <!--
2   Stylesheet to convert Twitter user timelines to RSS.
3
4   Copyright (C) 2013-2014  Antonio Ospite <ao2@ao2.it>
5
6   This file is part of tweeper.
7
8   This program is free software: you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation, either version 3 of the License, or
11   (at your option) any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 -->
21 <xsl:stylesheet version="1.0"
22     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
23     xmlns:php="http://php.net/xsl"
24     extension-element-prefixes="php"
25     exclude-result-prefixes="php">
26
27     <!-- When equal to 1, the item template also applies the enclosure mode templates -->
28     <xsl:param name="generate-enclosure"/>
29
30     <xsl:output method="xml" indent="yes"/>
31
32     <!-- Site root, prepended to the permalink paths and used as xml:base -->
33     <xsl:variable name="BaseURL" select="'https://twitter.com'"/>
34
35     <!-- Identity transform, except that style attributes of copied elements are left out -->
36     <xsl:template match="node()|@*">
37         <xsl:copy>
38             <xsl:apply-templates select="node()|@*[name() != 'style']"/>
39         </xsl:copy>
40     </xsl:template>
41
42     <!--
43          Anchors to external links provide the direct URL in the
44          data-expanded-url attribute, so use this in the href attribute too
45          instead of the default short URL which uses the t.co redirection
46          service.
47
48          Only the text of the original anchor is kept: its other attributes
49          and any markup inside of it are not copied to the new link.
50
51          NOTE: the href is set with an attribute value template, so it is
52          part of the element before any content gets added to it, see
53          http://stackoverflow.com/questions/21984867/
54     -->
55     <xsl:template match="a[@data-expanded-url]">
56         <!-- Surround the link with white space for aesthetic reasons -->
57         <xsl:text> </xsl:text>
58         <a href="{@data-expanded-url}">
59             <!-- Also strip &nbsp; and &hellip; -->
60             <xsl:value-of select="translate(., '&#xA0;&#x2026;', '')"/>
61         </a>
62         <xsl:text> </xsl:text>
63     </xsl:template>
64
65     <!--
66          These are links to pic.twitter.com, use the direct link for those
67          too instead of the t.co redirections.
68          The text of the anchor, prefixed with https://, is used both as
69          the target and as the text of the new link.
70     -->
71     <xsl:template match="a[@data-pre-embedded='true']">
72         <xsl:variable name="direct-url" select="concat('https://', .)"/>
73         <!-- Prepend and append a white space for aesthetic reasons -->
74         <xsl:text> </xsl:text>
75         <a href="{$direct-url}">
76             <xsl:value-of select="$direct-url"/>
77         </a>
78         <xsl:text> </xsl:text>
79     </xsl:template>
80
81     <!--
82          Present images in a more convenient way: each image container is
83          replaced by a plain <img> showing the URL from data-image-url,
84          wrapped in a link to the same URL with ':orig' appended.
85
86          NOTE(review): ':orig' presumably selects the full-size variant of
87          the image; confirm against the image host.
88     -->
89     <xsl:template match="div[@data-image-url]">
90         <a href="{concat(@data-image-url, ':orig')}">
91             <img src="{@data-image-url}"/>
92         </a>
93     </xsl:template>
94
95     <!-- Don't repeat background in embedded media content -->
96     <xsl:template match="div[contains(@class, 'PlayableMedia-player')]">
97         <xsl:variable name="player-style" select="concat(@style, '; background-repeat: no-repeat')"/>
98         <xsl:copy>
99             <xsl:apply-templates select="@*"/>
100             <!-- Added after the copied attributes, so it replaces the copied style -->
101             <xsl:attribute name="style"><xsl:value-of select="$player-style"/></xsl:attribute>
102             <xsl:apply-templates select="node()"/>
103         </xsl:copy>
104     </xsl:template>
105
106     <!-- Enclosure mode: copy what Tweeper::generateEnclosure returns for each link or image URL -->
107     <xsl:template mode="enclosure" match="a[@data-expanded-url]">
108         <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', @data-expanded-url)"/>
109     </xsl:template>
110     <xsl:template mode="enclosure" match="div[@data-image-url]">
111         <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', concat(@data-image-url, ':orig'))"/>
112     </xsl:template>
113     <!-- Used for the channel title; the class must match exactly, trailing space included -->
114     <xsl:variable name="screen-name" select="//div[@class='user-actions btn-group not-following ']/@data-screen-name"/>
115
116     <!-- Convert one tweet of the stream into an RSS item -->
117     <xsl:template match="li[@data-item-id and @data-item-type='tweet']">
118         <xsl:variable name="user-name" select=".//div[contains(@class, 'js-stream-tweet')]/@data-screen-name"/>
119         <xsl:variable name="item-content" select=".//p[contains(@class, 'js-tweet-text')]"/>
120         <xsl:variable name="item-media" select=".//div[contains(@class, 'AdaptiveMedia-container')]"/>
121         <xsl:variable name="item-permalink" select="concat($BaseURL, .//div[@data-permalink-path]/@data-permalink-path)"/>
122         <xsl:variable name="item-has-video" select="$item-media//*[contains(@class, 'PlayableMedia--video')]"/>
123         <xsl:variable name="timestamp" select=".//span[contains(@class, 'js-short-timestamp')]/@data-time"/>
124
125         <item>
126             <title>
127                 <xsl:value-of select="concat($user-name, ': ')"/>
128                 <xsl:if test="$item-has-video">
129                     <xsl:text>(Video) </xsl:text>
130                 </xsl:if>
131                 <!--
132                      Prepend a space in front of the URLs which are not
133                      preceded by an open parenthesis, for aesthetic reasons.
134                      The dots in pic.twitter.com are escaped, so that only
135                      literal dots match.
136                      Also, regex, I know: http://xkcd.com/1171/
137                 -->
138                 <xsl:variable
139                     name="processed-title"
140                     select="php:functionString('preg_replace', '@((?&lt;!\()(?:http[s]?://|pic\.twitter\.com))@', ' \1', $item-content)"/>
141                 <!-- Also strip &nbsp; and &hellip; -->
142                 <xsl:value-of select="normalize-space(translate($processed-title, '&#xA0;&#x2026;', ''))"/>
143             </title>
144             <link><xsl:value-of select="$item-permalink"/></link>
145             <guid><xsl:value-of select="$item-permalink"/></guid>
146             <pubDate>
147                 <xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', number($timestamp))"/>
148             </pubDate>
149             <description>
150                 <xsl:value-of select="concat($user-name, ': ')"/>
151                 <!-- The CDATA markers are written unescaped around the tweet markup -->
152                 <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
153                 <xsl:if test="$item-has-video">
154                     <xsl:text>(Video) </xsl:text>
155                 </xsl:if>
156                 <xsl:apply-templates select="$item-content/node()"/>
157                 <xsl:apply-templates select="$item-media/node()"/>
158                 <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
159             </description>
160             <xsl:if test="$generate-enclosure = 1">
161                 <xsl:apply-templates select="$item-content//a[@data-expanded-url]" mode="enclosure"/>
162                 <xsl:apply-templates select="$item-media//div[@data-image-url]" mode="enclosure"/>
163             </xsl:if>
164         </item>
165     </xsl:template>
166
167     <!-- Entry point: build the RSS channel around the tweets of the page -->
168     <xsl:template match="/">
169         <xsl:variable name="channel-link" select="//link[@rel='canonical']/@href"/>
170
171         <!--
172              Prefer the screen name for the channel title, fall back to the
173              first h1 of the page when the screen name is empty.
174         -->
175         <xsl:variable name="channel-title">
176             <xsl:text>Twitter / </xsl:text>
177             <xsl:choose>
178                 <xsl:when test="$screen-name != ''">
179                     <xsl:value-of select="$screen-name"/>
180                 </xsl:when>
181                 <xsl:otherwise>
182                     <xsl:value-of select="normalize-space(//h1[1])"/>
183                 </xsl:otherwise>
184             </xsl:choose>
185         </xsl:variable>
186
187         <rss version="2.0">
188             <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
189             <channel>
190                 <generator>Tweeper</generator>
191                 <title><xsl:value-of select="$channel-title"/></title>
192                 <link><xsl:value-of select="$channel-link"/></link>
193                 <description>
194                     <xsl:value-of select="normalize-space(//div[@class='ProfileHeaderCard'])"/>
195                 </description>
196                 <image>
197                     <title><xsl:value-of select="$channel-title"/></title>
198                     <link><xsl:value-of select="$channel-link"/></link>
199                     <url>
200                         <xsl:value-of select="//a[contains(@class, 'profile-picture')]/@href"/>
201                     </url>
202                 </image>
203                 <!-- Promoted tweets are left out of the feed -->
204                 <xsl:apply-templates select="//ol[@id='stream-items-id']/li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/>
205             </channel>
206         </rss>
207     </xsl:template>
208 </xsl:stylesheet>