src/Tweeper.php: allow overriding the User-Agent in cURL requests
[tweeper.git] / src / rss_converter_twitter.com.xsl
index 873be5f..bbb3bd8 100644 (file)
@@ -1,7 +1,7 @@
 <!--
   Stylesheet to convert Twitter user timelines to RSS.
 
-  Copyright (C) 2013-2014  Antonio Ospite <ao2@ao2.it>
+  Copyright (C) 2013-2020  Antonio Ospite <ao2@ao2.it>
 
   This file is part of tweeper.
 
 <xsl:stylesheet version="1.0"
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
     xmlns:php="http://php.net/xsl"
-    xsl:extension-element-prefixes="php"
     exclude-result-prefixes="php">
 
+    <!--
+        Set to 1 to apply the enclosure-mode templates to the links and
+        media anchors of each item.
+    -->
     <xsl:param name="generate-enclosure"/>
+    <!--
+        Set to 1 to always prefix item titles and descriptions with the
+        author name; otherwise the prefix is only added when the author
+        differs from the screen name of the page.
+    -->
+    <xsl:param name="show-usernames"/>
+    <!-- Set to 1 to output links for the pre-embedded media anchors. -->
+    <xsl:param name="show-multimedia"/>
 
     <xsl:output method="xml" indent="yes"/>
 
     <!-- Identity transform -->
     <xsl:template match="@*|node()">
         <xsl:copy>
+            <!--
+                Recurse into every child node and every attribute except
+                "style": the style attribute is stripped while copying
+                elements because it may be dangerous, see:
+                https://validator.w3.org/feed/docs/warning/DangerousStyleAttr.html
+            -->
             <xsl:apply-templates select="@*[not(name() = 'style')]|node()"/>
         </xsl:copy>
     </xsl:template>
 
+    <!--
+        Strip leading whitespace in the first text node of the tweet-text.
+        The first non-whitespace character is located with normalize-space()
+        and everything before its first occurrence is dropped, so the text is
+        kept intact even when it does not start with exactly two spaces.
+        A whitespace-only text node produces no output at all.
+    -->
+    <xsl:template match="div[@class='tweet-text']/div/text()[1]">
+        <xsl:variable name="first-char" select="substring(normalize-space(.), 1, 1)"/>
+        <xsl:if test="$first-char != ''">
+            <xsl:value-of select="concat($first-char, substring-after(., $first-char))"/>
+        </xsl:if>
+    </xsl:template>
+
     <!--
          Anchors to external links provide the direct URL in the
          data-expanded-url attribute, so use this in the href attribute too
          http://stackoverflow.com/questions/21984867/
     -->
     <xsl:template match="a[@data-expanded-url]">
-        <!-- Prepend and append a white space for aestethic reasons -->
-        <xsl:text> </xsl:text>
         <a>
             <xsl:attribute name="href">
                 <xsl:value-of select="@data-expanded-url"/>
             </xsl:attribute>
-            <!-- Also strip &nbsp; and &hellip; -->
-            <xsl:value-of select="translate(., '&#xA0;&#x2026;', '')"/>
+            <!-- Show the expanded URL as the link text too. -->
+            <xsl:value-of select="@data-expanded-url"/>
         </a>
-        <xsl:text> </xsl:text>
     </xsl:template>
 
     <!--
          too instead of the t.co redirections.
     -->
     <xsl:template match="a[@data-pre-embedded='true']">
-        <!-- Prepend and append a white space for aestethic reasons -->
-        <xsl:text> </xsl:text>
-        <a>
-            <xsl:attribute name="href">
+        <!-- Links to pre-embedded media are only emitted when show-multimedia is 1. -->
+        <xsl:if test="$show-multimedia = 1">
+            <a>
+                <!-- Link to the URL found in data-url. -->
+                <xsl:attribute name="href">
+                    <xsl:value-of select="@data-url"/>
+                </xsl:attribute>
                 <xsl:value-of select="concat('https://', .)"/>
-            </xsl:attribute>
-            <xsl:value-of select="concat('https://', .)"/>
-        </a>
-        <xsl:text> </xsl:text>
+            </a>
+        </xsl:if>
     </xsl:template>
 
     <!-- Present images in a more convenient way -->
-    <xsl:template match="div[@data-image-url]">
+    <!-- TODO: not supported in mobile UI
+    <xsl:template match="a[@data-pre-embedded='true' and contains(@data-url, '/photo/')]">
+        <xsl:variable name="embedded-photo-url" select="concat('https://pbs.twimg.com/media/', @data-tco-id, '?format=jpg')"/>
         <a>
             <xsl:attribute name="href">
-                <xsl:value-of select="concat(@data-image-url, ':orig')"/>
+                <xsl:value-of select="$embedded-photo-url"/>
             </xsl:attribute>
-            <img>
+            <img style="max-width: 100%">
                 <xsl:attribute name="src">
-                    <xsl:value-of select="@data-image-url"/>
+                    <xsl:value-of select="$embedded-photo-url"/>
                 </xsl:attribute>
             </img>
         </a>
     </xsl:template>
+    -->
 
     <!-- Don't repeat background in embedded media content -->
+    <!-- TODO: not supported in mobile UI
     <xsl:template match="div[contains(@class, 'PlayableMedia-player')]">
         <xsl:copy>
             <xsl:apply-templates select="@*"/>
             <xsl:attribute name="style">
-                <xsl:value-of select="concat(@style, '; background-repeat: no-repeat')"/>
+                <xsl:value-of select="concat(@style, '; background-repeat: no-repeat; background-size: 100% auto')"/>
             </xsl:attribute>
             <xsl:apply-templates select="node()"/>
         </xsl:copy>
     </xsl:template>
+    -->
 
+    <!--
+        Enclosure mode for external links: pass the expanded URL to the PHP
+        helper Tweeper::generateEnclosure and copy its result to the output.
+    -->
     <xsl:template match="a[@data-expanded-url]" mode="enclosure">
         <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', ./@data-expanded-url)"/>
     </xsl:template>
 
-    <xsl:template match="div[@data-image-url]" mode="enclosure">
-        <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', concat(./@data-image-url, ':orig'))"/>
+    <!--
+        Enclosure mode for pre-embedded media anchors: pass the data-url
+        attribute to the PHP helper Tweeper::generateEnclosure and copy its
+        result to the output.
+    -->
+    <xsl:template match="a[@data-pre-embedded='true']" mode="enclosure">
+        <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', @data-url)"/>
     </xsl:template>
 
-    <xsl:variable name="screen-name" select="//div[@class='user-actions btn-group not-following ']/@data-screen-name"/>
+    <!--
+        Screen name of the page: the whitespace-normalized text following
+        '@' in the first element with class 'username' found inside the
+        profile-details (or main-tweet) table.
+    -->
+    <xsl:variable name="screen-name" select="normalize-space(substring-after(//table[@class='profile-details' or @class='main-tweet']//*[@class='username'], '@'))"/>
+
+    <xsl:template match="//div[contains(@class, 'timeline')]/table[@class='tweet  ']|//div[@class='main-tweet-container']/table[@class='main-tweet']">
+        <xsl:variable name="user-name" select="normalize-space(.//*[@class='username']/text()[2])"/>
+        <xsl:variable name="item-content" select=".//div[@class='tweet-text']/div"/>
+        <xsl:variable name="item-media" select=".//a[@data-pre-embedded='true']"/>
+        <!--
+            Permalink of the item: $BaseURL followed by the href attribute of
+            the matched element without its query string, or the canonical
+            URL of the page when there is no href attribute.
+        -->
+        <xsl:variable name="item-permalink">
+            <xsl:choose>
+                <xsl:when test="@href">
+                    <!--
+                        Drop the query string, if any. A '?' is appended before
+                        calling substring-before() because the function returns
+                        an empty string when the separator is missing, which
+                        would otherwise discard the whole path.
+                    -->
+                    <xsl:value-of select="concat($BaseURL, substring-before(concat(@href, '?'), '?'))"/>
+                </xsl:when>
+                <xsl:otherwise>
+                    <!--
+                        The main tweet in permalink pages does not have a timestamp tag,
+                        just use the canonical URL as permalink.
+                    -->
+                    <xsl:value-of select="//link[@rel='canonical']/@href"/>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:variable>
 
-    <xsl:template match="//li[@data-item-id and @data-item-type='tweet']">
-        <xsl:variable name="user-name" select=".//div[@data-tweet-id]/@data-screen-name"/>
-        <xsl:variable name="item-content" select=".//p[contains(@class, 'js-tweet-text')]"/>
-        <xsl:variable name="item-media" select=".//div[contains(@class, 'AdaptiveMedia-container')]"/>
-        <xsl:variable name="item-permalink" select="concat($BaseURL, .//div[@data-permalink-path]/@data-permalink-path)"/>
+        <!-- TODO twitter mobile UI does not have a way to detect this
+        <xsl:variable name="item-has-video" select="$item-media//*[contains(@class, 'PlayableMedia- -video')]"/>
+        <xsl:variable name="item-has-gif" select="$item-media//*[contains(@class, 'PlayableMedia- -gif')]"/>
+        -->
 
-        <xsl:variable name="item-has-video" select="$item-media//*[contains(@class, 'PlayableMedia--video')]"/>
         <item>
             <title>
-                <xsl:value-of select="concat($user-name, ': ')"/>
+                <xsl:if test="($show-usernames = 1) or ($screen-name != $user-name)">
+                    <xsl:value-of select="concat($user-name, ': ')"/>
+                </xsl:if>
+                <!-- TODO twitter mobile UI does not have a way to detect this
                 <xsl:if test="$item-has-video">
                     <xsl:text>(Video) </xsl:text>
                 </xsl:if>
+                -->
                 <!--
                      Prepend a space in front of the URLs which are not
                      preceded by an open parenthesis, for aesthetic reasons.
                 <xsl:value-of select="$item-permalink"/>
             </guid>
             <pubDate>
-                <xsl:variable name="timestamp" select=".//span[contains(@class, 'js-short-timestamp')]/@data-time"/>
-                <xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', number($timestamp))"/>
+                <xsl:variable name="timestamp" select=".//td[@class='timestamp']/a|.//div[@class='metadata']/a"/>
+                <xsl:value-of select="php:functionString('Tweeper\Tweeper::twitterToRssDate', $timestamp)"/>
             </pubDate>
             <description>
                 <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+                <xsl:if test="($show-usernames = 1) or ($screen-name != $user-name)">
+                    <xsl:value-of select="concat($user-name, ':')"/>
+                    <xsl:element name="br"/>
+                </xsl:if>
+                <!-- TODO twitter mobile UI does not support embedded media
                 <xsl:if test="$item-has-video">
-                    <xsl:text>(Video) </xsl:text>
+                    <xsl:text> (Video)</xsl:text>
+                    <xsl:element name="br"/>
                 </xsl:if>
-                <xsl:apply-templates select="$item-content/node()"/>
-                <xsl:apply-templates select="$item-media/node()"/>
+                <xsl:if test="$item-has-gif">
+                    <xsl:text> (GIF)</xsl:text>
+                    <xsl:element name="br"/>
+                </xsl:if>
+                -->
+                <xsl:element name="span">
+                    <xsl:attribute name="style">white-space: pre-wrap;</xsl:attribute>
+                    <xsl:apply-templates select="$item-content/node()"/>
+                </xsl:element>
+
+                <!-- TODO twitter mobile UI does not support embedded media
+                <xsl:if test="$show-multimedia = 1">
+                    <xsl:apply-templates select="$item-media"/>
+                </xsl:if>
+                -->
                 <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
             </description>
             <xsl:if test="$generate-enclosure = 1">
                 <xsl:apply-templates select="$item-content//a[@data-expanded-url]" mode="enclosure"/>
-                <xsl:apply-templates select="$item-media//div[@data-image-url]" mode="enclosure"/>
+                <xsl:apply-templates select="$item-media" mode="enclosure"/>
             </xsl:if>
         </item>
     </xsl:template>
                     <xsl:value-of select="concat('Twitter / ', $screen-name)"/>
                 </xsl:when>
                 <xsl:otherwise>
-                    <xsl:value-of select="concat('Twitter / ', normalize-space(//h1[1]))"/>
+                    <xsl:value-of select="concat('Twitter / ', normalize-space(//td[@id='search']//input/@value))"/>
                 </xsl:otherwise>
             </xsl:choose>
         </xsl:variable>
         <xsl:variable name="channel-link" select="//link[@rel='canonical']/@href"/>
+        <xsl:variable name="channel-image" select="//table[@class='profile-details' or @class='main-tweet']//td[@class='avatar']//img/@src"/>
 
         <rss version="2.0">
             <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
                     <xsl:value-of select="$channel-link"/>
                 </link>
                 <description>
-                    <xsl:value-of select="normalize-space(//div[@class='ProfileHeaderCard'])"/>
+                    <xsl:value-of select="normalize-space(//table[@class='profile-details' or @class='main-tweet']//td[@class='details'])"/>
                 </description>
-                <image>
-                    <title>
-                        <xsl:value-of select="$channel-title"/>
-                    </title>
-                    <link>
-                        <xsl:value-of select="$channel-link"/>
-                    </link>
-                    <url>
-                        <xsl:value-of select="//a[contains(@class, 'profile-picture')]/@href"/>
-                    </url>
-                </image>
-                <xsl:apply-templates select="//ol[@id='stream-items-id']/li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/>
+                <xsl:if test="$channel-image != ''">
+                    <image>
+                        <title>
+                            <xsl:value-of select="$channel-title"/>
+                        </title>
+                        <link>
+                            <xsl:value-of select="$channel-link"/>
+                        </link>
+                        <url>
+                            <xsl:value-of select="$channel-image"/>
+                        </url>
+                    </image>
+                </xsl:if>
+                <xsl:apply-templates select="//div[contains(@class, 'timeline')]/table[@class='tweet  ']|//div[@class='main-tweet-container']/table[@class='main-tweet']"/>
             </channel>
         </rss>
     </xsl:template>