tweeper.php: fix a problem with https URLs ending up in the enclosure element
[tweeper.git] / rss_converter_twitter.com.xsl
index 9efc769..c154141 100644 (file)
@@ -24,7 +24,7 @@
     xsl:extension-element-prefixes="php"
     exclude-result-prefixes="php">
 
-    <xsl:param name="generateEnclosure"/>
+    <xsl:param name="generate-enclosure"/>
 
     <xsl:output method="xml" indent="yes"/>
 
         <xsl:text>https://twitter.com</xsl:text>
     </xsl:variable>
 
+    <!--
+         Identity transform: copy every node and attribute through unchanged,
+         recursing into attributes and children so that more specific
+         templates can take over for individual elements.
+    -->
+    <xsl:template match="node()|@*">
+        <xsl:copy>
+            <xsl:apply-templates select="node()|@*"/>
+        </xsl:copy>
+    </xsl:template>
+
+    <!--
+         Anchors to external links provide the direct URL in the
+         data-expanded-url attribute, so use this in the href attribute too
+         instead of the default short URL which uses the t.co redirection
+         service.
+
+         NOTE: when creating an element, attributes must be processed _before_
+         adding the contents (either children or a value):
+         http://stackoverflow.com/questions/21984867/
+    -->
     <xsl:template match="a[@data-expanded-url]">
-        <xsl:value-of disable-output-escaping="yes" select="php:function('Tweeper::generate_enclosure', string(./@data-expanded-url))"/>
+        <!-- Prepend and append a white space for aestethic reasons -->
+        <xsl:text> </xsl:text>
+        <a>
+            <xsl:attribute name="href">
+                <xsl:value-of select="@data-expanded-url"/>
+            </xsl:attribute>
+            <!-- Also strip &nbsp; and &hellip; -->
+            <xsl:value-of select="translate(., '&#xA0;&#x2026;', '')"/>
+        </a>
+        <xsl:text> </xsl:text>
+    </xsl:template>
+
+    <!--
+         These are links to pic.twitter.com, use the direct link for those
+         too instead of the t.co redirections.
+    -->
+    <xsl:template match="a[@data-pre-embedded='true']">
+        <!--
+             Prefix the anchor's text with 'https://' once and reuse the
+             result for both the href attribute and the visible link text.
+        -->
+        <xsl:variable name="direct-url" select="concat('https://', .)"/>
+        <!-- Surround the link with a single space on each side for aesthetic reasons -->
+        <xsl:text> </xsl:text>
+        <a href="{$direct-url}">
+            <xsl:value-of select="$direct-url"/>
+        </a>
+        <xsl:text> </xsl:text>
+    </xsl:template>
+
+    <!--
+         Present images in a more convenient way: emit an img whose src is
+         the data-image-url value, wrapped in a link to the same URL with
+         ':orig' appended.
+    -->
+    <xsl:template match="div[@data-image-url]">
+        <a href="{@data-image-url}:orig">
+            <img src="{@data-image-url}"/>
+        </a>
+    </xsl:template>
+
+    <!--
+         Don't repeat the background in embedded media content: copy the
+         element, but emit a style attribute made of the original value
+         followed by '; background-repeat: no-repeat'.
+    -->
+    <xsl:template match="div[contains(@class, 'PlayableMedia-player')]">
+        <xsl:copy>
+            <xsl:apply-templates select="@*"/>
+            <!-- Added after the processed attributes, so this style value replaces any earlier one -->
+            <xsl:attribute name="style">
+                <xsl:value-of select="@style"/>
+                <xsl:text>; background-repeat: no-repeat</xsl:text>
+            </xsl:attribute>
+            <xsl:apply-templates select="node()"/>
+        </xsl:copy>
+    </xsl:template>
+
+    <!--
+         Enclosure mode for external links: pass the expanded URL, as a
+         string, to the PHP function Tweeper::generateEnclosure and copy its
+         result into the output.
+    -->
+    <xsl:template mode="enclosure" match="a[@data-expanded-url]">
+        <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', @data-expanded-url)"/>
+    </xsl:template>
+
+    <!--
+         Enclosure mode for images: pass the image URL with ':orig' appended
+         to the PHP function Tweeper::generateEnclosure and copy its result
+         into the output.
+    -->
+    <xsl:template mode="enclosure" match="div[@data-image-url]">
+        <xsl:variable name="original-image-url" select="concat(@data-image-url, ':orig')"/>
+        <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', $original-image-url)"/>
     </xsl:template>
 
     <!--
          Screen name of the account: the data-screen-name attribute of any
          div whose class attribute is exactly the string below, including
          its trailing space. The selection is empty when no such div exists.
     -->
     <xsl:variable name="screen-name" select="//div[@class='user-actions btn-group not-following ']/@data-screen-name"/>
 
-    <xsl:template match="//*[@data-item-type='tweet' and @role='listitem']">
+    <xsl:template match="//li[@data-item-id and @data-item-type='tweet']">
         <xsl:variable name="user-name" select=".//div[contains(@class, 'js-stream-tweet')]/@data-screen-name"/>
         <xsl:variable name="item-content" select=".//p[contains(@class, 'js-tweet-text')]"/>
-        <xsl:variable name="item-permalink" select="concat($BaseURL, .//a[contains(@class, 'js-permalink')]/@href)"/>
+        <xsl:variable name="item-media" select=".//div[contains(@class, 'AdaptiveMedia-container')]"/>
+        <xsl:variable name="item-permalink" select="concat($BaseURL, .//div[@data-permalink-path]/@data-permalink-path)"/>
+
+        <xsl:variable name="item-has-video" select="$item-media//*[contains(@class, 'PlayableMedia--video')]"/>
         <item>
             <title>
-                <xsl:value-of select="concat($user-name, ': ', $item-content)"/>
+                <xsl:value-of select="concat($user-name, ': ')"/>
+                <xsl:if test="$item-has-video">
+                    <xsl:text>(Video) </xsl:text>
+                </xsl:if>
+                <!--
+                     Prepend a space in front of the URLs which are not
+                     preceded by an open parenthesis, for aesthetic reasons.
+                     The dots in "pic.twitter.com" are escaped so that they
+                     match only a literal dot instead of any character.
+                     Also, regex, I know: http://xkcd.com/1171/
+                -->
+                <xsl:variable
+                    name="processed-title"
+                    select="php:functionString('preg_replace', '@((?&lt;!\()(?:http[s]?://|pic\.twitter\.com))@', ' \1', $item-content)"/>
+                <!-- Also strip &nbsp; and &hellip; -->
+                <xsl:value-of select="normalize-space(translate($processed-title, '&#xA0;&#x2026;', ''))"/>
             </title>
             <link>
                 <xsl:value-of select="$item-permalink"/>
             </guid>
             <pubDate>
                 <xsl:variable name="timestamp" select=".//span[contains(@class, 'js-short-timestamp')]/@data-time"/>
-                <xsl:value-of select="php:functionString('Tweeper::epoch_to_gmdate', number($timestamp))"/>
+                <xsl:value-of select="php:functionString('Tweeper::epochToRssDate', number($timestamp))"/>
             </pubDate>
             <description>
                 <xsl:value-of select="concat($user-name, ': ')"/>
                 <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
-                <xsl:copy-of select="$item-content/node()"/>
+                <xsl:if test="$item-has-video">
+                    <xsl:text>(Video) </xsl:text>
+                </xsl:if>
+                <xsl:apply-templates select="$item-content/node()"/>
+                <xsl:apply-templates select="$item-media/node()"/>
                 <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
             </description>
-            <xsl:if test="$generateEnclosure = 1">
-                <xsl:apply-templates select="$item-content//a[@data-expanded-url]"/>
+            <xsl:if test="$generate-enclosure = 1">
+                <xsl:apply-templates select="$item-content//a[@data-expanded-url]" mode="enclosure"/>
+                <xsl:apply-templates select="$item-media//div[@data-image-url]" mode="enclosure"/>
             </xsl:if>
         </item>
     </xsl:template>
 
     <xsl:template match="/">
-        <xsl:variable name="channel-title" select="concat('Twitter / ', $screen-name)"/>
+        <!--
+             Channel title: the fixed prefix 'Twitter / ' followed by the
+             screen name when it is non-empty, otherwise by the
+             whitespace-normalized text of the first h1 selected by //h1[1].
+        -->
+        <xsl:variable name="channel-title">
+            <xsl:text>Twitter / </xsl:text>
+            <xsl:choose>
+                <xsl:when test="$screen-name != ''">
+                    <xsl:value-of select="$screen-name"/>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:value-of select="normalize-space(//h1[1])"/>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:variable>
         <xsl:variable name="channel-link" select="//link[@rel='canonical']/@href"/>
 
         <rss version="2.0">
                         <xsl:value-of select="$channel-link"/>
                     </link>
                     <url>
-                        <xsl:value-of select="//a[contains(@class, 'profile-picture media-thumbnail')]/@href"/>
+                        <xsl:value-of select="//a[contains(@class, 'profile-picture')]/@href"/>
                     </url>
                 </image>
-                <xsl:apply-templates select="//*[@data-item-type='tweet' and @role='listitem']"/>
+                <xsl:apply-templates select="//ol[@id='stream-items-id']/li[@data-item-id and @data-item-type='tweet']"/>
             </channel>
         </rss>
     </xsl:template>