Add back partial support for twitter.com using the old twitter mobile UI
authorAntonio Ospite <ao2@ao2.it>
Mon, 8 Jun 2020 21:49:15 +0000 (23:49 +0200)
committerAntonio Ospite <ao2@ao2.it>
Mon, 8 Jun 2020 21:49:15 +0000 (23:49 +0200)
On June 1st 2020 twitter.com completely disabled serving the legacy UI
which tweeper kept supporting using a User-Agent trick.

The new official UI retrieves JSON after authenticating with
cookies and generates the HTML client-side, so it's too complicated for
the current Tweeper structure.

Work around the issue with the help of another User-Agent trick: pretend
to be an old Android phone, which makes twitter.com serve the old mobile
UI, which can be easily scraped by tweeper.

This approach loses support for some functionalities like embedded
media, but at least makes Tweeper work again with twitter.com.

src/Tweeper.php
src/rss_converter_twitter.com.xsl

index e98623b..5b9f235 100644 (file)
@@ -36,7 +36,7 @@ date_default_timezone_set('UTC');
  */
 class Tweeper {
 
-  private static $userAgent = "Mozilla/5.0";
+  private static $userAgent = "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J)";
   private static $maxConnectionTimeout = 5;
   private static $maxConnectionRetries = 5;
 
@@ -87,6 +87,46 @@ class Tweeper {
   }
 
   /**
+   * Convert Twitter mobile date to the date format expected in a RSS document.
+   */
+  public static function twitterToRssDate($date) {
+    // Twitter uses relative timestamps in minutes for recent tweets.
+    if (preg_match('/^(\d+)m$/', $date, $matches)) {
+      $timestamp = strtotime("+" . $matches[1] . " min", time());
+      if (FALSE === $timestamp) {
+        $timestamp = 0;
+      }
+    }
+    else {
+      /*
+       * In case the time is specified put it after the date,
+       * to make it recognized by strptime().
+       */
+      if (preg_match('/(.*) - (.*)/', $date, $matches)) {
+        $date = $matches[2] . " " . $matches[1];
+      }
+
+      $timestamp = strtotime($date);
+      if (FALSE === $timestamp) {
+        $timestamp = 0;
+      }
+
+      /*
+       * The twitter mobile UI usually only specifies the month and the day, so
+       * strtotime($date) may interpret the date as being in the future.
+       *
+       * If the date is in the future it is probably in the same day but in the
+       * previous year.
+       */
+      if ($timestamp > time()) {
+        $timestamp = strtotime('-1 years', $timestamp);
+      }
+    }
+
+    return Tweeper::epochToRssDate($timestamp);
+  }
+
+  /**
    * Convert string to UpperCamelCase.
    */
   public static function toUpperCamelCase($str, $delim = ' ') {
index 1c20e70..bd3b589 100644 (file)
         </xsl:copy>
     </xsl:template>
 
+    <!--
+         Strip the leading whitespace of the first text node of the
+         tweet-text, however many characters it is made of.
+    -->
+    <xsl:template match="div[@class='tweet-text']/div/text()[1]">
+        <!-- normalize-space() trims the node, so this is its first visible character. -->
+        <xsl:variable name="first-char" select="substring(normalize-space(.), 1, 1)"/>
+        <!-- A whitespace-only node has no such character and yields nothing. -->
+        <xsl:if test="$first-char != ''">
+            <!-- Everything before the first visible character is whitespace: cut there. -->
+            <xsl:value-of select="concat($first-char, substring-after(., $first-char))"/>
+        </xsl:if>
+    </xsl:template>
+
     <!--
          Anchors to external links provide the direct URL in the
          data-expanded-url attribute, so use this in the href attribute too
          http://stackoverflow.com/questions/21984867/
     -->
     <xsl:template match="a[@data-expanded-url]">
-        <!-- Prepend and append a white space for aestethic reasons -->
-        <xsl:text> </xsl:text>
         <a>
             <xsl:attribute name="href">
                 <xsl:value-of select="@data-expanded-url"/>
             </xsl:attribute>
-            <!-- Also strip &nbsp; and &hellip; -->
-            <xsl:value-of select="translate(., '&#xA0;&#x2026;', '')"/>
+            <xsl:value-of select="@data-expanded-url"/>
         </a>
-        <xsl:text> </xsl:text>
     </xsl:template>
 
     <!--
     -->
     <xsl:template match="a[@data-pre-embedded='true']">
         <xsl:if test="$show-multimedia = 1">
-            <!-- Prepend and append a white space for aestethic reasons -->
-            <xsl:text> </xsl:text>
             <a>
                 <xsl:attribute name="href">
-                    <xsl:value-of select="concat('https://', .)"/>
+                    <xsl:value-of select="@data-url"/>
                 </xsl:attribute>
                 <xsl:value-of select="concat('https://', .)"/>
             </a>
-            <xsl:text> </xsl:text>
         </xsl:if>
     </xsl:template>
 
     <!-- Present images in a more convenient way -->
-    <xsl:template match="div[@data-image-url]">
+    <!-- TODO: not supported in mobile UI
+    <xsl:template match="a[@data-pre-embedded='true' and contains(@data-url, '/photo/')]">
+        <xsl:variable name="embedded-photo-url" select="concat('https://pbs.twimg.com/media/', @data-tco-id, '?format=jpg')"/>
         <a>
             <xsl:attribute name="href">
-                <xsl:value-of select="concat(@data-image-url, ':orig')"/>
+                <xsl:value-of select="$embedded-photo-url"/>
             </xsl:attribute>
             <img style="max-width: 100%">
                 <xsl:attribute name="src">
-                    <xsl:value-of select="@data-image-url"/>
+                    <xsl:value-of select="$embedded-photo-url"/>
                 </xsl:attribute>
             </img>
         </a>
     </xsl:template>
+    -->
 
     <!-- Don't repeat background in embedded media content -->
+    <!-- TODO: not supported in mobile UI
     <xsl:template match="div[contains(@class, 'PlayableMedia-player')]">
         <xsl:copy>
             <xsl:apply-templates select="@*"/>
             <xsl:apply-templates select="node()"/>
         </xsl:copy>
     </xsl:template>
+    -->
 
     <xsl:template match="a[@data-expanded-url]" mode="enclosure">
         <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', ./@data-expanded-url)"/>
     </xsl:template>
 
-    <xsl:template match="div[@data-image-url]" mode="enclosure">
-        <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', concat(./@data-image-url, ':orig'))"/>
+    <xsl:template match="a[@data-pre-embedded='true']" mode="enclosure">
+        <xsl:copy-of select="php:functionString('Tweeper\Tweeper::generateEnclosure', @data-url)"/>
     </xsl:template>
 
-    <xsl:variable name="screen-name" select="//div[@class='user-actions btn-group not-following ']/@data-screen-name"/>
+    <xsl:variable name="screen-name" select="normalize-space(substring-after(//table[@class='profile-details' or @class='main-tweet']//*[@class='username'], '@'))"/>
 
-    <xsl:template match="//div[@class='permalink-inner permalink-tweet-container'] | //li[@data-item-id and @data-item-type='tweet']">
-        <xsl:variable name="user-name" select=".//div[@data-tweet-id]/@data-screen-name"/>
-        <xsl:variable name="item-content" select=".//p[contains(@class, 'js-tweet-text')]"/>
-        <xsl:variable name="item-media" select=".//div[contains(@class, 'AdaptiveMedia-container')]"/>
-        <xsl:variable name="item-permalink" select="concat($BaseURL, .//div[@data-permalink-path]/@data-permalink-path)"/>
+    <xsl:template match="//div[contains(@class, 'timeline')]/table[@class='tweet  ']|//div[@class='main-tweet-container']/table[@class='main-tweet']">
+        <xsl:variable name="user-name" select="normalize-space(.//*[@class='username']/text()[2])"/>
+        <xsl:variable name="item-content" select=".//div[@class='tweet-text']/div"/>
+        <xsl:variable name="item-media" select=".//a[@data-pre-embedded='true']"/>
+        <xsl:variable name="item-permalink">
+            <xsl:choose>
+                <xsl:when test="@href">
+                    <xsl:value-of select="concat($BaseURL, substring-before(@href, '?'))"/>
+                </xsl:when>
+                <xsl:otherwise>
+                    <!--
+                        The main tweet in permalink pages does not have a
+                        timestamp tag, just use the canonical URL as permalink.
+                    -->
+                    <xsl:value-of select="//link[@rel='canonical']/@href"/>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:variable>
+
+        <!-- TODO twitter mobile UI does not have a way to detect this
+        <xsl:variable name="item-has-video" select="$item-media//*[contains(@class, 'PlayableMedia- -video')]"/>
+        <xsl:variable name="item-has-gif" select="$item-media//*[contains(@class, 'PlayableMedia- -gif')]"/>
+        -->
 
-        <xsl:variable name="item-has-video" select="$item-media//*[contains(@class, 'PlayableMedia--video')]"/>
-        <xsl:variable name="item-has-gif" select="$item-media//*[contains(@class, 'PlayableMedia--gif')]"/>
         <item>
             <title>
                 <xsl:if test="($show-usernames = 1) or ($screen-name != $user-name)">
                     <xsl:value-of select="concat($user-name, ': ')"/>
                 </xsl:if>
+                <!-- TODO twitter mobile UI does not have a way to detect this
                 <xsl:if test="$item-has-video">
                     <xsl:text>(Video) </xsl:text>
                 </xsl:if>
+                -->
                 <!--
                      Prepend a space in front of the URLs which are not
                      preceded by an open parenthesis, for aestethic reasons.
                 <xsl:value-of select="$item-permalink"/>
             </guid>
             <pubDate>
-                <xsl:variable name="timestamp" select=".//span[contains(@class, 'js-short-timestamp')]/@data-time"/>
-                <xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', number($timestamp))"/>
+                <xsl:variable name="timestamp" select=".//td[@class='timestamp']/a|.//div[@class='metadata']/a"/>
+                <xsl:value-of select="php:functionString('Tweeper\Tweeper::twitterToRssDate', $timestamp)"/>
             </pubDate>
             <description>
                 <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
                     <xsl:value-of select="concat($user-name, ':')"/>
                     <xsl:element name="br"/>
                 </xsl:if>
+                <!-- TODO twitter mobile UI does not support embedded media
                 <xsl:if test="$item-has-video">
                     <xsl:text> (Video)</xsl:text>
                     <xsl:element name="br"/>
                     <xsl:text> (GIF)</xsl:text>
                     <xsl:element name="br"/>
                 </xsl:if>
+                -->
                 <xsl:element name="span">
                     <xsl:attribute name="style">white-space: pre-wrap;</xsl:attribute>
                     <xsl:apply-templates select="$item-content/node()"/>
                 </xsl:element>
+
+                <!-- TODO twitter mobile UI does not support embedded media
                 <xsl:if test="$show-multimedia = 1">
-                    <xsl:apply-templates select="$item-media/node()"/>
+                    <xsl:apply-templates select="$item-media"/>
                 </xsl:if>
+                -->
                 <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
             </description>
             <xsl:if test="$generate-enclosure = 1">
                 <xsl:apply-templates select="$item-content//a[@data-expanded-url]" mode="enclosure"/>
-                <xsl:apply-templates select="$item-media//div[@data-image-url]" mode="enclosure"/>
+                <xsl:apply-templates select="$item-media" mode="enclosure"/>
             </xsl:if>
         </item>
     </xsl:template>
                     <xsl:value-of select="concat('Twitter / ', $screen-name)"/>
                 </xsl:when>
                 <xsl:otherwise>
-                    <xsl:value-of select="concat('Twitter / ', normalize-space(//h1[1]))"/>
+                    <xsl:value-of select="concat('Twitter / ', normalize-space(//td[@id='search']//input/@value))"/>
                 </xsl:otherwise>
             </xsl:choose>
         </xsl:variable>
         <xsl:variable name="channel-link" select="//link[@rel='canonical']/@href"/>
-        <xsl:variable name="channel-image" select="//a[contains(@class, 'profile-picture')]/@href"/>
+        <xsl:variable name="channel-image" select="//table[@class='profile-details' or @class='main-tweet']//td[@class='avatar']//img/@src"/>
 
         <rss version="2.0">
             <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
                     <xsl:value-of select="$channel-link"/>
                 </link>
                 <description>
-                    <xsl:value-of select="normalize-space(//div[@class='ProfileHeaderCard'])"/>
-                    <!-- The following rule should only match on hashtag URLs -->
-                    <xsl:value-of select="normalize-space(//div[@class='SearchNavigation-textContainer'])"/>
+                    <xsl:value-of select="normalize-space(//table[@class='profile-details' or @class='main-tweet']//td[@class='details'])"/>
                 </description>
                 <xsl:if test="$channel-image != ''">
                     <image>
                         </url>
                     </image>
                 </xsl:if>
-                <xsl:apply-templates select="//ol[@id='stream-items-id']/li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/>
-
-                <!-- These rules will only match on permalink URLs -->
-                <xsl:apply-templates select="//div[@class='permalink-inner permalink-tweet-container']"/>
-                <xsl:apply-templates select="//div[@data-component-context='replies']//li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/>
-
+                <xsl:apply-templates select="//div[contains(@class, 'timeline')]/table[@class='tweet  ']|//div[@class='main-tweet-container']/table[@class='main-tweet']"/>
             </channel>
         </rss>
     </xsl:template>