From af103c976dd4992d79e9d9a71837aecff30d6e9c Mon Sep 17 00:00:00 2001
From: Antonio Ospite
Date: Mon, 8 Jun 2020 23:49:15 +0200
Subject: [PATCH] Add back partial support for twitter.com using the old twitter mobile UI

On June 1st 2020 twitter.com completely disabled serving the legacy UI
which tweeper kept supporting using a User-Agent trick.

The new official UI retrieves JSON after authenticating with cookies and
generates the HTML client-side, so it's too complicated for the current
Tweeper structure.

Work around the issue with the help of another User-Agent trick: pretend
to be an old Android phone, which makes twitter.com serve the old mobile
UI which can be easily scraped by tweeper.

This approach loses support for some functionalities like embedded media
but at least makes Tweeper work again with twitter.com
---
 src/Tweeper.php                   | 54 ++++++++++++++++++++++-
 src/rss_converter_twitter.com.xsl | 93 +++++++++++++++++++++++----------------
 2 files changed, 109 insertions(+), 38 deletions(-)

diff --git a/src/Tweeper.php b/src/Tweeper.php
index e98623b..5b9f235 100644
--- a/src/Tweeper.php
+++ b/src/Tweeper.php
@@ -36,7 +36,7 @@ date_default_timezone_set('UTC');
  */
 class Tweeper {
 
-  private static $userAgent = "Mozilla/5.0";
+  private static $userAgent = "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J)";
   private static $maxConnectionTimeout = 5;
   private static $maxConnectionRetries = 5;
 
@@ -87,6 +87,58 @@ class Tweeper {
   }
 
   /**
+   * Convert Twitter mobile date to the date format expected in a RSS document.
+   *
+   * @param string $date
+   *   Text of the timestamp scraped from the twitter mobile UI: either a
+   *   relative age in minutes (e.g. "5m") or a date, optionally preceded by
+   *   the time in the form "TIME - DATE".
+   *
+   * @return string
+   *   The date formatted by Tweeper::epochToRssDate() for the resulting
+   *   timestamp; the epoch (0) is used when the date cannot be parsed.
+   */
+  public static function twitterToRssDate($date) {
+    // The text is scraped from HTML, ignore any surrounding whitespace.
+    $date = trim($date);
+
+    // Twitter uses relative timestamps in minutes for recent tweets, e.g.
+    // "5m" for a tweet posted five minutes ago, so go back in time.
+    if (preg_match('/^(\d+)m$/', $date, $matches)) {
+      $timestamp = strtotime("-" . $matches[1] . " min", time());
+    }
+    else {
+      /*
+       * In case the time is specified put it after the date,
+       * to make it recognized by strtotime().
+       */
+      if (preg_match('/(.*) - (.*)/', $date, $matches)) {
+        $date = $matches[2] . " " . $matches[1];
+      }
+
+      $timestamp = strtotime($date);
+
+      /*
+       * The twitter mobile UI usually only specifies the month and the day, so
+       * strtotime($date) may interpret the date as being in the future.
+       *
+       * If the date is in the future it is probably in the same day but in the
+       * previous year.
+       */
+      if (FALSE !== $timestamp && $timestamp > time()) {
+        $timestamp = strtotime('-1 years', $timestamp);
+      }
+    }
+
+    // Fall back to the epoch when the date could not be parsed.
+    if (FALSE === $timestamp) {
+      $timestamp = 0;
+    }
+
+    return Tweeper::epochToRssDate($timestamp);
+  }
+
+  /**
    * Convert string to UpperCamelCase.
    */
   public static function toUpperCamelCase($str, $delim = ' ') {
diff --git a/src/rss_converter_twitter.com.xsl b/src/rss_converter_twitter.com.xsl
index 1c20e70..bd3b589 100644
--- a/src/rss_converter_twitter.com.xsl
+++ b/src/rss_converter_twitter.com.xsl
@@ -45,6 +45,11 @@ + + + + + - - - - + - - - - + - - + + - - + + - + - - - - - + + + + + + + + + + + + + + + + + - - <xsl:if test="($show-usernames = 1) or ($screen-name != $user-name)"> <xsl:value-of select="concat($user-name, ': ')"/> </xsl:if> + <!-- TODO twitter mobile UI does not have a way to detect this <xsl:if test="$item-has-video"> <xsl:text>(Video) </xsl:text> </xsl:if> + --> <!-- Prepend a space in front of the URLs which are not preceded by an open parenthesis, for aestethic reasons. 
@@ -155,8 +176,8 @@ <xsl:value-of select="$item-permalink"/> </guid> <pubDate> - <xsl:variable name="timestamp" select=".//span[contains(@class, 'js-short-timestamp')]/@data-time"/> - <xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', number($timestamp))"/> + <xsl:variable name="timestamp" select=".//td[@class='timestamp']/a|.//div[@class='metadata']/a"/> + <xsl:value-of select="php:functionString('Tweeper\Tweeper::twitterToRssDate', $timestamp)"/> </pubDate> <description> <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text> @@ -164,6 +185,7 @@ <xsl:value-of select="concat($user-name, ':')"/> <xsl:element name="br"/> </xsl:if> + <!-- TODO twitter mobile UI does not support embedded media <xsl:if test="$item-has-video"> <xsl:text> (Video)</xsl:text> <xsl:element name="br"/> @@ -172,18 +194,22 @@ <xsl:text> (GIF)</xsl:text> <xsl:element name="br"/> </xsl:if> + --> <xsl:element name="span"> <xsl:attribute name="style">white-space: pre-wrap;</xsl:attribute> <xsl:apply-templates select="$item-content/node()"/> </xsl:element> + + <!-- TODO twitter mobile UI does not support embedded media <xsl:if test="$show-multimedia = 1"> - <xsl:apply-templates select="$item-media/node()"/> + <xsl:apply-templates select="$item-media"/> </xsl:if> + --> <xsl:text disable-output-escaping="yes">]]></xsl:text> </description> <xsl:if test="$generate-enclosure = 1"> <xsl:apply-templates select="$item-content//a[@data-expanded-url]" mode="enclosure"/> - <xsl:apply-templates select="$item-media//div[@data-image-url]" mode="enclosure"/> + <xsl:apply-templates select="$item-media" mode="enclosure"/> </xsl:if> </item> </xsl:template> @@ -195,12 +221,12 @@ <xsl:value-of select="concat('Twitter / ', $screen-name)"/> </xsl:when> <xsl:otherwise> - <xsl:value-of select="concat('Twitter / ', normalize-space(//h1[1]))"/> + <xsl:value-of select="concat('Twitter / ', normalize-space(//td[@id='search']//input/@value))"/> </xsl:otherwise> </xsl:choose> </xsl:variable> 
<xsl:variable name="channel-link" select="//link[@rel='canonical']/@href"/> - <xsl:variable name="channel-image" select="//a[contains(@class, 'profile-picture')]/@href"/> + <xsl:variable name="channel-image" select="//table[@class='profile-details' or @class='main-tweet']//td[@class='avatar']//img/@src"/> <rss version="2.0"> <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute> @@ -213,9 +239,7 @@ <xsl:value-of select="$channel-link"/> </link> <description> - <xsl:value-of select="normalize-space(//div[@class='ProfileHeaderCard'])"/> - <!-- The following rule should only match on hashtag URLs --> - <xsl:value-of select="normalize-space(//div[@class='SearchNavigation-textContainer'])"/> + <xsl:value-of select="normalize-space(//table[@class='profile-details' or @class='main-tweet']//td[@class='details'])"/> </description> <xsl:if test="$channel-image != ''"> <image> @@ -230,12 +254,7 @@ </url> </image> </xsl:if> - <xsl:apply-templates select="//ol[@id='stream-items-id']/li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/> - - <!-- These rules will only match on permalink URLs --> - <xsl:apply-templates select="//div[@class='permalink-inner permalink-tweet-container']"/> - <xsl:apply-templates select="//div[@data-component-context='replies']//li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/> - + <xsl:apply-templates select="//div[contains(@class, 'timeline')]/table[@class='tweet ']|//div[@class='main-tweet-container']/table[@class='main-tweet']"/> </channel> </rss> </xsl:template> -- 2.1.4