Merge tag 'v1.2.0' into debian/master
author Antonio Ospite <ao2@ao2.it>
Sat, 24 Feb 2018 17:52:09 +0000 (18:52 +0100)
committer Antonio Ospite <ao2@ao2.it>
Sat, 24 Feb 2018 17:52:09 +0000 (18:52 +0100)
Release v1.2.0

NEWS
TODO
src/Tweeper.php
src/rss_converter_facebook.com.xsl
src/rss_converter_instagram.com.xsl
src/rss_converter_twitter.com.xsl

diff --git a/NEWS b/NEWS
index 59e21b7..cd5a3bc 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,17 @@
+News for v1.2.0:
+================
+
+  * Add support for scraping Instagram location pages, like for example
+    https://www.instagram.com/explore/locations/833277432/
+  * Make scraping Instagram.com more robust
+  * Improve and fix scraping Facebook.com pages once again
+  * Add support for Twitter.com permalink URLs
+  * Make the generated Twitter.com feed match the original content more
+    closely: spaces and line wraps are now preserved in feed readers which can
+    render the HTML code embedded in the <description/> element, so that
+    ASCII art tweets can be fully appreciated when read via tweeper.
+    Check out https://twitter.com/sarahjeong/status/955651919279722496
+
 News for v1.1.0:
 ================
 
diff --git a/TODO b/TODO
index 7b72745..51b294b 100644 (file)
--- a/TODO
+++ b/TODO
@@ -12,3 +12,5 @@
 
 - The dependencies on the Symfony components in composer.json could be more
   relaxed like ">=2.7.0", but for now sticking to "2.7.*" is good enough.
+
+- Add support for instagram tags
index 8ac2fe3..566decb 100644 (file)
@@ -249,7 +249,13 @@ class Tweeper {
       return NULL;
     }
 
-    return Tweeper::jsonToXml($matches[1], 'instagram');
+    // The "qe" object contains elements which will result in invalid XML
+    // element names, so remove it.
+    $data = json_decode($matches[1], $assoc = TRUE);
+    unset($data["qe"]);
+    $json = json_encode($data);
+
+    return Tweeper::jsonToXml($json, 'instagram');
   }
 
   /**
index def8e69..520d6ce 100644 (file)
@@ -52,7 +52,7 @@
         name="page-id"
         select="substring-after(//meta[@property='al:android:url']/@content, 'fb://page/')"/>
 
-    <xsl:template match="//div[contains(@class, 'fbUserContent') or contains(@class, 'userContentWrapper')]">
+    <xsl:template match="//div[contains(@class, 'fbUserStory') or contains(@class, 'userContentWrapper')]">
         <xsl:variable name="story-id" select=".//input[@name='ft_ent_identifier']/@value"/>
         <xsl:variable
             name="item-permalink"
 
     <xsl:template match="/">
         <xsl:variable name="channel-title" select="//title"/>
-        <xsl:variable name="channel-link" select="//div[contains(@class, 'fbUserContent') or contains(@class, 'userContentWrapper')][1]//a[1]/@href"/>
-        <xsl:variable name="channel-image" select="//div[contains(@class, 'fbUserContent') or contains(@class, 'userContentWrapper')][1]//a[1]//img/@src"/>
+        <xsl:variable name="channel-link" select="//meta[@property='og:url']/@content"/>
+        <xsl:variable name="channel-image" select="//meta[@property='og:image']/@content"/>
 
         <rss version="2.0">
             <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
                 </link>
                 <description>
                     <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
-                    <xsl:copy-of select="//div[@data-id='1']/node()"/>
+                    <xsl:value-of select="//meta[@property='og:description']/@content"/>
                     <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
                 </description>
                 <image>
                         <xsl:value-of select="$channel-image"/>
                     </url>
                 </image>
-                <xsl:apply-templates select="//div[contains(@class, 'fbUserContent') or contains(@class, 'userContentWrapper')]"/>
+                <xsl:apply-templates select="//div[contains(@class, 'fbUserStory') or contains(@class, 'userContentWrapper')]"/>
             </channel>
         </rss>
     </xsl:template>
index 609be66..a2de8b3 100644 (file)
 
     <xsl:variable name="user-name" select="//ProfilePage/user/username"/>
 
-    <!-- Some users do not specify the full name -->
+    <!--
+         NOTE: some users do not specify the full name.
+
+         Remember to handle this case when using it and fall-back to the plain
+         user name when appropriate.
+    -->
     <xsl:variable name="full-name" select="//ProfilePage/user/full_name"/>
+
+    <xsl:variable name="location-name" select="//LocationsPage/location/name"/>
+
     <xsl:variable name="screen-name">
         <xsl:choose>
+            <xsl:when test="$location-name != ''">
+                <xsl:variable name="location-latitude" select="//LocationsPage/location/lat"/>
+                <xsl:variable name="location-longitude" select="//LocationsPage/location/lng"/>
+                <xsl:value-of select="concat($location-name, ' (', $location-latitude, ', ', $location-longitude, ')')"/>
+            </xsl:when>
             <xsl:when test="$full-name != ''">
                 <xsl:value-of select="$full-name"/>
             </xsl:when>
@@ -47,7 +60,7 @@
         </xsl:choose>
     </xsl:variable>
 
-    <xsl:template match="//ProfilePage/user/media/nodes">
+    <xsl:template match="//media/nodes">
         <xsl:variable name="item-content-image" select="./display_src"/>
         <xsl:variable name="item-content-caption" select="./caption"/>
         <xsl:variable name="item-permalink" select="concat($BaseURL, '/p/', ./code, '/')"/>
     </xsl:template>
 
     <xsl:template match="/">
+
         <xsl:variable name="channel-title" select="concat('Instagram / ', $screen-name)"/>
-        <xsl:variable name="channel-link" select="concat($BaseURL, '/', $user-name)"/>
+        <xsl:variable name="channel-link">
+            <xsl:choose>
+                <xsl:when test="$location-name != ''">
+                    <xsl:variable name="location-id" select="//LocationsPage/location/id"/>
+                    <xsl:value-of select="concat($BaseURL, '/explore/locations/', $location-id)"/>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:value-of select="concat($BaseURL, '/', $user-name)"/>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:variable>
+        <xsl:variable name="channel-image" select="//ProfilePage/user/profile_pic_url"/>
 
         <rss version="2.0">
             <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
                     </xsl:if>
                     <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
                 </description>
-                <image>
-                    <title>
-                        <xsl:value-of select="$channel-title"/>
-                    </title>
-                    <link>
-                        <xsl:value-of select="$channel-link"/>
-                    </link>
-                    <url>
-                        <xsl:value-of select="//ProfilePage/user/profile_pic_url"/>
-                    </url>
-                </image>
-                <xsl:apply-templates select="//ProfilePage/user/media/nodes"/>
+                <xsl:if test="$channel-image != ''">
+                    <image>
+                        <title>
+                            <xsl:value-of select="$channel-title"/>
+                        </title>
+                        <link>
+                            <xsl:value-of select="$channel-link"/>
+                        </link>
+                        <url>
+                            <xsl:value-of select="$channel-image"/>
+                        </url>
+                    </image>
+                </xsl:if>
+                <xsl:apply-templates select="//ProfilePage/user/media/nodes|//LocationsPage/location/media/nodes"/>
             </channel>
         </rss>
     </xsl:template>
index e2c5125..44a0416 100644 (file)
 
     <xsl:variable name="screen-name" select="//div[@class='user-actions btn-group not-following ']/@data-screen-name"/>
 
-    <xsl:template match="//li[@data-item-id and @data-item-type='tweet']">
-        <xsl:variable name="user-name" select=".//div[contains(@class, 'js-stream-tweet')]/@data-screen-name"/>
+    <xsl:template match="//div[@class='permalink-inner permalink-tweet-container'] | //li[@data-item-id and @data-item-type='tweet']">
+        <xsl:variable name="user-name" select=".//div[@data-tweet-id]/@data-screen-name"/>
         <xsl:variable name="item-content" select=".//p[contains(@class, 'js-tweet-text')]"/>
         <xsl:variable name="item-media" select=".//div[contains(@class, 'AdaptiveMedia-container')]"/>
         <xsl:variable name="item-permalink" select="concat($BaseURL, .//div[@data-permalink-path]/@data-permalink-path)"/>
                 <xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', number($timestamp))"/>
             </pubDate>
             <description>
-                <xsl:value-of select="concat($user-name, ': ')"/>
                 <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
                 <xsl:if test="$item-has-video">
-                    <xsl:text>(Video) </xsl:text>
+                    <xsl:text>(Video)</xsl:text>
+                    <xsl:element name="br"/>
                 </xsl:if>
-                <xsl:apply-templates select="$item-content/node()"/>
+                <xsl:element name="span">
+                    <xsl:attribute name="style">white-space: pre-wrap;</xsl:attribute>
+                    <xsl:apply-templates select="$item-content/node()"/>
+                </xsl:element>
                 <xsl:apply-templates select="$item-media/node()"/>
                 <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
             </description>
                     </url>
                 </image>
                 <xsl:apply-templates select="//ol[@id='stream-items-id']/li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/>
+
+                <!-- These rules will only match on permalink URLs -->
+                <xsl:apply-templates select="//div[@class='permalink-inner permalink-tweet-container']"/>
+                <xsl:apply-templates select="//div[@data-component-context='replies']//li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/>
+
             </channel>
         </rss>
     </xsl:template>