rss_converter_instagram.com.xsl: fix scraping Instagram.com
[tweeper.git] / src / rss_converter_instagram.com.xsl
index 609be66..4045224 100644 (file)
@@ -1,7 +1,7 @@
 <!--
   Stylesheet to convert Instagram user timelines to RSS.
 
-  Copyright (C) 2015  Antonio Ospite <ao2@ao2.it>
+  Copyright (C) 2015-2018  Antonio Ospite <ao2@ao2.it>
 
   This file is part of tweeper.
 
         <xsl:text>https://instagram.com</xsl:text>
     </xsl:variable>
 
-    <xsl:variable name="user-name" select="//ProfilePage/user/username"/>
+    <xsl:variable name="user-name" select="//ProfilePage/graphql/user/username"/>
+
+    <!--
+         NOTE: some users do not specify a full name.
+
+         Remember to handle this case when using it, and fall back to the
+         plain user name when appropriate.
+    -->
+    <xsl:variable name="full-name" select="//ProfilePage/graphql/user/full_name"/>
+
+    <xsl:variable name="location-name" select="//LocationsPage/graphql/location/name"/>
 
-    <!-- Some users do not specify the full name -->
-    <xsl:variable name="full-name" select="//ProfilePage/user/full_name"/>
     <xsl:variable name="screen-name">
         <xsl:choose>
+            <xsl:when test="$location-name != ''">
+                <!--
+                     Location pages: the title is "Name (lat, lng)".
+
+                     The coordinates are looked up with the same
+                     LocationsPage/graphql/location prefix used for the
+                     location name, id and media edges in this stylesheet;
+                     without the graphql step the selection would be empty
+                     and the title would read "Name (, )".
+                -->
+                <xsl:variable name="location-latitude" select="//LocationsPage/graphql/location/lat"/>
+                <xsl:variable name="location-longitude" select="//LocationsPage/graphql/location/lng"/>
+                <xsl:value-of select="concat($location-name, ' (', $location-latitude, ', ', $location-longitude, ')')"/>
+            </xsl:when>
             <xsl:when test="$full-name != ''">
                 <xsl:value-of select="$full-name"/>
             </xsl:when>
         </xsl:choose>
     </xsl:variable>
 
-    <xsl:template match="//ProfilePage/user/media/nodes">
-        <xsl:variable name="item-content-image" select="./display_src"/>
-        <xsl:variable name="item-content-caption" select="./caption"/>
-        <xsl:variable name="item-permalink" select="concat($BaseURL, '/p/', ./code, '/')"/>
+    <xsl:template match="//edges/node">
+        <xsl:variable name="item-content-image" select="./display_url"/>
+        <xsl:variable name="item-content-caption" select="./edge_media_to_caption/edges/node/text"/>
+        <xsl:variable name="item-permalink" select="concat($BaseURL, '/p/', ./shortcode, '/')"/>
         <item>
             <title>
                 <xsl:variable name="title-length" select="140"/>
@@ -74,7 +87,7 @@
                 <xsl:value-of select="$item-permalink"/>
             </guid>
             <pubDate>
-                <xsl:variable name="timestamp" select="./date"/>
+                <xsl:variable name="timestamp" select="./taken_at_timestamp"/>
                 <xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', $timestamp)"/>
             </pubDate>
             <description>
     </xsl:template>
 
     <xsl:template match="/">
+
         <xsl:variable name="channel-title" select="concat('Instagram / ', $screen-name)"/>
-        <xsl:variable name="channel-link" select="concat($BaseURL, '/', $user-name)"/>
+        <!--
+             Link of the feed channel.
+
+             When a location name was found, link to the location page
+             (BaseURL/explore/locations/ID); otherwise link to
+             BaseURL/user-name.
+        -->
+        <xsl:variable name="channel-link">
+            <xsl:choose>
+                <xsl:when test="$location-name != ''">
+                    <xsl:variable name="location-id" select="//LocationsPage/graphql/location/id"/>
+                    <xsl:value-of select="concat($BaseURL, '/explore/locations/', $location-id)"/>
+                </xsl:when>
+                <xsl:otherwise>
+                    <xsl:value-of select="concat($BaseURL, '/', $user-name)"/>
+                </xsl:otherwise>
+            </xsl:choose>
+        </xsl:variable>
+        <xsl:variable name="channel-image" select="//ProfilePage/graphql/user/profile_pic_url"/>
 
         <rss version="2.0">
             <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
                     </xsl:if>
                     <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
                 </description>
-                <image>
-                    <title>
-                        <xsl:value-of select="$channel-title"/>
-                    </title>
-                    <link>
-                        <xsl:value-of select="$channel-link"/>
-                    </link>
-                    <url>
-                        <xsl:value-of select="//ProfilePage/user/profile_pic_url"/>
-                    </url>
-                </image>
-                <xsl:apply-templates select="//ProfilePage/user/media/nodes"/>
+                <!--
+                     Emit the channel image only when $channel-image is not
+                     empty. It is selected from ProfilePage only, so it may
+                     be missing for other page types.
+                -->
+                <xsl:if test="$channel-image != ''">
+                    <image>
+                        <title>
+                            <xsl:value-of select="$channel-title"/>
+                        </title>
+                        <link>
+                            <xsl:value-of select="$channel-link"/>
+                        </link>
+                        <url>
+                            <xsl:value-of select="$channel-image"/>
+                        </url>
+                    </image>
+                </xsl:if>
+                <xsl:apply-templates select="//ProfilePage/graphql/user/edge_owner_to_timeline_media/edges/node|//LocationsPage/graphql/location/edge_location_to_media/edges/node"/>
             </channel>
         </rss>
     </xsl:template>