rss_converter_instagram.com.xsl: fix scraping Instagram.com
author Antonio Ospite <ao2@ao2.it>
Tue, 3 Apr 2018 16:08:59 +0000 (18:08 +0200)
committer Antonio Ospite <ao2@ao2.it>
Tue, 3 Apr 2018 16:08:59 +0000 (18:08 +0200)
src/rss_converter_instagram.com.xsl

index 238848f..4045224 100644 (file)
@@ -32,7 +32,7 @@
         <xsl:text>https://instagram.com</xsl:text>
     </xsl:variable>
 
         <xsl:text>https://instagram.com</xsl:text>
     </xsl:variable>
 
-    <xsl:variable name="user-name" select="//ProfilePage/user/username"/>
+    <xsl:variable name="user-name" select="//ProfilePage/graphql/user/username"/>
 
     <!--
          NOTE: some users do not specify the full name.
 
     <!--
          NOTE: some users do not specify the full name.
@@ -40,9 +40,9 @@
          Remember to handle this case when using it and fall-back to the plain
          user name when appropriate.
     -->
          Remember to handle this case when using it and fall-back to the plain
          user name when appropriate.
     -->
-    <xsl:variable name="full-name" select="//ProfilePage/user/full_name"/>
+    <xsl:variable name="full-name" select="//ProfilePage/graphql/user/full_name"/>
 
 
-    <xsl:variable name="location-name" select="//LocationsPage/location/name"/>
+    <xsl:variable name="location-name" select="//LocationsPage/graphql/location/name"/>
 
     <xsl:variable name="screen-name">
         <xsl:choose>
 
     <xsl:variable name="screen-name">
         <xsl:choose>
         </xsl:choose>
     </xsl:variable>
 
         </xsl:choose>
     </xsl:variable>
 
-    <xsl:template match="//media/nodes">
-        <xsl:variable name="item-content-image" select="./display_src"/>
-        <xsl:variable name="item-content-caption" select="./caption"/>
-        <xsl:variable name="item-permalink" select="concat($BaseURL, '/p/', ./code, '/')"/>
+    <xsl:template match="//edges/node">
+        <xsl:variable name="item-content-image" select="./display_url"/>
+        <xsl:variable name="item-content-caption" select="./edge_media_to_caption/edges/node/text"/>
+        <xsl:variable name="item-permalink" select="concat($BaseURL, '/p/', ./shortcode, '/')"/>
         <item>
             <title>
                 <xsl:variable name="title-length" select="140"/>
         <item>
             <title>
                 <xsl:variable name="title-length" select="140"/>
@@ -87,7 +87,7 @@
                 <xsl:value-of select="$item-permalink"/>
             </guid>
             <pubDate>
                 <xsl:value-of select="$item-permalink"/>
             </guid>
             <pubDate>
-                <xsl:variable name="timestamp" select="./date"/>
+                <xsl:variable name="timestamp" select="./taken_at_timestamp"/>
                 <xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', $timestamp)"/>
             </pubDate>
             <description>
                 <xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', $timestamp)"/>
             </pubDate>
             <description>
         <xsl:variable name="channel-link">
             <xsl:choose>
                 <xsl:when test="$location-name != ''">
         <xsl:variable name="channel-link">
             <xsl:choose>
                 <xsl:when test="$location-name != ''">
-                    <xsl:variable name="location-id" select="//LocationsPage/location/id"/>
+                    <xsl:variable name="location-id" select="//LocationsPage/graphql/location/id"/>
                     <xsl:value-of select="concat($BaseURL, '/explore/locations/', $location-id)"/>
                 </xsl:when>
                 <xsl:otherwise>
                     <xsl:value-of select="concat($BaseURL, '/explore/locations/', $location-id)"/>
                 </xsl:when>
                 <xsl:otherwise>
                 </xsl:otherwise>
             </xsl:choose>
         </xsl:variable>
                 </xsl:otherwise>
             </xsl:choose>
         </xsl:variable>
-        <xsl:variable name="channel-image" select="//ProfilePage/user/profile_pic_url"/>
+        <xsl:variable name="channel-image" select="//ProfilePage/graphql/user/profile_pic_url"/>
 
         <rss version="2.0">
             <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
 
         <rss version="2.0">
             <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
                         </url>
                     </image>
                 </xsl:if>
                         </url>
                     </image>
                 </xsl:if>
-                <xsl:apply-templates select="//ProfilePage/user/media/nodes|//LocationsPage/location/media/nodes"/>
+                <xsl:apply-templates select="//ProfilePage/graphql/user/edge_owner_to_timeline_media/edges/node|//LocationsPage/graphql/location/edge_location_to_media/edges/node"/>
             </channel>
         </rss>
     </xsl:template>
             </channel>
         </rss>
     </xsl:template>