+News for v1.2.0:
+================
+
+ * Add support for scraping Instagram location pages, for example
+ https://www.instagram.com/explore/locations/833277432/
+ * Make scraping Instagram.com more robust
+ * Improve and fix scraping Facebook.com pages once again
+ * Add support for Twitter.com permalink URLs
+ * Make the generated Twitter.com feed match the original content more
+ closely: spaces and line wraps are now preserved in feed readers which can
+ render the HTML code embedded in the <description/> element, so that
+ ASCII art tweets can be fully appreciated when read via tweeper.
+ Check out https://twitter.com/sarahjeong/status/955651919279722496
+
News for v1.1.0:
================
- The dependencies on the Symfony components in composer.json could be more
relaxed like ">=2.7.0", but for now sticking to "2.7.*" is good enough.
+
+- Add support for Instagram tags
return NULL;
}
- return Tweeper::jsonToXml($matches[1], 'instagram');
+ // The "qe" object contains elements which will result in invalid XML
+ // element names, so remove it.
+ $data = json_decode($matches[1], $assoc = TRUE);
+ unset($data["qe"]);
+ $json = json_encode($data);
+
+ return Tweeper::jsonToXml($json, 'instagram');
}
/**
name="page-id"
select="substring-after(//meta[@property='al:android:url']/@content, 'fb://page/')"/>
- <xsl:template match="//div[contains(@class, 'fbUserContent') or contains(@class, 'userContentWrapper')]">
+ <xsl:template match="//div[contains(@class, 'fbUserStory') or contains(@class, 'userContentWrapper')]">
<xsl:variable name="story-id" select=".//input[@name='ft_ent_identifier']/@value"/>
<xsl:variable
name="item-permalink"
<xsl:template match="/">
<xsl:variable name="channel-title" select="//title"/>
- <xsl:variable name="channel-link" select="//div[contains(@class, 'fbUserContent') or contains(@class, 'userContentWrapper')][1]//a[1]/@href"/>
- <xsl:variable name="channel-image" select="//div[contains(@class, 'fbUserContent') or contains(@class, 'userContentWrapper')][1]//a[1]//img/@src"/>
+ <xsl:variable name="channel-link" select="//meta[@property='og:url']/@content"/>
+ <xsl:variable name="channel-image" select="//meta[@property='og:image']/@content"/>
<rss version="2.0">
<xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
</link>
<description>
<xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
- <xsl:copy-of select="//div[@data-id='1']/node()"/>
+ <xsl:value-of select="//meta[@property='og:description']/@content"/>
<xsl:text disable-output-escaping="yes">]]></xsl:text>
</description>
<image>
<xsl:value-of select="$channel-image"/>
</url>
</image>
- <xsl:apply-templates select="//div[contains(@class, 'fbUserContent') or contains(@class, 'userContentWrapper')]"/>
+ <xsl:apply-templates select="//div[contains(@class, 'fbUserStory') or contains(@class, 'userContentWrapper')]"/>
</channel>
</rss>
</xsl:template>
<xsl:variable name="user-name" select="//ProfilePage/user/username"/>
- <!-- Some users do not specify the full name -->
+ <!--
+ NOTE: some users do not specify the full name.
+
+ Remember to handle this case when using it and fall-back to the plain
+ user name when appropriate.
+ -->
<xsl:variable name="full-name" select="//ProfilePage/user/full_name"/>
+
+ <xsl:variable name="location-name" select="//LocationsPage/location/name"/>
+
<xsl:variable name="screen-name">
<xsl:choose>
+ <xsl:when test="$location-name != ''">
+ <xsl:variable name="location-latitude" select="//LocationsPage/location/lat"/>
+ <xsl:variable name="location-longitude" select="//LocationsPage/location/lng"/>
+ <xsl:value-of select="concat($location-name, ' (', $location-latitude, ', ', $location-longitude, ')')"/>
+ </xsl:when>
<xsl:when test="$full-name != ''">
<xsl:value-of select="$full-name"/>
</xsl:when>
</xsl:choose>
</xsl:variable>
- <xsl:template match="//ProfilePage/user/media/nodes">
+ <xsl:template match="//media/nodes">
<xsl:variable name="item-content-image" select="./display_src"/>
<xsl:variable name="item-content-caption" select="./caption"/>
<xsl:variable name="item-permalink" select="concat($BaseURL, '/p/', ./code, '/')"/>
</xsl:template>
<xsl:template match="/">
+
<xsl:variable name="channel-title" select="concat('Instagram / ', $screen-name)"/>
- <xsl:variable name="channel-link" select="concat($BaseURL, '/', $user-name)"/>
+ <xsl:variable name="channel-link">
+ <xsl:choose>
+ <xsl:when test="$location-name != ''">
+ <xsl:variable name="location-id" select="//LocationsPage/location/id"/>
+ <xsl:value-of select="concat($BaseURL, '/explore/locations/', $location-id)"/>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="concat($BaseURL, '/', $user-name)"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:variable>
+ <xsl:variable name="channel-image" select="//ProfilePage/user/profile_pic_url"/>
<rss version="2.0">
<xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
</xsl:if>
<xsl:text disable-output-escaping="yes">]]></xsl:text>
</description>
- <image>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <url>
- <xsl:value-of select="//ProfilePage/user/profile_pic_url"/>
- </url>
- </image>
- <xsl:apply-templates select="//ProfilePage/user/media/nodes"/>
+ <xsl:if test="$channel-image != ''">
+ <image>
+ <title>
+ <xsl:value-of select="$channel-title"/>
+ </title>
+ <link>
+ <xsl:value-of select="$channel-link"/>
+ </link>
+ <url>
+ <xsl:value-of select="$channel-image"/>
+ </url>
+ </image>
+ </xsl:if>
+ <xsl:apply-templates select="//ProfilePage/user/media/nodes|//LocationsPage/location/media/nodes"/>
</channel>
</rss>
</xsl:template>
<xsl:variable name="screen-name" select="//div[@class='user-actions btn-group not-following ']/@data-screen-name"/>
- <xsl:template match="//li[@data-item-id and @data-item-type='tweet']">
- <xsl:variable name="user-name" select=".//div[contains(@class, 'js-stream-tweet')]/@data-screen-name"/>
+ <xsl:template match="//div[@class='permalink-inner permalink-tweet-container'] | //li[@data-item-id and @data-item-type='tweet']">
+ <xsl:variable name="user-name" select=".//div[@data-tweet-id]/@data-screen-name"/>
<xsl:variable name="item-content" select=".//p[contains(@class, 'js-tweet-text')]"/>
<xsl:variable name="item-media" select=".//div[contains(@class, 'AdaptiveMedia-container')]"/>
<xsl:variable name="item-permalink" select="concat($BaseURL, .//div[@data-permalink-path]/@data-permalink-path)"/>
<xsl:value-of select="php:functionString('Tweeper\Tweeper::epochToRssDate', number($timestamp))"/>
</pubDate>
<description>
- <xsl:value-of select="concat($user-name, ': ')"/>
<xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
<xsl:if test="$item-has-video">
- <xsl:text>(Video) </xsl:text>
+ <xsl:text>(Video)</xsl:text>
+ <xsl:element name="br"/>
</xsl:if>
- <xsl:apply-templates select="$item-content/node()"/>
+ <xsl:element name="span">
+ <xsl:attribute name="style">white-space: pre-wrap;</xsl:attribute>
+ <xsl:apply-templates select="$item-content/node()"/>
+ </xsl:element>
<xsl:apply-templates select="$item-media/node()"/>
<xsl:text disable-output-escaping="yes">]]></xsl:text>
</description>
</url>
</image>
<xsl:apply-templates select="//ol[@id='stream-items-id']/li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/>
+
+ <!-- These rules will only match on permalink URLs -->
+ <xsl:apply-templates select="//div[@class='permalink-inner permalink-tweet-container']"/>
+ <xsl:apply-templates select="//div[@data-component-context='replies']//li[@data-item-id and @data-item-type='tweet' and not(contains(@class, 'has-profile-promoted-tweet'))]"/>
+
</channel>
</rss>
</xsl:template>