+++ /dev/null
-<!--
- Stylesheet to convert Dilbert daily strips to RSS.
-
- Copyright (C) 2013-2014 Antonio Ospite <ao2@ao2.it>
-
- This file is part of tweeper.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
--->
-
-<!--
- Since June 18, 2013 dilbert.com strips are not accessible anymore
- directly from the RSS feed, this message is displayed instead:
-
- Dilbert readers - Please visit Dilbert.com to read this feature. Due
- to changes with our feeds, we are now making this RSS feed a link to
- Dilbert.com.
-
- How unhandy is that, was it because of a management decision?
- Maybe a parody dilbert strip is needed about this issue...
--->
-
-<xsl:stylesheet version="1.0"
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:php="http://php.net/xsl"
- xsl:extension-element-prefixes="php"
- exclude-result-prefixes="php">
-
- <xsl:output method="xml" indent="yes"/>
-
- <xsl:variable name="BaseURL" select="//meta[@property='og:url']/@content"/>
-
- <xsl:template match="//section[@class='comic-item']">
- <xsl:variable name="item-permalink" select=".//a[@class='img-comic-link']/@href"/>
- <xsl:variable name="picture-url" select=".//img[@class='img-responsive img-comic']/@src"/>
- <xsl:variable name="picture-title" select=".//img[@class='img-responsive img-comic']/@alt"/>
- <item>
- <title>
- <xsl:variable name="title-length" select="140"/>
- <!-- ellipsize, inspired from http://stackoverflow.com/questions/13622338 -->
- <xsl:choose>
- <xsl:when test="string-length($picture-title) > $title-length">
- <xsl:variable name="truncated-length" select="$title-length - 3"/>
- <xsl:value-of select="substring($picture-title, 1, $truncated-length)"/>
- <xsl:text>...</xsl:text>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="$picture-title"/>
- </xsl:otherwise>
- </xsl:choose>
- </title>
- <link>
- <xsl:value-of select="$item-permalink"/>
- </link>
- <guid>
- <xsl:value-of select="$item-permalink"/>
- </guid>
- <pubDate>
- <xsl:value-of select="php:functionString('Tweeper::strToRssDate', normalize-space(.//date))"/>
- </pubDate>
- <description>
- <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
- <img src="{$picture-url}" alt="{$picture-title}"/>
- <xsl:text disable-output-escaping="yes">]]></xsl:text>
- </description>
- <xsl:if test="$generate-enclosure = 1">
- <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', $picture-url)"/>
- </xsl:if>
- </item>
- </xsl:template>
-
- <xsl:template match="/">
- <xsl:variable name="channel-title" select="//meta[@property='og:title']/@content"/>
- <xsl:variable name="channel-link" select="$BaseURL"/>
-
- <rss version="2.0">
- <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
- <channel>
- <generator>Tweeper</generator>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <description>
- <xsl:value-of select="//meta[@property='og:description']/@content"/>
- </description>
- <image>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <url>
- <xsl:value-of select="concat($BaseURL, //img[@alt='Dilbert logo']/@src)"/>
- </url>
- </image>
- <xsl:apply-templates select="//section[@class='comic-item']"/>
- </channel>
- </rss>
- </xsl:template>
-</xsl:stylesheet>
+++ /dev/null
-<!--
- Stylesheet to convert a Facebook public page to RSS.
-
- Copyright (C) 2015 Antonio Ospite <ao2@ao2.it>
-
- This file is part of tweeper.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
--->
-
-<!--
- Since June 23rd, 2015 facebook.com deprecated the RSS feed endpoint for public pages:
- https://developers.facebook.com/docs/apps/changelog#v2_3_90_day_deprecations
-
- They suggest to use the Graph API but they fail to mention that it does not
- work anymore without authentication, so it cannot be considered an
- _equivalent_ solution.
-
- Luckily we've got Tweeper!
--->
-
-<xsl:stylesheet version="1.0"
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:php="http://php.net/xsl"
- xsl:extension-element-prefixes="php"
- exclude-result-prefixes="php">
-
- <xsl:output method="xml" indent="yes"/>
-
- <xsl:variable name="BaseURL">
- <xsl:text>https://facebook.com</xsl:text>
- </xsl:variable>
-
- <!--
- Extract the page id from an element like:
- <meta property="al:android:url" content="fb://page/793837197390834">
-
- The page id will be used to build the permalink.
- -->
- <xsl:variable
- name="page-id"
- select="substring-after(//meta[@property='al:android:url']/@content, 'fb://page/')"/>
-
- <xsl:template match="//div[contains(@class, 'userContentWrapper')]">
- <xsl:variable name="story-id" select=".//input[@name='ft_ent_identifier']/@value"/>
- <xsl:variable
- name="item-permalink"
- select="concat($BaseURL, '/permalink.php?id=', $page-id, '&story_fbid=', $story-id)"/>
-
- <!-- Get only the first child in order to skip the footer of the content -->
- <xsl:variable name="item-content" select="div[1]"/>
-
- <item>
- <title>
- <xsl:variable name="item-title" select="$item-content//p"/>
- <xsl:variable name="title-length" select="140"/>
- <!-- ellipsize, inspired from http://stackoverflow.com/questions/13622338 -->
- <xsl:choose>
- <xsl:when test="string-length($item-title) > $title-length">
- <xsl:variable name="truncated-length" select="$title-length - 3"/>
- <xsl:value-of select="substring($item-title, 1, $truncated-length)"/>
- <xsl:text>...</xsl:text>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="$item-title"/>
- </xsl:otherwise>
- </xsl:choose>
- </title>
- <link>
- <xsl:value-of select="$item-permalink"/>
- </link>
- <guid>
- <xsl:value-of select="$item-permalink"/>
- </guid>
- <pubDate>
- <xsl:variable name="timestamp" select=".//abbr[@data-shorten]/@data-utime"/>
- <xsl:value-of select="php:functionString('Tweeper::epochToRssDate', $timestamp)"/>
- </pubDate>
- <description>
-
- <!--
- Get only the children starting from the one with class="userContent",
- this way the content header is skipped
- -->
- <xsl:variable
- name="usercontent-position"
- select="count($item-content/div[contains(@class, 'userContent')]/preceding-sibling::*) + 1"/>
-
- <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
- <xsl:copy-of select="$item-content/div[position() >= $usercontent-position]"/>
- <xsl:text disable-output-escaping="yes">]]></xsl:text>
- </description>
- </item>
- </xsl:template>
-
- <xsl:template match="/">
- <xsl:variable name="channel-title" select="//title"/>
- <xsl:variable name="channel-link" select="//div[contains(@class, 'userContentWrapper')][1]//a[1]/@href"/>
-
- <rss version="2.0">
- <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
- <channel>
- <generator>Tweeper</generator>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <description>
- <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
- <xsl:copy-of select="//div[@data-id='1']/node()"/>
- <xsl:text disable-output-escaping="yes">]]></xsl:text>
- </description>
- <image>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <url>
- <xsl:value-of select="//img[@class='profilePic img']/@src"/>
- </url>
- </image>
- <xsl:apply-templates select="//div[contains(@class, 'userContentWrapper')]"/>
- </channel>
- </rss>
- </xsl:template>
-</xsl:stylesheet>
+++ /dev/null
-<!--
- Stylesheet to convert Howtoons.com to RSS.
-
- Copyright (C) 2014 Antonio Ospite <ao2@ao2.it>
-
- This file is part of tweeper.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
--->
-
-<!--
- The RSS feed link is broken on http://howtoons.com so just work around it.
-
- Howtoons uses Wordpress, so maybe this style sheet can be used as a base for
- scraping other Wordpress sites.
--->
-
-<xsl:stylesheet version="1.0"
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:php="http://php.net/xsl"
- xsl:extension-element-prefixes="php"
- exclude-result-prefixes="php">
-
- <xsl:output method="xml" indent="yes"/>
-
- <xsl:variable name="BaseURL">
- <xsl:text>http://howtoons.com</xsl:text>
- </xsl:variable>
-
- <xsl:template match="//div[contains(@id, 'post-')]">
- <xsl:variable name="item-permalink" select=".//div[@class='post-headline']//a/@href"/>
- <item>
- <title>
- <xsl:value-of select="normalize-space(.//div[@class='post-headline']//a)"/>
- </title>
- <link>
- <xsl:value-of select="$item-permalink"/>
- </link>
- <guid>
- <xsl:value-of select="$item-permalink"/>
- </guid>
- <pubDate>
- <xsl:variable name="date" select="substring-before(.//div[@class='post-byline'], ',')"/>
- <!-- date format is MM.DD.YY -->
- <xsl:variable name="month" select="substring($date, 1, 2)"/>
- <xsl:variable name="day" select="substring($date, 4, 2)"/>
- <xsl:variable name="year" select="substring($date, 7, 2)"/>
- <xsl:variable name="iso-date" select="concat('20', $year, '-', $month, '-', $day)"/>
- <xsl:value-of select="php:functionString('Tweeper::strToRssDate', $iso-date)"/>
- </pubDate>
- <description>
- <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
- <xsl:copy-of select=".//div[contains(@class, 'post-bodycopy')]/p"/>
- <xsl:text disable-output-escaping="yes">]]></xsl:text>
- </description>
- </item>
- </xsl:template>
-
- <xsl:template match="/">
- <xsl:variable name="channel-title" select="//title"/>
- <xsl:variable name="channel-link" select="$BaseURL"/>
-
- <rss version="2.0">
- <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
- <channel>
- <generator>Tweeper</generator>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <description>
- <xsl:text>The world's greatest D.I.Y. comic website! Tools of mass construction!</xsl:text>
- </description>
- <image>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <url>
- <xsl:text>http://www.howtoons.com/wp-content/themes/atahualpa/images/header/tuck1000.png</xsl:text>
- </url>
- </image>
- <xsl:apply-templates select="//div[contains(@id, 'post-')]"/>
- </channel>
- </rss>
- </xsl:template>
-</xsl:stylesheet>
+++ /dev/null
-rss_converter_pump.io.xsl
\ No newline at end of file
+++ /dev/null
-<!--
- Stylesheet to convert Instagram user timelines to RSS.
-
- Copyright (C) 2015 Antonio Ospite <ao2@ao2.it>
-
- This file is part of tweeper.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
--->
-<xsl:stylesheet version="1.0"
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:php="http://php.net/xsl"
- xsl:extension-element-prefixes="php"
- exclude-result-prefixes="php">
-
- <xsl:param name="generate-enclosure"/>
-
- <xsl:output method="xml" indent="yes"/>
-
- <xsl:variable name="BaseURL">
- <xsl:text>https://instagram.com</xsl:text>
- </xsl:variable>
-
- <xsl:variable name="user-name" select="//ProfilePage/user/username"/>
-
- <!-- Some users do not specify the full name -->
- <xsl:variable name="full-name" select="//ProfilePage/user/full_name"/>
- <xsl:variable name="screen-name">
- <xsl:choose>
- <xsl:when test="$full-name != ''">
- <xsl:value-of select="$full-name"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="$user-name"/>
- </xsl:otherwise>
- </xsl:choose>
- </xsl:variable>
-
- <xsl:template match="//ProfilePage/user/media/nodes">
- <xsl:variable name="item-content-image" select="./display_src"/>
- <xsl:variable name="item-content-caption" select="./caption"/>
- <xsl:variable name="item-permalink" select="concat($BaseURL, '/p/', ./code, '/')"/>
- <item>
- <title>
- <xsl:variable name="title-length" select="140"/>
- <xsl:variable name="item-content-title" select="normalize-space(concat($user-name, ': ', $item-content-caption))"/>
- <!-- ellipsize, inspired from http://stackoverflow.com/questions/13622338 -->
- <xsl:choose>
- <xsl:when test="string-length($item-content-title) > $title-length">
- <xsl:variable name="truncated-length" select="$title-length - 3"/>
- <xsl:value-of select="substring($item-content-title, 1, $truncated-length)"/>
- <xsl:text>...</xsl:text>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="$item-content-title"/>
- </xsl:otherwise>
- </xsl:choose>
- </title>
- <link>
- <xsl:value-of select="$item-permalink"/>
- </link>
- <guid>
- <xsl:value-of select="$item-permalink"/>
- </guid>
- <pubDate>
- <xsl:variable name="timestamp" select="./date"/>
- <xsl:value-of select="php:functionString('Tweeper::epochToRssDate', $timestamp)"/>
- </pubDate>
- <description>
- <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
- <p>
- <xsl:if test="./is_video/text() = 1">
- (Video)
- </xsl:if>
- <xsl:value-of select="$item-content-caption"/>
- </p><br />
- <a href="{$item-permalink}"><img src="{$item-content-image}" style="max-width: 100%"/></a>
- <xsl:text disable-output-escaping="yes">]]></xsl:text>
- </description>
- <xsl:if test="$generate-enclosure = 1">
- <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', $item-content-image)"/>
- </xsl:if>
- </item>
- </xsl:template>
-
- <xsl:template match="/">
- <xsl:variable name="channel-title" select="concat('Instagram / ', $screen-name)"/>
- <xsl:variable name="channel-link" select="concat($BaseURL, '/', $user-name)"/>
-
- <rss version="2.0">
- <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
- <channel>
- <generator>Tweeper</generator>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <description>
- <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
- <xsl:value-of select="normalize-space(concat($screen-name, '. ', //user/biography))"/>
- <xsl:variable name="external-url" select="//user/external_url"/>
- <xsl:if test="$external-url != ''">
- <xsl:text> </xsl:text><a href="{$external-url}"><xsl:value-of select="$external-url"/></a>
- </xsl:if>
- <xsl:text disable-output-escaping="yes">]]></xsl:text>
- </description>
- <image>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <url>
- <xsl:value-of select="//ProfilePage/user/profile_pic_url"/>
- </url>
- </image>
- <xsl:apply-templates select="//ProfilePage/user/media/nodes"/>
- </channel>
- </rss>
- </xsl:template>
-</xsl:stylesheet>
+++ /dev/null
-<!--
- Stylesheet to convert Pump.io activity streams to RSS.
-
- Copyright (C) 2013-2014 Antonio Ospite <ao2@ao2.it>
-
- This file is part of tweeper.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
--->
-<!-- To Evan, please reconsider publishing RSS ouput for _public_ contents -->
-<xsl:stylesheet version="1.0"
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:php="http://php.net/xsl"
- xsl:extension-element-prefixes="php"
- exclude-result-prefixes="php">
-
- <xsl:output method="xml" indent="yes"/>
-
- <xsl:variable name="domain-name" select="substring-after(//div[@id='profile-block']/@data-profile-id, '@')"/>
- <xsl:variable name="BaseURL" select="concat('https://', $domain-name)"/>
-
- <xsl:variable name="user-name" select="substring-after(//div[@id='profile-block']/@data-profile-id, ':')"/>
-
- <xsl:template match="//div[@id='user-content-activities']//ul[@id='major-stream']/li">
- <xsl:variable name="item-content" select=".//div[@class='activity-content']"/>
- <xsl:variable name="item-permalink" select=".//p[@class='muted']/small/a/@href"/>
- <item>
- <title>
- <xsl:value-of select="concat($user-name, ': ', normalize-space($item-content))"/>
- </title>
- <link>
- <xsl:value-of select="$item-permalink"/>
- </link>
- <guid>
- <xsl:value-of select="$item-permalink"/>
- </guid>
- <pubDate>
- <xsl:value-of select="php:functionString('Tweeper::strToRssDate', .//abbr[@class='easydate']/@title)"/>
- </pubDate>
- <description>
- <xsl:value-of select="concat($user-name, ': ')"/>
- <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
- <xsl:copy-of select="$item-content/node()"/>
- <xsl:text disable-output-escaping="yes">]]></xsl:text>
- </description>
- <xsl:if test="$generate-enclosure = 1">
- <xsl:variable name="image-thumb-link" select=".//img[contains(@class, 'object-image')]/@src"/>
- <xsl:if test="$image-thumb-link">
- <xsl:variable name="image-link" select="php:functionString('str_replace', '_thumb', '', $image-thumb-link)"/>
- <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', $image-link)"/>
- </xsl:if>
- </xsl:if>
- </item>
- </xsl:template>
-
- <xsl:template match="/">
- <xsl:variable name="channel-title" select="concat(substring-after($user-name, '@'), ' / ', substring-before($user-name, '@'))"/>
- <xsl:variable name="channel-link" select="concat('https://', substring-after($user-name, '@'), '/', substring-before($user-name, '@'))"/>
-
- <rss version="2.0">
- <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
- <channel>
- <generator>Tweeper</generator>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <description>
- <xsl:value-of select="normalize-space(//h1[@class='media-header'])"/>
- </description>
- <image>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <url>
- <xsl:value-of select="//div[@id='profile-block']/span/img[@class='img-rounded media-object']/@src"/>
- </url>
- </image>
- <xsl:apply-templates select="//div[@id='user-content-activities']//ul[@id='major-stream']/li"/>
- </channel>
- </rss>
- </xsl:template>
-</xsl:stylesheet>
+++ /dev/null
-<!--
- Stylesheet to convert Twitter user timelines to RSS.
-
- Copyright (C) 2013-2014 Antonio Ospite <ao2@ao2.it>
-
- This file is part of tweeper.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
--->
-<xsl:stylesheet version="1.0"
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:php="http://php.net/xsl"
- xsl:extension-element-prefixes="php"
- exclude-result-prefixes="php">
-
- <xsl:param name="generate-enclosure"/>
-
- <xsl:output method="xml" indent="yes"/>
-
- <xsl:variable name="BaseURL">
- <xsl:text>https://twitter.com</xsl:text>
- </xsl:variable>
-
- <!-- Identity transform -->
- <xsl:template match="@*|node()">
- <xsl:copy>
- <xsl:apply-templates select="@*|node()"/>
- </xsl:copy>
- </xsl:template>
-
- <!--
- Anchors to external links provide the direct URL in the
- data-expanded-url attribute, so use this in the href attribute too
- instead of the default short URL which uses the t.co redirection
- service.
-
- NOTE: when creating an element, attributes must be processed _before_
- adding the contents (either children or a value):
- http://stackoverflow.com/questions/21984867/
- -->
- <xsl:template match="a[@data-expanded-url]">
- <!-- Prepend and append a white space for aestethic reasons -->
- <xsl:text> </xsl:text>
- <a>
- <xsl:attribute name="href">
- <xsl:value-of select="@data-expanded-url"/>
- </xsl:attribute>
- <!-- Also strip and … -->
- <xsl:value-of select="translate(., ' …', '')"/>
- </a>
- <xsl:text> </xsl:text>
- </xsl:template>
-
- <!--
- These are links to pic.twitter.com, use the direct link for those
- too instead of the t.co redirections.
- -->
- <xsl:template match="a[@data-pre-embedded='true']">
- <!-- Prepend and append a white space for aestethic reasons -->
- <xsl:text> </xsl:text>
- <a>
- <xsl:attribute name="href">
- <xsl:value-of select="concat('https://', .)"/>
- </xsl:attribute>
- <xsl:value-of select="concat('https://', .)"/>
- </a>
- <xsl:text> </xsl:text>
- </xsl:template>
-
- <!-- Present images in a more convenient way -->
- <xsl:template match="div[@data-image-url]">
- <a>
- <xsl:attribute name="href">
- <xsl:value-of select="concat(@data-image-url, ':orig')"/>
- </xsl:attribute>
- <img>
- <xsl:attribute name="src">
- <xsl:value-of select="@data-image-url"/>
- </xsl:attribute>
- </img>
- </a>
- </xsl:template>
-
- <!-- Don't repeat background in embedded media content -->
- <xsl:template match="div[contains(@class, 'PlayableMedia-player')]">
- <xsl:copy>
- <xsl:apply-templates select="@*"/>
- <xsl:attribute name="style">
- <xsl:value-of select="concat(@style, '; background-repeat: no-repeat')"/>
- </xsl:attribute>
- <xsl:apply-templates select="node()"/>
- </xsl:copy>
- </xsl:template>
-
- <xsl:template match="a[@data-expanded-url]" mode="enclosure">
- <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', ./@data-expanded-url)"/>
- </xsl:template>
-
- <xsl:template match="div[@data-image-url]" mode="enclosure">
- <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', concat(./@data-image-url, ':orig'))"/>
- </xsl:template>
-
- <xsl:variable name="screen-name" select="//div[@class='user-actions btn-group not-following ']/@data-screen-name"/>
-
- <xsl:template match="//li[@data-item-id and @data-item-type='tweet']">
- <xsl:variable name="user-name" select=".//div[contains(@class, 'js-stream-tweet')]/@data-screen-name"/>
- <xsl:variable name="item-content" select=".//p[contains(@class, 'js-tweet-text')]"/>
- <xsl:variable name="item-media" select=".//div[contains(@class, 'AdaptiveMedia-container')]"/>
- <xsl:variable name="item-permalink" select="concat($BaseURL, .//div[@data-permalink-path]/@data-permalink-path)"/>
-
- <xsl:variable name="item-has-video" select="$item-media//*[contains(@class, 'PlayableMedia--video')]"/>
- <item>
- <title>
- <xsl:value-of select="concat($user-name, ': ')"/>
- <xsl:if test="$item-has-video">
- <xsl:text>(Video) </xsl:text>
- </xsl:if>
- <!--
- Prepend a space in front of the URLs which are not
- preceded by an open parenthesis, for aestethic reasons.
- Also, regex, I know: http://xkcd.com/1171/
- -->
- <xsl:variable
- name="processed-title"
- select="php:functionString('preg_replace', '@((?<!\()(?:http[s]?://|pic.twitter.com))@', ' \1', $item-content)"/>
- <!-- Also strip and … -->
- <xsl:value-of select="normalize-space(translate($processed-title, ' …', ''))"/>
- </title>
- <link>
- <xsl:value-of select="$item-permalink"/>
- </link>
- <guid>
- <xsl:value-of select="$item-permalink"/>
- </guid>
- <pubDate>
- <xsl:variable name="timestamp" select=".//span[contains(@class, 'js-short-timestamp')]/@data-time"/>
- <xsl:value-of select="php:functionString('Tweeper::epochToRssDate', number($timestamp))"/>
- </pubDate>
- <description>
- <xsl:value-of select="concat($user-name, ': ')"/>
- <xsl:text disable-output-escaping="yes"><![CDATA[</xsl:text>
- <xsl:if test="$item-has-video">
- <xsl:text>(Video) </xsl:text>
- </xsl:if>
- <xsl:apply-templates select="$item-content/node()"/>
- <xsl:apply-templates select="$item-media/node()"/>
- <xsl:text disable-output-escaping="yes">]]></xsl:text>
- </description>
- <xsl:if test="$generate-enclosure = 1">
- <xsl:apply-templates select="$item-content//a[@data-expanded-url]" mode="enclosure"/>
- <xsl:apply-templates select="$item-media//div[@data-image-url]" mode="enclosure"/>
- </xsl:if>
- </item>
- </xsl:template>
-
- <xsl:template match="/">
- <xsl:variable name="channel-title">
- <xsl:choose>
- <xsl:when test="$screen-name != ''">
- <xsl:value-of select="concat('Twitter / ', $screen-name)"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="concat('Twitter / ', normalize-space(//h1[1]))"/>
- </xsl:otherwise>
- </xsl:choose>
- </xsl:variable>
- <xsl:variable name="channel-link" select="//link[@rel='canonical']/@href"/>
-
- <rss version="2.0">
- <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
- <channel>
- <generator>Tweeper</generator>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <description>
- <xsl:value-of select="normalize-space(//div[@class='ProfileHeaderCard'])"/>
- </description>
- <image>
- <title>
- <xsl:value-of select="$channel-title"/>
- </title>
- <link>
- <xsl:value-of select="$channel-link"/>
- </link>
- <url>
- <xsl:value-of select="//a[contains(@class, 'profile-picture')]/@href"/>
- </url>
- </image>
- <xsl:apply-templates select="//ol[@id='stream-items-id']/li[@data-item-id and @data-item-type='tweet']"/>
- </channel>
- </rss>
- </xsl:template>
-</xsl:stylesheet>
--- /dev/null
+<?php
+
+namespace Tweeper;
+
+/**
+ * @file
+ * Tweeper - a Twitter to RSS web scraper.
+ *
+ * Copyright (C) 2013-2015 Antonio Ospite <ao2@ao2.it>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+use DOMDocument;
+use XSLTProcessor;
+
+require_once 'Symfony/Component/Serializer/autoload.php';
+
+use Symfony\Component\Serializer\Serializer;
+use Symfony\Component\Serializer\Encoder\XmlEncoder;
+use Symfony\Component\Serializer\Normalizer\ObjectNormalizer;
+
+date_default_timezone_set('UTC');
+
+/**
+ * Scrape supported websites and perform conversion to RSS.
+ */
+class Tweeper {
+
+ private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0";
+
+ /**
+ * Constructor sets up {@link $generate_enclosure}.
+ *
+ * @param bool $generate_enclosure
+ *   (optional) Value stored on the instance as $this->generate_enclosure.
+ *   Defaults to FALSE. Presumably it controls whether RSS <enclosure/>
+ *   elements are generated; confirm against the code that reads it.
+ */
+ public function __construct($generate_enclosure = FALSE) {
+ // NOTE(review): no declaration of this property is visible in this part of
+ // the class; if none exists elsewhere it is created dynamically here.
+ $this->generate_enclosure = $generate_enclosure;
+ }
+
+  /**
+   * Convert numeric Epoch to the date format expected in a RSS document.
+   *
+   * @param mixed $timestamp
+   *   Seconds since the Unix Epoch, as an integer, a float or a numeric
+   *   string.
+   *
+   * @return string
+   *   The date formatted with DATE_RSS in GMT. Values that are not numeric,
+   *   not finite, or too large to fit an integer are treated as 0 (the Epoch).
+   */
+  public static function epochToRssDate($timestamp) {
+    // NAN and INF both pass is_numeric(), and gmdate() only accepts integers,
+    // so anything that cannot be represented as an integer falls back to 0.
+    $value = is_numeric($timestamp) ? (float) $timestamp : NAN;
+    if (!is_finite($value) || abs($value) >= PHP_INT_MAX) {
+      $timestamp = 0;
+    }
+
+    // The explicit cast drops any fractional part instead of relying on an
+    // implicit (and deprecated) lossy float-to-int conversion.
+    return gmdate(DATE_RSS, (int) $timestamp);
+  }
+
+  /**
+   * Convert generic date string to the date format expected in a RSS document.
+   *
+   * The string is parsed with strtotime(); when it cannot be parsed the Epoch
+   * (timestamp 0) is used instead.
+   */
+  public static function strToRssDate($date) {
+    $parsed = strtotime($date);
+
+    return self::epochToRssDate(FALSE === $parsed ? 0 : $parsed);
+  }
+
+  /**
+   * Convert string to UpperCamelCase.
+   *
+   * The first character of every $delim-separated word is uppercased, then
+   * the delimiters themselves are removed.
+   */
+  public static function toUpperCamelCase($str, $delim = ' ') {
+    return str_replace($delim, '', ucwords($str, $delim));
+  }
+
+  /**
+   * Get the contents from a URL.
+   *
+   * @param string $url
+   *   The URL to fetch; HTTP redirects are followed.
+   *
+   * @return string|false
+   *   The response body, or FALSE when the transfer could not be set up or
+   *   failed (an error is raised with trigger_error() in that case).
+   */
+  private static function getUrlContents($url) {
+    $ch = curl_init($url);
+    if (FALSE === $ch) {
+      trigger_error("Cannot initialize a cURL session for URL: " . $url);
+      return FALSE;
+    }
+
+    curl_setopt_array($ch, array(
+      CURLOPT_HEADER => FALSE,
+      // Follow http redirects to get the real URL.
+      CURLOPT_FOLLOWLOCATION => TRUE,
+      CURLOPT_RETURNTRANSFER => TRUE,
+      // Verify the certificate chain and the host name: the fetched markup
+      // ends up in the generated feed, so it must not be open to tampering.
+      CURLOPT_SSL_VERIFYHOST => 2,
+      CURLOPT_SSL_VERIFYPEER => TRUE,
+      CURLOPT_HTTPHEADER => array('Accept-language: en'),
+      CURLOPT_USERAGENT => Tweeper::$userAgent,
+    ));
+    $contents = curl_exec($ch);
+    if (FALSE === $contents) {
+      trigger_error(curl_error($ch));
+    }
+    curl_close($ch);
+
+    return $contents;
+  }
+
+  /**
+   * Get the headers from a URL.
+   *
+   * Only a header request is performed (no body is downloaded) and HTTP
+   * redirects are followed.
+   *
+   * @param string $url
+   *   The URL to inspect.
+   *
+   * @return array|false
+   *   The transfer information reported by curl_getinfo(), or FALSE when the
+   *   cURL session could not be initialized. A failed transfer is reported
+   *   with trigger_error() and still returns whatever information is
+   *   available.
+   */
+  private static function getUrlInfo($url) {
+    $ch = curl_init($url);
+    if (FALSE === $ch) {
+      trigger_error("Cannot initialize a cURL session for URL: " . $url);
+      return FALSE;
+    }
+
+    curl_setopt_array($ch, array(
+      CURLOPT_HEADER => TRUE,
+      CURLOPT_NOBODY => TRUE,
+      // Follow http redirects to get the real URL.
+      CURLOPT_FOLLOWLOCATION => TRUE,
+      CURLOPT_RETURNTRANSFER => TRUE,
+      // Verify the certificate chain and the host name of the remote server.
+      CURLOPT_SSL_VERIFYHOST => 2,
+      CURLOPT_SSL_VERIFYPEER => TRUE,
+      CURLOPT_USERAGENT => Tweeper::$userAgent,
+    ));
+
+    // The transfer result is what signals a failure; curl_getinfo() returns
+    // an array even when the request itself did not succeed.
+    if (FALSE === curl_exec($ch)) {
+      trigger_error(curl_error($ch));
+    }
+    $url_info = curl_getinfo($ch);
+    curl_close($ch);
+
+    return $url_info;
+  }
+
+  /**
+   * Generate an RSS <enclosure/> element.
+   *
+   * @param string $url
+   *   The URL of the resource to attach.
+   *
+   * @return \DOMElement|string
+   *   The <enclosure/> element, or an empty string when the content type of
+   *   the resource is missing or not in the list of supported types.
+   */
+  public static function generateEnclosure($url) {
+    $supported_content_types = array(
+      "application/octet-stream",
+      "application/ogg",
+      "application/pdf",
+      "audio/aac",
+      "audio/mp4",
+      "audio/mpeg",
+      "audio/ogg",
+      "audio/vorbis",
+      "audio/wav",
+      "audio/webm",
+      "audio/x-midi",
+      "image/gif",
+      "image/jpeg",
+      "image/png",
+      "video/avi",
+      "video/mp4",
+      "video/mpeg",
+      "video/ogg",
+    );
+
+    $url_info = Tweeper::getUrlInfo($url);
+
+    // The lookup may have failed or returned incomplete data, so never index
+    // the result blindly.
+    $reported_type = isset($url_info['content_type']) ? (string) $url_info['content_type'] : '';
+    $effective_url = isset($url_info['url']) ? $url_info['url'] : $url;
+
+    // Servers may append parameters ("image/jpeg; charset=binary") or use a
+    // different letter case; compare only the bare, lowercased media type.
+    $type_parts = explode(';', $reported_type);
+    $content_type = strtolower(trim($type_parts[0]));
+
+    $supported = in_array($content_type, $supported_content_types, TRUE);
+    if (!$supported) {
+      error_log("Unsupported enclosure content type \"" . $reported_type . "\" for URL: " . $effective_url);
+      return '';
+    }
+
+    // The RSS specification says that the enclosure element URL must be http.
+    // See http://sourceforge.net/p/feedvalidator/bugs/72/
+    $http_url = preg_replace("/^https:/i", "http:", $effective_url);
+
+    // cURL reports -1 when the size is unknown; a negative length is not
+    // valid in a feed, so advertise 0 in that case.
+    $length = isset($url_info['download_content_length']) ? (int) $url_info['download_content_length'] : 0;
+    if ($length < 0) {
+      $length = 0;
+    }
+
+    $dom = new DOMDocument();
+    $enc = $dom->createElement('enclosure');
+    $enc->setAttribute('url', $http_url);
+    $enc->setAttribute('length', (string) $length);
+    $enc->setAttribute('type', $content_type);
+
+    return $enc;
+  }
+
+  /**
+   * Log a libxml error, formatted like the messages PHP itself produces.
+   *
+   * The layout mimics libxml.c::php_libxml_ctx_error_level().
+   *
+   * @param object $error
+   *   An error as returned by libxml_get_errors(); the level, code, message,
+   *   file and line properties are read.
+   */
+  private static function logXmlError($error) {
+    $level_labels = array(
+      LIBXML_ERR_WARNING => "Warning",
+      LIBXML_ERR_ERROR => "Error",
+      LIBXML_ERR_FATAL => "Fatal Error",
+    );
+
+    // Unknown levels get no prefix at all.
+    $prefix = "";
+    if (isset($level_labels[$error->level])) {
+      $prefix = $level_labels[$error->level] . " $error->code: ";
+    }
+
+    $location = $error->file ? " in $error->file" : " in Entity,";
+
+    error_log($prefix . trim($error->message) . $location . " line $error->line");
+  }
+
+  /**
+   * Convert a JSON string to an XML string.
+   *
+   * @param string $json
+   *   The JSON document to convert.
+   * @param string $root_node_name
+   *   The name to give to the root element of the generated XML.
+   *
+   * @return string|null
+   *   The serialized XML, or NULL when the JSON decodes to an empty value or
+   *   the serialization produces nothing.
+   */
+  private static function jsonToXml($json, $root_node_name) {
+    // Decode to associative arrays: apparently the ObjectNormalizer used
+    // below is not able to handle the stdClass objects that json_decode()
+    // creates by default.
+    $decoded = json_decode($json, TRUE);
+    if (!$decoded) {
+      return NULL;
+    }
+
+    $serializer = new Serializer(array(new ObjectNormalizer()), array(new XmlEncoder()));
+
+    $xml = $serializer->serialize($decoded, 'xml', array(
+      'xml_encoding' => "UTF-8",
+      'xml_format_output' => TRUE,
+      'xml_root_node_name' => $root_node_name,
+    ));
+
+    if ($xml) {
+      return $xml;
+    }
+
+    trigger_error("Cannot serialize data", E_USER_ERROR);
+    return NULL;
+  }
+
+  /**
+   * Convert the Instagram content to XML.
+   *
+   * The page embeds its data as JSON assigned to window._sharedData; that
+   * JSON is extracted and handed to Tweeper::jsonToXml().
+   *
+   * @param string $html
+   *   The HTML of the Instagram page.
+   *
+   * @return string|null
+   *   The XML produced by jsonToXml(), or NULL when the JSON cannot be found.
+   */
+  private function getXmlInstagramCom($html) {
+    // Extract the json data from the html code.
+    $json_match_expr = '/window._sharedData = (.*);/';
+
+    if (preg_match($json_match_expr, $html, $matches) === 1) {
+      return Tweeper::jsonToXml($matches[1], 'instagram');
+    }
+
+    trigger_error("Cannot match expression: $json_match_expr\n", E_USER_ERROR);
+    return NULL;
+  }
+
+  /**
+   * Make the Facebook HTML processable.
+   *
+   * Every comment opening and closing delimiter is removed, so that markup
+   * which was inside HTML comments becomes part of the document.
+   *
+   * @param string $html
+   *   The HTML of the Facebook page.
+   *
+   * @return string
+   *   The HTML without comment delimiters.
+   */
+  private function preprocessHtmlFacebookCom($html) {
+    return str_replace(array('<!--', '-->'), '', $html);
+  }
+
+  /**
+   * Convert the HTML retrieved from the site to XML.
+   *
+   * When the class has a getXml<Host>() method for the host, that method is
+   * asked for an XML string which is then parsed; otherwise the HTML itself
+   * is parsed. libxml diagnostics are collected and sent to the log.
+   *
+   * @param string $html
+   *   The content retrieved from the site.
+   * @param string $host
+   *   The host name the content comes from.
+   *
+   * @return DOMDocument|null
+   *   The parsed document, or NULL when there is nothing to parse or the
+   *   parser rejects the input.
+   */
+  private function htmlToXml($html, $host) {
+    $xmlDoc = new DOMDocument();
+
+    // Handle warnings and errors when loading invalid HTML.
+    $xml_errors_value = libxml_use_internal_errors(TRUE);
+
+    // If there is a host-specific method to get the XML data, use it!
+    $get_xml_host_method = 'getXml' . Tweeper::toUpperCamelCase($host, '.');
+    if (method_exists($this, $get_xml_host_method)) {
+      $xml_data = call_user_func_array(array($this, $get_xml_host_method), array($html));
+      // Host-specific converters return NULL on failure, and the parser must
+      // not be fed an empty document.
+      $loaded = is_string($xml_data) && '' !== $xml_data && $xmlDoc->loadXML($xml_data);
+    }
+    else {
+      $loaded = is_string($html) && '' !== $html && $xmlDoc->loadHTML($html);
+    }
+
+    foreach (libxml_get_errors() as $xml_error) {
+      Tweeper::logXmlError($xml_error);
+    }
+    libxml_clear_errors();
+    // Restore the previous libxml error handling mode on every path.
+    libxml_use_internal_errors($xml_errors_value);
+
+    if (!$loaded) {
+      error_log("Cannot convert the content retrieved from $host to XML");
+      return NULL;
+    }
+
+    return $xmlDoc;
+  }
+
+  /**
+   * Load a stylesheet if the web site is supported.
+   *
+   * The stylesheet is looked up next to this file, under the name
+   * rss_converter_<host>.xsl.
+   *
+   * @param string $host
+   *   The host name of the site to convert.
+   *
+   * @return XSLTProcessor|null
+   *   A processor with the stylesheet imported and the generate-enclosure
+   *   parameter set, or NULL when the stylesheet is missing or unusable.
+   */
+  private function loadStylesheet($host) {
+    $stylesheet = "file://" . __DIR__ . "/rss_converter_" . $host . ".xsl";
+    if (FALSE === file_exists($stylesheet)) {
+      trigger_error("Conversion to RSS not supported for $host ($stylesheet not found)", E_USER_ERROR);
+      return NULL;
+    }
+
+    // getUrlContents() returns FALSE on failure; do not hand that to the
+    // XML parser.
+    $stylesheet_contents = Tweeper::getUrlContents($stylesheet);
+    if (!is_string($stylesheet_contents) || '' === $stylesheet_contents) {
+      trigger_error("Cannot read the stylesheet $stylesheet", E_USER_ERROR);
+      return NULL;
+    }
+
+    $xslDoc = new DOMDocument();
+    if (FALSE === $xslDoc->loadXML($stylesheet_contents)) {
+      trigger_error("Cannot parse the stylesheet $stylesheet", E_USER_ERROR);
+      return NULL;
+    }
+
+    $xsltProcessor = new XSLTProcessor();
+    $xsltProcessor->registerPHPFunctions();
+    $xsltProcessor->setParameter('', 'generate-enclosure', $this->generate_enclosure);
+    if (FALSE === $xsltProcessor->importStylesheet($xslDoc)) {
+      trigger_error("Cannot import the stylesheet $stylesheet", E_USER_ERROR);
+      return NULL;
+    }
+
+    return $xsltProcessor;
+  }
+
+  /**
+   * Convert the site content to RSS.
+   *
+   * @param string $src_url
+   *   The http or https URL of the page to convert.
+   *
+   * @return string|null
+   *   The RSS document, or NULL when the URL is invalid, the site is not
+   *   supported, or any step of the conversion fails.
+   */
+  public function tweep($src_url) {
+    $url = parse_url($src_url);
+    if (FALSE === $url || empty($url["host"])) {
+      trigger_error("Invalid URL: $src_url", E_USER_ERROR);
+      return NULL;
+    }
+
+    // A URL such as "//example.com/page" has a host but no scheme, and
+    // schemes are case-insensitive.
+    $scheme = isset($url["scheme"]) ? strtolower($url["scheme"]) : '';
+    if (!in_array($scheme, array("http", "https"), TRUE)) {
+      trigger_error("unsupported scheme: $scheme", E_USER_ERROR);
+      return NULL;
+    }
+
+    // Strip the leading www. to be more forgiving on input URLs; host names
+    // are case-insensitive, while the stylesheet lookup may not be.
+    $host = preg_replace('/^www\./', '', strtolower($url["host"]));
+
+    $xsltProcessor = $this->loadStylesheet($host);
+    if (NULL === $xsltProcessor) {
+      return NULL;
+    }
+
+    $html = Tweeper::getUrlContents($src_url);
+    if (FALSE === $html) {
+      return NULL;
+    }
+
+    // Give the host a chance to massage the HTML before it is parsed.
+    $preprocess_html_host_method = 'preprocessHtml' . Tweeper::toUpperCamelCase($host, '.');
+    if (method_exists($this, $preprocess_html_host_method)) {
+      $html = call_user_func_array(array($this, $preprocess_html_host_method), array($html));
+    }
+
+    $xmlDoc = $this->htmlToXml($html, $host);
+    if (NULL === $xmlDoc) {
+      return NULL;
+    }
+
+    $output = $xsltProcessor->transformToXML($xmlDoc);
+
+    if (FALSE === $output) {
+      trigger_error('XSL transformation failed.', E_USER_ERROR);
+      return NULL;
+    }
+    return $output;
+  }
+
+}
--- /dev/null
+<!--
+ Stylesheet to convert Dilbert daily strips to RSS.
+
+ Copyright (C) 2013-2014 Antonio Ospite <ao2@ao2.it>
+
+ This file is part of tweeper.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+-->
+
+<!--
+ Since June 18, 2013 dilbert.com strips are not accessible anymore
+ directly from the RSS feed, this message is displayed instead:
+
+ Dilbert readers - Please visit Dilbert.com to read this feature. Due
+ to changes with our feeds, we are now making this RSS feed a link to
+ Dilbert.com.
+
+ How unhandy is that, was it because of a management decision?
+ Maybe a parody dilbert strip is needed about this issue...
+-->
+
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:php="http://php.net/xsl"
+    xsl:extension-element-prefixes="php"
+    exclude-result-prefixes="php">
+
+  <!-- Tested below as $generate-enclosure, so it has to be declared. -->
+  <xsl:param name="generate-enclosure"/>
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <xsl:variable name="BaseURL" select="//meta[@property='og:url']/@content"/>
+
+  <!-- Turn one comic strip section into an RSS item. -->
+  <xsl:template match="//section[@class='comic-item']">
+    <xsl:variable name="item-permalink" select=".//a[@class='img-comic-link']/@href"/>
+    <xsl:variable name="picture-url" select=".//img[@class='img-responsive img-comic']/@src"/>
+    <xsl:variable name="picture-title" select=".//img[@class='img-responsive img-comic']/@alt"/>
+    <item>
+      <title>
+        <xsl:variable name="title-length" select="140"/>
+        <!-- ellipsize, inspired from http://stackoverflow.com/questions/13622338 -->
+        <xsl:choose>
+          <xsl:when test="string-length($picture-title) &gt; $title-length">
+            <xsl:variable name="truncated-length" select="$title-length - 3"/>
+            <xsl:value-of select="substring($picture-title, 1, $truncated-length)"/>
+            <xsl:text>...</xsl:text>
+          </xsl:when>
+          <xsl:otherwise>
+            <xsl:value-of select="$picture-title"/>
+          </xsl:otherwise>
+        </xsl:choose>
+      </title>
+      <link>
+        <xsl:value-of select="$item-permalink"/>
+      </link>
+      <guid>
+        <xsl:value-of select="$item-permalink"/>
+      </guid>
+      <pubDate>
+        <xsl:value-of select="php:functionString('Tweeper::strToRssDate', normalize-space(.//date))"/>
+      </pubDate>
+      <description>
+        <!--
+          The CDATA delimiters are emitted as unescaped text around the
+          markup; they must be written escaped here, otherwise they form a
+          real CDATA section of the stylesheet itself.
+        -->
+        <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+        <img src="{$picture-url}" alt="{$picture-title}"/>
+        <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+      </description>
+      <xsl:if test="$generate-enclosure = 1">
+        <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', $picture-url)"/>
+      </xsl:if>
+    </item>
+  </xsl:template>
+
+  <!-- Build the channel and process every comic strip found in the page. -->
+  <xsl:template match="/">
+    <xsl:variable name="channel-title" select="//meta[@property='og:title']/@content"/>
+    <xsl:variable name="channel-link" select="$BaseURL"/>
+
+    <rss version="2.0">
+      <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
+      <channel>
+        <generator>Tweeper</generator>
+        <title>
+          <xsl:value-of select="$channel-title"/>
+        </title>
+        <link>
+          <xsl:value-of select="$channel-link"/>
+        </link>
+        <description>
+          <xsl:value-of select="//meta[@property='og:description']/@content"/>
+        </description>
+        <image>
+          <title>
+            <xsl:value-of select="$channel-title"/>
+          </title>
+          <link>
+            <xsl:value-of select="$channel-link"/>
+          </link>
+          <url>
+            <xsl:value-of select="concat($BaseURL, //img[@alt='Dilbert logo']/@src)"/>
+          </url>
+        </image>
+        <xsl:apply-templates select="//section[@class='comic-item']"/>
+      </channel>
+    </rss>
+  </xsl:template>
+</xsl:stylesheet>
--- /dev/null
+<!--
+ Stylesheet to convert a Facebook public page to RSS.
+
+ Copyright (C) 2015 Antonio Ospite <ao2@ao2.it>
+
+ This file is part of tweeper.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+-->
+
+<!--
+ Since June 23rd, 2015 facebook.com deprecated the RSS feed endpoint for public pages:
+ https://developers.facebook.com/docs/apps/changelog#v2_3_90_day_deprecations
+
+ They suggest using the Graph API but they fail to mention that it does not
+ work anymore without authentication, so it cannot be considered an
+ _equivalent_ solution.
+
+ Luckily we've got Tweeper!
+-->
+
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:php="http://php.net/xsl"
+    xsl:extension-element-prefixes="php"
+    exclude-result-prefixes="php">
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <xsl:variable name="BaseURL">
+    <xsl:text>https://facebook.com</xsl:text>
+  </xsl:variable>
+
+  <!--
+    Extract the page id from an element like:
+    <meta property="al:android:url" content="fb://page/793837197390834">
+
+    The page id will be used to build the permalink.
+  -->
+  <xsl:variable
+    name="page-id"
+    select="substring-after(//meta[@property='al:android:url']/@content, 'fb://page/')"/>
+
+  <!-- Turn one story of the page into an RSS item. -->
+  <xsl:template match="//div[contains(@class, 'userContentWrapper')]">
+    <xsl:variable name="story-id" select=".//input[@name='ft_ent_identifier']/@value"/>
+    <!-- The ampersand must be escaped to keep the stylesheet well-formed. -->
+    <xsl:variable
+      name="item-permalink"
+      select="concat($BaseURL, '/permalink.php?id=', $page-id, '&amp;story_fbid=', $story-id)"/>
+
+    <!-- Get only the first child in order to skip the footer of the content -->
+    <xsl:variable name="item-content" select="div[1]"/>
+
+    <item>
+      <title>
+        <xsl:variable name="item-title" select="$item-content//p"/>
+        <xsl:variable name="title-length" select="140"/>
+        <!-- ellipsize, inspired from http://stackoverflow.com/questions/13622338 -->
+        <xsl:choose>
+          <xsl:when test="string-length($item-title) &gt; $title-length">
+            <xsl:variable name="truncated-length" select="$title-length - 3"/>
+            <xsl:value-of select="substring($item-title, 1, $truncated-length)"/>
+            <xsl:text>...</xsl:text>
+          </xsl:when>
+          <xsl:otherwise>
+            <xsl:value-of select="$item-title"/>
+          </xsl:otherwise>
+        </xsl:choose>
+      </title>
+      <link>
+        <xsl:value-of select="$item-permalink"/>
+      </link>
+      <guid>
+        <xsl:value-of select="$item-permalink"/>
+      </guid>
+      <pubDate>
+        <xsl:variable name="timestamp" select=".//abbr[@data-shorten]/@data-utime"/>
+        <xsl:value-of select="php:functionString('Tweeper::epochToRssDate', $timestamp)"/>
+      </pubDate>
+      <description>
+
+        <!--
+          Get only the children starting from the one with class="userContent",
+          this way the content header is skipped
+        -->
+        <xsl:variable
+          name="usercontent-position"
+          select="count($item-content/div[contains(@class, 'userContent')]/preceding-sibling::*) + 1"/>
+
+        <!--
+          The CDATA delimiters are emitted as unescaped text around the
+          copied markup, so they are written escaped in the stylesheet.
+        -->
+        <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+        <xsl:copy-of select="$item-content/div[position() &gt;= $usercontent-position]"/>
+        <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+      </description>
+    </item>
+  </xsl:template>
+
+  <!-- Build the channel and process every story found in the page. -->
+  <xsl:template match="/">
+    <xsl:variable name="channel-title" select="//title"/>
+    <xsl:variable name="channel-link" select="//div[contains(@class, 'userContentWrapper')][1]//a[1]/@href"/>
+
+    <rss version="2.0">
+      <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
+      <channel>
+        <generator>Tweeper</generator>
+        <title>
+          <xsl:value-of select="$channel-title"/>
+        </title>
+        <link>
+          <xsl:value-of select="$channel-link"/>
+        </link>
+        <description>
+          <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+          <xsl:copy-of select="//div[@data-id='1']/node()"/>
+          <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+        </description>
+        <image>
+          <title>
+            <xsl:value-of select="$channel-title"/>
+          </title>
+          <link>
+            <xsl:value-of select="$channel-link"/>
+          </link>
+          <url>
+            <xsl:value-of select="//img[@class='profilePic img']/@src"/>
+          </url>
+        </image>
+        <xsl:apply-templates select="//div[contains(@class, 'userContentWrapper')]"/>
+      </channel>
+    </rss>
+  </xsl:template>
+</xsl:stylesheet>
--- /dev/null
+<!--
+ Stylesheet to convert Howtoons.com to RSS.
+
+ Copyright (C) 2014 Antonio Ospite <ao2@ao2.it>
+
+ This file is part of tweeper.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+-->
+
+<!--
+ The RSS feed link is broken on http://howtoons.com so just work around it.
+
+ Howtoons uses Wordpress, so maybe this style sheet can be used as a base for
+ scraping other Wordpress sites.
+-->
+
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:php="http://php.net/xsl"
+    xsl:extension-element-prefixes="php"
+    exclude-result-prefixes="php">
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <xsl:variable name="BaseURL">
+    <xsl:text>http://howtoons.com</xsl:text>
+  </xsl:variable>
+
+  <!-- Turn one post into an RSS item. -->
+  <xsl:template match="//div[contains(@id, 'post-')]">
+    <xsl:variable name="item-permalink" select=".//div[@class='post-headline']//a/@href"/>
+    <item>
+      <title>
+        <xsl:value-of select="normalize-space(.//div[@class='post-headline']//a)"/>
+      </title>
+      <link>
+        <xsl:value-of select="$item-permalink"/>
+      </link>
+      <guid>
+        <xsl:value-of select="$item-permalink"/>
+      </guid>
+      <pubDate>
+        <xsl:variable name="date" select="substring-before(.//div[@class='post-byline'], ',')"/>
+        <!-- date format is MM.DD.YY; the century is assumed to be 20xx -->
+        <xsl:variable name="month" select="substring($date, 1, 2)"/>
+        <xsl:variable name="day" select="substring($date, 4, 2)"/>
+        <xsl:variable name="year" select="substring($date, 7, 2)"/>
+        <xsl:variable name="iso-date" select="concat('20', $year, '-', $month, '-', $day)"/>
+        <xsl:value-of select="php:functionString('Tweeper::strToRssDate', $iso-date)"/>
+      </pubDate>
+      <description>
+        <!--
+          The CDATA delimiters are emitted as unescaped text around the
+          copied markup, so they are written escaped in the stylesheet.
+        -->
+        <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+        <xsl:copy-of select=".//div[contains(@class, 'post-bodycopy')]/p"/>
+        <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+      </description>
+    </item>
+  </xsl:template>
+
+  <!-- Build the channel and process every post found in the page. -->
+  <xsl:template match="/">
+    <xsl:variable name="channel-title" select="//title"/>
+    <xsl:variable name="channel-link" select="$BaseURL"/>
+
+    <rss version="2.0">
+      <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
+      <channel>
+        <generator>Tweeper</generator>
+        <title>
+          <xsl:value-of select="$channel-title"/>
+        </title>
+        <link>
+          <xsl:value-of select="$channel-link"/>
+        </link>
+        <description>
+          <xsl:text>The world's greatest D.I.Y. comic website! Tools of mass construction!</xsl:text>
+        </description>
+        <image>
+          <title>
+            <xsl:value-of select="$channel-title"/>
+          </title>
+          <link>
+            <xsl:value-of select="$channel-link"/>
+          </link>
+          <url>
+            <xsl:text>http://www.howtoons.com/wp-content/themes/atahualpa/images/header/tuck1000.png</xsl:text>
+          </url>
+        </image>
+        <xsl:apply-templates select="//div[contains(@id, 'post-')]"/>
+      </channel>
+    </rss>
+  </xsl:template>
+</xsl:stylesheet>
--- /dev/null
+rss_converter_pump.io.xsl
\ No newline at end of file
--- /dev/null
+<!--
+ Stylesheet to convert Instagram user timelines to RSS.
+
+ Copyright (C) 2015 Antonio Ospite <ao2@ao2.it>
+
+ This file is part of tweeper.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+-->
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:php="http://php.net/xsl"
+    xsl:extension-element-prefixes="php"
+    exclude-result-prefixes="php">
+
+  <xsl:param name="generate-enclosure"/>
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <xsl:variable name="BaseURL">
+    <xsl:text>https://instagram.com</xsl:text>
+  </xsl:variable>
+
+  <xsl:variable name="user-name" select="//ProfilePage/user/username"/>
+
+  <!-- Some users do not specify the full name -->
+  <xsl:variable name="full-name" select="//ProfilePage/user/full_name"/>
+  <xsl:variable name="screen-name">
+    <xsl:choose>
+      <xsl:when test="$full-name != ''">
+        <xsl:value-of select="$full-name"/>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:value-of select="$user-name"/>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:variable>
+
+  <!-- Turn one media node of the profile into an RSS item. -->
+  <xsl:template match="//ProfilePage/user/media/nodes">
+    <xsl:variable name="item-content-image" select="./display_src"/>
+    <xsl:variable name="item-content-caption" select="./caption"/>
+    <xsl:variable name="item-permalink" select="concat($BaseURL, '/p/', ./code, '/')"/>
+    <item>
+      <title>
+        <xsl:variable name="title-length" select="140"/>
+        <xsl:variable name="item-content-title" select="normalize-space(concat($user-name, ': ', $item-content-caption))"/>
+        <!-- ellipsize, inspired from http://stackoverflow.com/questions/13622338 -->
+        <xsl:choose>
+          <xsl:when test="string-length($item-content-title) &gt; $title-length">
+            <xsl:variable name="truncated-length" select="$title-length - 3"/>
+            <xsl:value-of select="substring($item-content-title, 1, $truncated-length)"/>
+            <xsl:text>...</xsl:text>
+          </xsl:when>
+          <xsl:otherwise>
+            <xsl:value-of select="$item-content-title"/>
+          </xsl:otherwise>
+        </xsl:choose>
+      </title>
+      <link>
+        <xsl:value-of select="$item-permalink"/>
+      </link>
+      <guid>
+        <xsl:value-of select="$item-permalink"/>
+      </guid>
+      <pubDate>
+        <xsl:variable name="timestamp" select="./date"/>
+        <xsl:value-of select="php:functionString('Tweeper::epochToRssDate', $timestamp)"/>
+      </pubDate>
+      <description>
+        <!--
+          The CDATA delimiters are emitted as unescaped text around the
+          markup, so they are written escaped in the stylesheet.
+        -->
+        <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+        <p>
+          <xsl:if test="./is_video/text() = 1">
+            (Video)
+          </xsl:if>
+          <xsl:value-of select="$item-content-caption"/>
+        </p><br />
+        <a href="{$item-permalink}"><img src="{$item-content-image}" style="max-width: 100%"/></a>
+        <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+      </description>
+      <xsl:if test="$generate-enclosure = 1">
+        <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', $item-content-image)"/>
+      </xsl:if>
+    </item>
+  </xsl:template>
+
+  <!-- Build the channel and process every media node of the profile. -->
+  <xsl:template match="/">
+    <xsl:variable name="channel-title" select="concat('Instagram / ', $screen-name)"/>
+    <xsl:variable name="channel-link" select="concat($BaseURL, '/', $user-name)"/>
+
+    <rss version="2.0">
+      <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
+      <channel>
+        <generator>Tweeper</generator>
+        <title>
+          <xsl:value-of select="$channel-title"/>
+        </title>
+        <link>
+          <xsl:value-of select="$channel-link"/>
+        </link>
+        <description>
+          <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+          <xsl:value-of select="normalize-space(concat($screen-name, '. ', //user/biography))"/>
+          <xsl:variable name="external-url" select="//user/external_url"/>
+          <xsl:if test="$external-url != ''">
+            <xsl:text> </xsl:text><a href="{$external-url}"><xsl:value-of select="$external-url"/></a>
+          </xsl:if>
+          <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+        </description>
+        <image>
+          <title>
+            <xsl:value-of select="$channel-title"/>
+          </title>
+          <link>
+            <xsl:value-of select="$channel-link"/>
+          </link>
+          <url>
+            <xsl:value-of select="//ProfilePage/user/profile_pic_url"/>
+          </url>
+        </image>
+        <xsl:apply-templates select="//ProfilePage/user/media/nodes"/>
+      </channel>
+    </rss>
+  </xsl:template>
+</xsl:stylesheet>
--- /dev/null
+<!--
+ Stylesheet to convert Pump.io activity streams to RSS.
+
+ Copyright (C) 2013-2014 Antonio Ospite <ao2@ao2.it>
+
+ This file is part of tweeper.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+-->
+<!-- To Evan, please reconsider publishing RSS output for _public_ contents -->
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:php="http://php.net/xsl"
+    xsl:extension-element-prefixes="php"
+    exclude-result-prefixes="php">
+
+  <!-- Tested below as $generate-enclosure, so it has to be declared. -->
+  <xsl:param name="generate-enclosure"/>
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <!-- The profile id is split on '@' for the domain and on ':' for the user. -->
+  <xsl:variable name="domain-name" select="substring-after(//div[@id='profile-block']/@data-profile-id, '@')"/>
+  <xsl:variable name="BaseURL" select="concat('https://', $domain-name)"/>
+
+  <xsl:variable name="user-name" select="substring-after(//div[@id='profile-block']/@data-profile-id, ':')"/>
+
+  <!-- Turn one activity of the major stream into an RSS item. -->
+  <xsl:template match="//div[@id='user-content-activities']//ul[@id='major-stream']/li">
+    <xsl:variable name="item-content" select=".//div[@class='activity-content']"/>
+    <xsl:variable name="item-permalink" select=".//p[@class='muted']/small/a/@href"/>
+    <item>
+      <title>
+        <xsl:value-of select="concat($user-name, ': ', normalize-space($item-content))"/>
+      </title>
+      <link>
+        <xsl:value-of select="$item-permalink"/>
+      </link>
+      <guid>
+        <xsl:value-of select="$item-permalink"/>
+      </guid>
+      <pubDate>
+        <xsl:value-of select="php:functionString('Tweeper::strToRssDate', .//abbr[@class='easydate']/@title)"/>
+      </pubDate>
+      <description>
+        <xsl:value-of select="concat($user-name, ': ')"/>
+        <!--
+          The CDATA delimiters are emitted as unescaped text around the
+          copied markup, so they are written escaped in the stylesheet.
+        -->
+        <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+        <xsl:copy-of select="$item-content/node()"/>
+        <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+      </description>
+      <xsl:if test="$generate-enclosure = 1">
+        <xsl:variable name="image-thumb-link" select=".//img[contains(@class, 'object-image')]/@src"/>
+        <xsl:if test="$image-thumb-link">
+          <!-- Drop the '_thumb' marker from the thumbnail URL before probing it. -->
+          <xsl:variable name="image-link" select="php:functionString('str_replace', '_thumb', '', $image-thumb-link)"/>
+          <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', $image-link)"/>
+        </xsl:if>
+      </xsl:if>
+    </item>
+  </xsl:template>
+
+  <!-- Build the channel and process every activity of the major stream. -->
+  <xsl:template match="/">
+    <xsl:variable name="channel-title" select="concat(substring-after($user-name, '@'), ' / ', substring-before($user-name, '@'))"/>
+    <xsl:variable name="channel-link" select="concat('https://', substring-after($user-name, '@'), '/', substring-before($user-name, '@'))"/>
+
+    <rss version="2.0">
+      <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
+      <channel>
+        <generator>Tweeper</generator>
+        <title>
+          <xsl:value-of select="$channel-title"/>
+        </title>
+        <link>
+          <xsl:value-of select="$channel-link"/>
+        </link>
+        <description>
+          <xsl:value-of select="normalize-space(//h1[@class='media-header'])"/>
+        </description>
+        <image>
+          <title>
+            <xsl:value-of select="$channel-title"/>
+          </title>
+          <link>
+            <xsl:value-of select="$channel-link"/>
+          </link>
+          <url>
+            <xsl:value-of select="//div[@id='profile-block']/span/img[@class='img-rounded media-object']/@src"/>
+          </url>
+        </image>
+        <xsl:apply-templates select="//div[@id='user-content-activities']//ul[@id='major-stream']/li"/>
+      </channel>
+    </rss>
+  </xsl:template>
+</xsl:stylesheet>
--- /dev/null
+<!--
+ Stylesheet to convert Twitter user timelines to RSS.
+
+ Copyright (C) 2013-2014 Antonio Ospite <ao2@ao2.it>
+
+ This file is part of tweeper.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+-->
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:php="http://php.net/xsl"
+    xsl:extension-element-prefixes="php"
+    exclude-result-prefixes="php">
+
+  <xsl:param name="generate-enclosure"/>
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <xsl:variable name="BaseURL">
+    <xsl:text>https://twitter.com</xsl:text>
+  </xsl:variable>
+
+  <!-- Identity transform -->
+  <xsl:template match="@*|node()">
+    <xsl:copy>
+      <xsl:apply-templates select="@*|node()"/>
+    </xsl:copy>
+  </xsl:template>
+
+  <!--
+    Anchors to external links provide the direct URL in the
+    data-expanded-url attribute, so use this in the href attribute too
+    instead of the default short URL which uses the t.co redirection
+    service.
+
+    NOTE: when creating an element, attributes must be processed _before_
+    adding the contents (either children or a value):
+    http://stackoverflow.com/questions/21984867/
+  -->
+  <xsl:template match="a[@data-expanded-url]">
+    <!-- Prepend and append a white space for aesthetic reasons -->
+    <xsl:text> </xsl:text>
+    <a>
+      <xsl:attribute name="href">
+        <xsl:value-of select="@data-expanded-url"/>
+      </xsl:attribute>
+      <!--
+        Also strip non-breaking spaces (U+00A0) and ellipsis characters
+        (U+2026); character references keep the invisible one explicit.
+      -->
+      <xsl:value-of select="translate(., '&#xA0;&#x2026;', '')"/>
+    </a>
+    <xsl:text> </xsl:text>
+  </xsl:template>
+
+  <!--
+    These are links to pic.twitter.com, use the direct link for those
+    too instead of the t.co redirections.
+  -->
+  <xsl:template match="a[@data-pre-embedded='true']">
+    <!-- Prepend and append a white space for aesthetic reasons -->
+    <xsl:text> </xsl:text>
+    <a>
+      <xsl:attribute name="href">
+        <xsl:value-of select="concat('https://', .)"/>
+      </xsl:attribute>
+      <xsl:value-of select="concat('https://', .)"/>
+    </a>
+    <xsl:text> </xsl:text>
+  </xsl:template>
+
+  <!-- Present images in a more convenient way -->
+  <xsl:template match="div[@data-image-url]">
+    <a>
+      <xsl:attribute name="href">
+        <xsl:value-of select="concat(@data-image-url, ':orig')"/>
+      </xsl:attribute>
+      <img>
+        <xsl:attribute name="src">
+          <xsl:value-of select="@data-image-url"/>
+        </xsl:attribute>
+      </img>
+    </a>
+  </xsl:template>
+
+  <!-- Don't repeat background in embedded media content -->
+  <xsl:template match="div[contains(@class, 'PlayableMedia-player')]">
+    <xsl:copy>
+      <xsl:apply-templates select="@*"/>
+      <xsl:attribute name="style">
+        <xsl:value-of select="concat(@style, '; background-repeat: no-repeat')"/>
+      </xsl:attribute>
+      <xsl:apply-templates select="node()"/>
+    </xsl:copy>
+  </xsl:template>
+
+  <!-- In "enclosure" mode, links and images become enclosure elements. -->
+  <xsl:template match="a[@data-expanded-url]" mode="enclosure">
+    <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', ./@data-expanded-url)"/>
+  </xsl:template>
+
+  <xsl:template match="div[@data-image-url]" mode="enclosure">
+    <xsl:copy-of select="php:functionString('Tweeper::generateEnclosure', concat(./@data-image-url, ':orig'))"/>
+  </xsl:template>
+
+  <xsl:variable name="screen-name" select="//div[@class='user-actions btn-group not-following ']/@data-screen-name"/>
+
+  <!-- Turn one tweet into an RSS item. -->
+  <xsl:template match="//li[@data-item-id and @data-item-type='tweet']">
+    <xsl:variable name="user-name" select=".//div[contains(@class, 'js-stream-tweet')]/@data-screen-name"/>
+    <xsl:variable name="item-content" select=".//p[contains(@class, 'js-tweet-text')]"/>
+    <xsl:variable name="item-media" select=".//div[contains(@class, 'AdaptiveMedia-container')]"/>
+    <xsl:variable name="item-permalink" select="concat($BaseURL, .//div[@data-permalink-path]/@data-permalink-path)"/>
+
+    <xsl:variable name="item-has-video" select="$item-media//*[contains(@class, 'PlayableMedia--video')]"/>
+    <item>
+      <title>
+        <xsl:value-of select="concat($user-name, ': ')"/>
+        <xsl:if test="$item-has-video">
+          <xsl:text>(Video) </xsl:text>
+        </xsl:if>
+        <!--
+          Prepend a space in front of the URLs which are not
+          preceded by an open parenthesis, for aesthetic reasons.
+          Also, regex, I know: http://xkcd.com/1171/
+
+          The less-than sign of the lookbehind has to be escaped inside
+          the attribute value.
+        -->
+        <xsl:variable
+          name="processed-title"
+          select="php:functionString('preg_replace', '@((?&lt;!\()(?:http[s]?://|pic.twitter.com))@', ' \1', $item-content)"/>
+        <!-- Also strip non-breaking spaces (U+00A0) and ellipsis characters (U+2026) -->
+        <xsl:value-of select="normalize-space(translate($processed-title, '&#xA0;&#x2026;', ''))"/>
+      </title>
+      <link>
+        <xsl:value-of select="$item-permalink"/>
+      </link>
+      <guid>
+        <xsl:value-of select="$item-permalink"/>
+      </guid>
+      <pubDate>
+        <xsl:variable name="timestamp" select=".//span[contains(@class, 'js-short-timestamp')]/@data-time"/>
+        <xsl:value-of select="php:functionString('Tweeper::epochToRssDate', number($timestamp))"/>
+      </pubDate>
+      <description>
+        <xsl:value-of select="concat($user-name, ': ')"/>
+        <!--
+          The CDATA delimiters are emitted as unescaped text around the
+          markup, so they are written escaped in the stylesheet.
+        -->
+        <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+        <xsl:if test="$item-has-video">
+          <xsl:text>(Video) </xsl:text>
+        </xsl:if>
+        <xsl:apply-templates select="$item-content/node()"/>
+        <xsl:apply-templates select="$item-media/node()"/>
+        <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+      </description>
+      <xsl:if test="$generate-enclosure = 1">
+        <xsl:apply-templates select="$item-content//a[@data-expanded-url]" mode="enclosure"/>
+        <xsl:apply-templates select="$item-media//div[@data-image-url]" mode="enclosure"/>
+      </xsl:if>
+    </item>
+  </xsl:template>
+
+  <!-- Build the channel and process every tweet of the stream. -->
+  <xsl:template match="/">
+    <xsl:variable name="channel-title">
+      <xsl:choose>
+        <xsl:when test="$screen-name != ''">
+          <xsl:value-of select="concat('Twitter / ', $screen-name)"/>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:value-of select="concat('Twitter / ', normalize-space(//h1[1]))"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:variable>
+    <xsl:variable name="channel-link" select="//link[@rel='canonical']/@href"/>
+
+    <rss version="2.0">
+      <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute>
+      <channel>
+        <generator>Tweeper</generator>
+        <title>
+          <xsl:value-of select="$channel-title"/>
+        </title>
+        <link>
+          <xsl:value-of select="$channel-link"/>
+        </link>
+        <description>
+          <xsl:value-of select="normalize-space(//div[@class='ProfileHeaderCard'])"/>
+        </description>
+        <image>
+          <title>
+            <xsl:value-of select="$channel-title"/>
+          </title>
+          <link>
+            <xsl:value-of select="$channel-link"/>
+          </link>
+          <url>
+            <xsl:value-of select="//a[contains(@class, 'profile-picture')]/@href"/>
+          </url>
+        </image>
+        <xsl:apply-templates select="//ol[@id='stream-items-id']/li[@data-item-id and @data-item-type='tweet']"/>
+      </channel>
+    </rss>
+  </xsl:template>
+</xsl:stylesheet>
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-require_once 'Symfony/Component/Serializer/autoload.php';
+require_once 'src/Tweeper.php';
-use Symfony\Component\Serializer\Serializer;
-use Symfony\Component\Serializer\Encoder\XmlEncoder;
-use Symfony\Component\Serializer\Normalizer\ObjectNormalizer;
+use Tweeper\Tweeper;
date_default_timezone_set('UTC');
/**
- * Scrape supported websites and perform conversion to RSS.
- */
-class Tweeper {
-
- private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0";
-
- /**
- * Constructor sets up {@link $generate_enclosure}.
- */
- public function __construct($generate_enclosure = FALSE) {
- $this->generate_enclosure = $generate_enclosure;
- }
-
- /**
- * Convert numeric Epoch to the date format expected in a RSS document.
- */
- public static function epochToRssDate($timestamp) {
- if (!is_numeric($timestamp) || is_nan($timestamp)) {
- $timestamp = 0;
- }
-
- return gmdate(DATE_RSS, $timestamp);
- }
-
- /**
- * Convert generic date string to the date format expected in a RSS document.
- */
- public static function strToRssDate($date) {
- $timestamp = strtotime($date);
- if (FALSE === $timestamp) {
- $timestamp = 0;
- }
-
- return Tweeper::epochToRssDate($timestamp);
- }
-
- /**
- * Convert string to UpperCamelCase.
- */
- public static function toUpperCamelCase($str, $delim = ' ') {
- $str_upper = ucwords($str, $delim);
- $str_camel_case = str_replace($delim, '', $str_upper);
- return $str_camel_case;
- }
-
- /**
- * Get the contents from a URL.
- */
- private static function getUrlContents($url) {
- $ch = curl_init($url);
- curl_setopt_array($ch, array(
- CURLOPT_HEADER => FALSE,
- // Follow http redirects to get the real URL.
- CURLOPT_FOLLOWLOCATION => TRUE,
- CURLOPT_RETURNTRANSFER => TRUE,
- CURLOPT_SSL_VERIFYHOST => FALSE,
- CURLOPT_SSL_VERIFYPEER => FALSE,
- CURLOPT_HTTPHEADER => array('Accept-language: en'),
- CURLOPT_USERAGENT => Tweeper::$userAgent,
- ));
- $contents = curl_exec($ch);
- if (FALSE === $contents) {
- trigger_error(curl_error($ch));
- }
- curl_close($ch);
-
- return $contents;
- }
-
- /**
- * Get the headers from a URL.
- */
- private static function getUrlInfo($url) {
- $ch = curl_init($url);
- curl_setopt_array($ch, array(
- CURLOPT_HEADER => TRUE,
- CURLOPT_NOBODY => TRUE,
- // Follow http redirects to get the real URL.
- CURLOPT_FOLLOWLOCATION => TRUE,
- CURLOPT_RETURNTRANSFER => TRUE,
- CURLOPT_SSL_VERIFYHOST => FALSE,
- CURLOPT_SSL_VERIFYPEER => FALSE,
- CURLOPT_USERAGENT => Tweeper::$userAgent,
- ));
- curl_exec($ch);
- $url_info = curl_getinfo($ch);
- if (FALSE === $url_info) {
- trigger_error(curl_error($ch));
- }
- curl_close($ch);
-
- return $url_info;
- }
-
- /**
- * Generate an RSS <enclosure/> element.
- */
- public static function generateEnclosure($url) {
- $supported_content_types = array(
- "application/octet-stream",
- "application/ogg",
- "application/pdf",
- "audio/aac",
- "audio/mp4",
- "audio/mpeg",
- "audio/ogg",
- "audio/vorbis",
- "audio/wav",
- "audio/webm",
- "audio/x-midi",
- "image/gif",
- "image/jpeg",
- "image/png",
- "video/avi",
- "video/mp4",
- "video/mpeg",
- "video/ogg",
- );
-
- $url_info = Tweeper::getUrlInfo($url);
-
- $supported = in_array($url_info['content_type'], $supported_content_types);
- if (!$supported) {
- error_log("Unsupported enclosure content type \"" . $url_info['content_type'] . "\" for URL: " . $url_info['url']);
- return '';
- }
-
- // The RSS specification says that the enclosure element URL must be http.
- // See http://sourceforge.net/p/feedvalidator/bugs/72/
- $http_url = preg_replace("/^https/", "http", $url_info['url']);
-
- $dom = new DOMDocument();
- $enc = $dom->createElement('enclosure');
- $enc->setAttribute('url', $http_url);
- $enc->setAttribute('length', $url_info['download_content_length']);
- $enc->setAttribute('type', $url_info['content_type']);
-
- return $enc;
- }
-
- /**
- * Mimic the message from libxml.c::php_libxml_ctx_error_level()
- */
- private static function logXmlError($error) {
- $output = "";
-
- switch ($error->level) {
- case LIBXML_ERR_WARNING:
- $output .= "Warning $error->code: ";
- break;
-
- case LIBXML_ERR_ERROR:
- $output .= "Error $error->code: ";
- break;
-
- case LIBXML_ERR_FATAL:
- $output .= "Fatal Error $error->code: ";
- break;
- }
-
- $output .= trim($error->message);
-
- if ($error->file) {
- $output .= " in $error->file";
- }
- else {
- $output .= " in Entity,";
- }
-
- $output .= " line $error->line";
-
- error_log($output);
- }
-
- /**
- * Convert json to XML.
- */
- private static function jsonToXml($json, $root_node_name) {
- // Apparently the ObjectNormalizer used afterwards is not able to handle
- // the stdClass object created by json_decode() with the default setting
- // $assoc = false; so use $assoc = true.
- $data = json_decode($json, $assoc = TRUE);
- if (!$data) {
- return NULL;
- }
-
- $encoder = new XmlEncoder();
- $normalizer = new ObjectNormalizer();
- $serializer = new Serializer(array($normalizer), array($encoder));
-
- $serializer_options = array(
- 'xml_encoding' => "UTF-8",
- 'xml_format_output' => TRUE,
- 'xml_root_node_name' => $root_node_name,
- );
-
- $xml_data = $serializer->serialize($data, 'xml', $serializer_options);
- if (!$xml_data) {
- trigger_error("Cannot serialize data", E_USER_ERROR);
- return NULL;
- }
-
- return $xml_data;
- }
-
- /**
- * Convert the Instagram content to XML.
- */
- private function getXmlInstagramCom($html) {
- // Extract the json data from the html code.
- $json_match_expr = '/window._sharedData = (.*);/';
- $ret = preg_match($json_match_expr, $html, $matches);
- if ($ret !== 1) {
- trigger_error("Cannot match expression: $json_match_expr\n", E_USER_ERROR);
- return NULL;
- }
-
- return Tweeper::jsonToXml($matches[1], 'instagram');
- }
-
- /**
- * Make the Facebook HTML processable.
- */
- private function preprocessHtmlFacebookCom($html) {
- $html = str_replace('<!--', '', $html);
- $html = str_replace('-->', '', $html);
- return $html;
- }
-
- /**
- * Convert the HTML retrieved from the site to XML.
- */
- private function htmlToXml($html, $host) {
- $xmlDoc = new DOMDocument();
-
- // Handle warnings and errors when loading invalid HTML.
- $xml_errors_value = libxml_use_internal_errors(TRUE);
-
- // If there is a host-specific method to get the XML data, use it!
- $get_xml_host_method = 'getXml' . Tweeper::toUpperCamelCase($host, '.');
- if (method_exists($this, $get_xml_host_method)) {
- $xml_data = call_user_func_array(array($this, $get_xml_host_method), array($html));
- $xmlDoc->loadXML($xml_data);
- }
- else {
- $xmlDoc->loadHTML($html);
- }
-
- foreach (libxml_get_errors() as $xml_error) {
- Tweeper::logXmlError($xml_error);
- }
- libxml_clear_errors();
- libxml_use_internal_errors($xml_errors_value);
-
- return $xmlDoc;
- }
-
- /**
- * Load a stylesheet if the web site is supported.
- */
- private function loadStylesheet($host) {
- $stylesheet = "file://" . __DIR__ . "/rss_converter_" . $host . ".xsl";
- if (FALSE === file_exists($stylesheet)) {
- trigger_error("Conversion to RSS not supported for $host ($stylesheet not found)", E_USER_ERROR);
- return NULL;
- }
-
- $stylesheet_contents = Tweeper::getUrlContents($stylesheet);
-
- $xslDoc = new DOMDocument();
- $xslDoc->loadXML($stylesheet_contents);
-
- $xsltProcessor = new XSLTProcessor();
- $xsltProcessor->registerPHPFunctions();
- $xsltProcessor->setParameter('', 'generate-enclosure', $this->generate_enclosure);
- $xsltProcessor->importStylesheet($xslDoc);
-
- return $xsltProcessor;
- }
-
- /**
- * Convert the site content to RSS.
- */
- public function tweep($src_url) {
- $url = parse_url($src_url);
- if (FALSE === $url || empty($url["host"])) {
- trigger_error("Invalid URL: $src_url", E_USER_ERROR);
- return NULL;
- }
-
- $scheme = $url["scheme"];
- if (!in_array($scheme, array("http", "https"))) {
- trigger_error("unsupported scheme: $scheme", E_USER_ERROR);
- return NULL;
- }
-
- // Strip the leading www. to be more forgiving on input URLs.
- $host = preg_replace('/^www\./', '', $url["host"]);
-
- $xsltProcessor = $this->loadStylesheet($host);
- if (NULL === $xsltProcessor) {
- return NULL;
- }
-
- $html = Tweeper::getUrlContents($src_url);
- if (FALSE === $html) {
- return NULL;
- }
-
- $preprocess_html_host_method = 'preprocessHtml' . Tweeper::toUpperCamelCase($host, '.');
- if (method_exists($this, $preprocess_html_host_method)) {
- $html = call_user_func_array(array($this, $preprocess_html_host_method), array($html));
- }
-
- $xmlDoc = $this->htmlToXml($html, $host);
- if (NULL === $xmlDoc) {
- return NULL;
- }
-
- $output = $xsltProcessor->transformToXML($xmlDoc);
-
- if (FALSE === $output) {
- trigger_error('XSL transformation failed.', E_USER_ERROR);
- return NULL;
- }
- return $output;
- }
-
-}
-
-/**
* Check if the script is being run from the command line.
*/
function is_cli() {