From: Antonio Ospite <ao2@ao2.it> Date: Wed, 1 Jul 2015 11:47:53 +0000 (+0200) Subject: Add support for Facebook.com public pages X-Git-Tag: v0.4~20 X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/481f2d015d14180be9982ced2f281494e0ec3855 Add support for Facebook.com public pages --- diff --git a/README b/README index 8612636..d2d200b 100644 --- a/README +++ b/README @@ -35,6 +35,7 @@ The currently supported sites are: * Dilbert.com * Howtoons.com * Instagram.com + * Facebook.com (public pages) Tweeper can be used via web or as a command line program, for example as a filter in your feed reader, by passing the URL of the user's public timeline diff --git a/rss_converter_facebook.com.xsl b/rss_converter_facebook.com.xsl new file mode 100644 index 0000000..b50be3d --- /dev/null +++ b/rss_converter_facebook.com.xsl @@ -0,0 +1,115 @@ +<!-- + Stylesheet to convert a Facebook public page to RSS. + + Copyright (C) 2015 Antonio Ospite <ao2@ao2.it> + + This file is part of tweeper. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +--> + +<!-- + Since June 23rd, 2015 facebook.com deprecated the RSS feed endpoint for public pages: + https://developers.facebook.com/docs/apps/changelog#v2_3_90_day_deprecations + + They suggest to use the Graph API but they fail to mention that it does not + work anymore without authentication, so it cannot be considered an + _equivalent_ solution. 
+ + Luckily we've got Tweeper! +--> + +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:php="http://php.net/xsl" + xsl:extension-element-prefixes="php" + exclude-result-prefixes="php"> + + <xsl:output method="xml" indent="yes"/> + + <xsl:variable name="BaseURL"> + <xsl:text>https://facebook.com</xsl:text> + </xsl:variable> + + <xsl:template match="//div[contains(@class, 'userContentWrapper')]"> + <xsl:variable name="item-content" select=".//div[contains(@class, 'userContent')]"/> + <xsl:variable name="item-permalink" select="concat($BaseURL, .//a[@target='']/@href)"/> + <item> + <title> + <xsl:variable name="item-title" select="$item-content/p"/> + <xsl:variable name="title-length" select="140"/> + <!-- ellipsize, inspired from http://stackoverflow.com/questions/13622338 --> + <xsl:choose> + <xsl:when test="string-length($item-title) > $title-length"> + <xsl:variable name="truncated-length" select="$title-length - 3"/> + <xsl:value-of select="substring($item-title, 1, $truncated-length)"/> + <xsl:text>...</xsl:text> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="$item-title"/> + </xsl:otherwise> + </xsl:choose> + </title> + <link> + <xsl:value-of select="$item-permalink"/> + </link> + <guid> + <xsl:value-of select="$item-permalink"/> + </guid> + <pubDate> + <xsl:variable name="timestamp" select=".//abbr[@data-shorten]/@data-utime"/> + <xsl:value-of select="php:functionString('Tweeper::epoch_to_gmdate', number($timestamp))"/> + </pubDate> + <description> + <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text> + <xsl:copy-of select="$item-content/node()"/> + <xsl:copy-of select=".//div[@class='mtm']/node()"/> + <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text> + </description> + </item> + </xsl:template> + + <xsl:template match="/"> + <xsl:variable name="channel-title" select="//meta[@property='og:title']/@content"/> + <xsl:variable name="channel-link" select="//meta[@property='og:url']/@content"/> + + <rss 
version="2.0"> + <xsl:attribute name="xml:base"><xsl:value-of select="$BaseURL" /></xsl:attribute> + <channel> + <generator>Tweeper</generator> + <title> + <xsl:value-of select="$channel-title"/> + </title> + <link> + <xsl:value-of select="$channel-link"/> + </link> + <description> + <xsl:value-of select="normalize-space(//meta[@property='og:description']/@content)"/> + </description> + <image> + <title> + <xsl:value-of select="$channel-title"/> + </title> + <link> + <xsl:value-of select="$channel-link"/> + </link> + <url> + <xsl:value-of select="//img[@class='profilePic img']/@src"/> + </url> + </image> + <xsl:apply-templates select="//div[contains(@class, 'userContentWrapper')]"/> + </channel> + </rss> + </xsl:template> +</xsl:stylesheet> diff --git a/tweeper.1.asciidoc b/tweeper.1.asciidoc index d6ced90..2782dac 100644 --- a/tweeper.1.asciidoc +++ b/tweeper.1.asciidoc @@ -30,6 +30,7 @@ The sites that tweeper is able to scrape and convert to RSS are: * Dilbert.com * Howtoons.com * Instagram.com +* Facebook.com (public pages) tweeper can be used as: diff --git a/tweeper.php b/tweeper.php index efc0fd6..5e9d242 100644 --- a/tweeper.php +++ b/tweeper.php @@ -213,6 +213,12 @@ class Tweeper { return $this->json_to_xml($html, '/window._sharedData = (.*);/', 'instagram'); } + private function preprocess_html_facebook_com($html) { + $html = str_replace('<!--', '', $html); + $html = str_replace('-->', '', $html); + return $html; + } + private function html_to_xml($html, $host) { $xmlDoc = new DOMDocument();