Add support for Facebook.com public pages
author Antonio Ospite <ao2@ao2.it>
Wed, 1 Jul 2015 11:47:53 +0000 (13:47 +0200)
committer Antonio Ospite <ao2@ao2.it>
Mon, 6 Jul 2015 14:59:18 +0000 (16:59 +0200)
README
rss_converter_facebook.com.xsl [new file with mode: 0644]
tweeper.1.asciidoc
tweeper.php

diff --git a/README b/README
index 8612636..d2d200b 100644 (file)
--- a/README
+++ b/README
@@ -35,6 +35,7 @@ The currently supported sites are:
   * Dilbert.com
   * Howtoons.com
   * Instagram.com
+  * Facebook.com (public pages)
 
 Tweeper can be used via web or as a command line program, for example as
 a filter in your feed reader, by passing the URL of the user's public timeline
diff --git a/rss_converter_facebook.com.xsl b/rss_converter_facebook.com.xsl
new file mode 100644 (file)
index 0000000..b50be3d
--- /dev/null
@@ -0,0 +1,115 @@
+<!--
+  Stylesheet to convert a Facebook public page to RSS.
+
+  Copyright (C) 2015  Antonio Ospite <ao2@ao2.it>
+
+  This file is part of tweeper.
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+-->
+
+<!--
+  Since June 23rd, 2015 facebook.com deprecated the RSS feed endpoint for public pages:
+  https://developers.facebook.com/docs/apps/changelog#v2_3_90_day_deprecations
+
+  They suggest using the Graph API instead, but fail to mention that it no
+  longer works without authentication, so it cannot be considered an
+  _equivalent_ solution.
+
+  Luckily we've got Tweeper!
+-->
+
+<!--
+  The "php" prefix is bound to PHP's XSLT extension namespace so that XPath
+  expressions in this stylesheet can call php:functionString().  It is
+  excluded from the result so the declaration does not end up in the RSS.
+
+  On xsl:stylesheet the extension-element-prefixes attribute is written
+  without the xsl: prefix; the prefixed spelling is only meant for literal
+  result elements and extension elements.
+-->
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:php="http://php.net/xsl"
+    extension-element-prefixes="php"
+    exclude-result-prefixes="php">
+
+    <!-- Serialize the result as indented XML. -->
+    <xsl:output method="xml" indent="yes"/>
+
+    <!-- Site prefix: prepended to post hrefs and used as the feed's xml:base. -->
+    <xsl:variable name="BaseURL">
+        <xsl:text>https://facebook.com</xsl:text>
+    </xsl:variable>
+
+    <!--
+      Convert one post container into an RSS <item>.
+
+      title:       string value of the first <p> child of the post content,
+                   cut to 140 characters (ellipsis included) when longer
+      link, guid:  $BaseURL followed by the href of the a[@target=''] link
+      pubDate:     the abbr data-utime value, formatted by
+                   Tweeper::epoch_to_gmdate
+      description: the post content nodes followed by the children of the
+                   'mtm' div, wrapped in a CDATA section
+    -->
+    <xsl:template match="//div[contains(@class, 'userContentWrapper')]">
+        <xsl:variable name="post-body" select=".//div[contains(@class, 'userContent')]"/>
+        <xsl:variable name="post-url" select="concat($BaseURL, .//a[@target='']/@href)"/>
+        <xsl:variable name="headline" select="string($post-body/p)"/>
+        <xsl:variable name="max-headline-length" select="140"/>
+        <xsl:variable name="post-epoch" select="number(.//abbr[@data-shorten]/@data-utime)"/>
+        <item>
+            <title>
+                <!-- ellipsize, inspired by http://stackoverflow.com/questions/13622338 -->
+                <xsl:choose>
+                    <xsl:when test="string-length($headline) &lt;= $max-headline-length">
+                        <xsl:value-of select="$headline"/>
+                    </xsl:when>
+                    <xsl:otherwise>
+                        <!-- Leave room for the three-character ellipsis. -->
+                        <xsl:value-of select="concat(substring($headline, 1, $max-headline-length - 3), '...')"/>
+                    </xsl:otherwise>
+                </xsl:choose>
+            </title>
+            <link><xsl:value-of select="$post-url"/></link>
+            <guid><xsl:value-of select="$post-url"/></guid>
+            <pubDate>
+                <xsl:value-of select="php:functionString('Tweeper::epoch_to_gmdate', $post-epoch)"/>
+            </pubDate>
+            <description>
+                <!-- The CDATA delimiters are emitted unescaped around the copied markup. -->
+                <xsl:text disable-output-escaping="yes">&lt;![CDATA[</xsl:text>
+                <xsl:copy-of select="$post-body/node()"/>
+                <xsl:copy-of select=".//div[@class='mtm']/node()"/>
+                <xsl:text disable-output-escaping="yes">]]&gt;</xsl:text>
+            </description>
+        </item>
+    </xsl:template>
+
+    <!--
+      Entry point: emit the RSS 2.0 envelope.  Channel title, link and
+      description are read from the page's og:title, og:url and og:description
+      meta tags, the channel image from the 'profilePic img' image, and one
+      item is produced for every post container found in the document.
+    -->
+    <xsl:template match="/">
+        <xsl:variable name="page-name" select="//meta[@property='og:title']/@content"/>
+        <xsl:variable name="page-url" select="//meta[@property='og:url']/@content"/>
+        <xsl:variable name="page-summary" select="normalize-space(//meta[@property='og:description']/@content)"/>
+        <xsl:variable name="page-picture" select="//img[@class='profilePic img']/@src"/>
+
+        <rss version="2.0">
+            <xsl:attribute name="xml:base">
+                <xsl:value-of select="$BaseURL"/>
+            </xsl:attribute>
+            <channel>
+                <generator>Tweeper</generator>
+                <title><xsl:value-of select="$page-name"/></title>
+                <link><xsl:value-of select="$page-url"/></link>
+                <description><xsl:value-of select="$page-summary"/></description>
+                <image>
+                    <title><xsl:value-of select="$page-name"/></title>
+                    <link><xsl:value-of select="$page-url"/></link>
+                    <url><xsl:value-of select="$page-picture"/></url>
+                </image>
+                <!-- Items follow the channel metadata, in document order. -->
+                <xsl:apply-templates select="//div[contains(@class, 'userContentWrapper')]"/>
+            </channel>
+        </rss>
+    </xsl:template>
+</xsl:stylesheet>
index d6ced90..2782dac 100644 (file)
@@ -30,6 +30,7 @@ The sites that tweeper is able to scrape and convert to RSS are:
 * Dilbert.com
 * Howtoons.com
 * Instagram.com
+* Facebook.com (public pages)
 
 tweeper can be used as:
 
index efc0fd6..5e9d242 100644 (file)
@@ -213,6 +213,12 @@ class Tweeper {
     return $this->json_to_xml($html, '/window._sharedData = (.*);/', 'instagram');
   }
 
+  /**
+   * Remove HTML comment delimiters from Facebook page markup.
+   *
+   * Only the "<!--" and "-->" markers themselves are deleted; whatever text
+   * was between them stays in place.
+   *
+   * @param string $html
+   *   The markup to clean.
+   *
+   * @return string
+   *   The markup with every comment opener and closer removed.
+   */
+  private function preprocess_html_facebook_com($html) {
+    // Needles are processed in array order: all openers first, then closers.
+    return str_replace(array('<!--', '-->'), '', $html);
+  }
+
   private function html_to_xml($html, $host) {
     $xmlDoc = new DOMDocument();