From 234c30c2df9c044a85c778d29dc55a83724fd9a0 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sun, 6 Nov 2016 09:43:06 +0100 Subject: [PATCH 01/16] TODO: add a note about the version of the dependencies in composer.json --- TODO | 3 +++ 1 file changed, 3 insertions(+) diff --git a/TODO b/TODO index 31b9e41..3c71811 100644 --- a/TODO +++ b/TODO @@ -5,3 +5,6 @@ - check the encoding of the tweets when UTF is used, maybe solvable with mb_convert_encoding()? See http://php.net/manual/en/domdocument.loadhtml.php + +- The dependencies on the Symfony components in composer.json could be more + relaxed like ">=2.7.0", but for now sticking to "2.7.*" is good enough. -- 2.1.4 From 901618842d5be25fa21cee16a1e123568ca117dc Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sun, 6 Nov 2016 10:06:19 +0100 Subject: [PATCH 02/16] autoload.php: improve the comment about the system-wide dependencies --- autoload.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autoload.php b/autoload.php index 1e11adf..4ba7832 100644 --- a/autoload.php +++ b/autoload.php @@ -73,7 +73,7 @@ else { /* * 2. load the system-wide autoloader from php-symphony-serializer * - * This allows to run tweeper without composer, provided that the + * This allows to run tweeper without composer, as long as the Symfony * dependencies are available system-wide. * * For example, the Debian package takes care of that. -- 2.1.4 From acf3e387ca460a9c5d5ecaaf14079553cce63971 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sat, 10 Dec 2016 19:34:57 +0100 Subject: [PATCH 03/16] tweeper: allow running tweeper from vendor/bin also when it's not a symlink --- tweeper | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tweeper b/tweeper index 6256e20..d4b04e3 100755 --- a/tweeper +++ b/tweeper @@ -6,4 +6,17 @@ * CLI file to run tweeper. */ -require dirname(__FILE__) . '/tweeper.php'; +if (preg_match('/' . preg_quote('/vendor/bin', '/') . 
'$/', __DIR__)) { + /* + * This covers the case of tweeper running from a "vendor/bin" directory in + * a composer setup, but with the tweeper executable _not_ being a symlink. + * + * This can happen when the filesystem does not support symlinks. + */ + $package_name = 'ao2/tweeper'; + require __DIR__ . '/../' . $package_name . '/tweeper.php'; +} +else { + /* For the other cases look at the autoload.php required by tweeper.php */ + require __DIR__ . '/tweeper.php'; +} -- 2.1.4 From 26c0c241140cf859141313bca3758fa52da8a8ca Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sat, 10 Dec 2016 21:57:38 +0100 Subject: [PATCH 04/16] Makefile: fix installation after the code restructuring --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4625aa8..7937d41 100644 --- a/Makefile +++ b/Makefile @@ -21,9 +21,10 @@ installdocs: docs install: installdocs install -d $(DESTDIR)$(TWEEPER_DIR) - install -m644 *.xsl $(DESTDIR)$(TWEEPER_DIR) install -m644 *.php $(DESTDIR)$(TWEEPER_DIR) install -m755 tweeper $(DESTDIR)$(TWEEPER_DIR) + install -d $(DESTDIR)$(TWEEPER_DIR)/src + install -m644 src/* $(DESTDIR)$(TWEEPER_DIR)/src install -d $(DESTDIR)$(BIN_DIR) ln -sf $(TWEEPER_DIR)/tweeper $(DESTDIR)$(BIN_DIR)/tweeper @echo -e "\n\nINSTALLATION COMPLETE" -- 2.1.4 From 3035fd6142ce3459fb7ba28c2f56a7c44fc47dd5 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sat, 10 Dec 2016 21:59:19 +0100 Subject: [PATCH 05/16] Makefile: make the symlink in BIN_DIR refer to the executable in DESTDIR Also make the symlink relative, this way it is always valid whether DESTDIR is specified or not. 
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7937d41..0102bc2 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,6 @@ install: installdocs install -d $(DESTDIR)$(TWEEPER_DIR)/src install -m644 src/* $(DESTDIR)$(TWEEPER_DIR)/src install -d $(DESTDIR)$(BIN_DIR) - ln -sf $(TWEEPER_DIR)/tweeper $(DESTDIR)$(BIN_DIR)/tweeper + ln -rsf $(DESTDIR)$(TWEEPER_DIR)/tweeper $(DESTDIR)$(BIN_DIR)/tweeper @echo -e "\n\nINSTALLATION COMPLETE" @echo -e "Make sure '$(PHP_SCRIPT_DIR)' is in PHP include_path!\n" -- 2.1.4 From 637c14afc01074bc957c771db666643ef2249c56 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sat, 10 Dec 2016 22:01:47 +0100 Subject: [PATCH 06/16] Makefile: mention DESTDIR in the "INSTALLATION COMPLETE" message --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0102bc2..eff450a 100644 --- a/Makefile +++ b/Makefile @@ -28,4 +28,4 @@ install: installdocs install -d $(DESTDIR)$(BIN_DIR) ln -rsf $(DESTDIR)$(TWEEPER_DIR)/tweeper $(DESTDIR)$(BIN_DIR)/tweeper @echo -e "\n\nINSTALLATION COMPLETE" - @echo -e "Make sure '$(PHP_SCRIPT_DIR)' is in PHP include_path!\n" + @echo -e "Make sure '$(DESTDIR)$(PHP_SCRIPT_DIR)' is in PHP include_path!\n" -- 2.1.4 From 2e380be4bd841ca87362a6573adc51f0333a1760 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sun, 11 Dec 2016 00:38:14 +0100 Subject: [PATCH 07/16] composer.json: make the dependencies on symfony components more relaxed --- composer.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index abc1538..d490494 100644 --- a/composer.json +++ b/composer.json @@ -19,8 +19,8 @@ "ext-dom": "*", "ext-json": "*", "ext-xsl": "*", - "symfony/serializer": "2.7.*", - "symfony/property-access": "2.7.*" + "symfony/serializer": ">=2.7.0", + "symfony/property-access": ">=2.7.0" }, "autoload": { "psr-4": { "Tweeper\\": "src/" } -- 2.1.4 From 
e686dd8220ae668e8c828c0ade7264468e78f7b3 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sun, 11 Dec 2016 10:23:20 +0100 Subject: [PATCH 08/16] NEWS: add release notes for the v1.0.0 release The release numbering scheme has been changed to match what composer expects. --- NEWS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/NEWS b/NEWS index d125dd5..6fccebb 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,15 @@ +News for v1.0.0: +================ + + * Support "application/octet-stream" as an enclosure content type + * Support "application/pdf" as an enclosure content type + * Fix information leakage by validating the URL scheme + * Code restructuring to make it easier to use tweeper as a library in other + projects + * Allow installing tweeper via composer, the packagist page is at: + https://packagist.org/packages/ao2/tweeper + * Misc robustness fixes + News for v0.6: ============== -- 2.1.4 From 0ea00a5f93f7a0ab1c52020ab75aa15bd190497f Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Thu, 9 Feb 2017 00:52:00 +0100 Subject: [PATCH 09/16] rss_converter_facebook.com.xsl: new wrapper classname Facebook seems to have changed the classname of the wrapping div from "userContentWrapper" to "fbUserContent". 
--- src/rss_converter_facebook.com.xsl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rss_converter_facebook.com.xsl b/src/rss_converter_facebook.com.xsl index 933d3d2..bff19d3 100644 --- a/src/rss_converter_facebook.com.xsl +++ b/src/rss_converter_facebook.com.xsl @@ -52,7 +52,7 @@ name="page-id" select="substring-after(//meta[@property='al:android:url']/@content, 'fb://page/')"/> - + - + @@ -134,7 +134,7 @@ - + -- 2.1.4 From 1e4bae7fc60b93a284aee6b5bdce038345b33e83 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Thu, 9 Feb 2017 15:49:59 +0100 Subject: [PATCH 10/16] rss_converter_facebook.com.xsl: fix the URL of the channel image --- src/rss_converter_facebook.com.xsl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rss_converter_facebook.com.xsl b/src/rss_converter_facebook.com.xsl index bff19d3..fc4f272 100644 --- a/src/rss_converter_facebook.com.xsl +++ b/src/rss_converter_facebook.com.xsl @@ -107,6 +107,7 @@ + @@ -131,7 +132,7 @@ - + -- 2.1.4 From c00b4af91e650a23612cf94067606ec5851e6204 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Thu, 9 Feb 2017 16:48:55 +0100 Subject: [PATCH 11/16] Tweeper.php: allow to pass parameters to Tweeper::tweep() This allows to call Tweeper::tweep() on file:// URLs which can make development faster. --- src/Tweeper.php | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/Tweeper.php b/src/Tweeper.php index 93ac9e0..8ac2fe3 100644 --- a/src/Tweeper.php +++ b/src/Tweeper.php @@ -315,21 +315,30 @@ class Tweeper { /** * Convert the site content to RSS. 
*/ - public function tweep($src_url) { + public function tweep($src_url, $host=NULL, $validate_scheme=TRUE) { $url = parse_url($src_url); - if (FALSE === $url || empty($url["host"])) { + if (FALSE === $url) { trigger_error("Invalid URL: $src_url", E_USER_ERROR); return NULL; } - $scheme = $url["scheme"]; - if (!in_array($scheme, array("http", "https"))) { - trigger_error("unsupported scheme: $scheme", E_USER_ERROR); - return NULL; + if (TRUE === $validate_scheme) { + $scheme = $url["scheme"]; + if (!in_array($scheme, array("http", "https"))) { + trigger_error("unsupported scheme: $scheme", E_USER_ERROR); + return NULL; + } } - // Strip the leading www. to be more forgiving on input URLs. - $host = preg_replace('/^www\./', '', $url["host"]); + // if the host is not given derive it from the URL + if (NULL === $host) { + if (empty($url["host"])) { + trigger_error("Invalid host in URL: $src_url", E_USER_ERROR); + return NULL; + } + // Strip the leading www. to be more forgiving on input URLs. + $host = preg_replace('/^www\./', '', $url["host"]); + } $xsltProcessor = $this->loadStylesheet($host); if (NULL === $xsltProcessor) { -- 2.1.4 From b54460b67e0ad7a8b4c49b6ff15dba68ac575b1a Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Thu, 9 Feb 2017 18:15:54 +0100 Subject: [PATCH 12/16] Add the helper script tests/fetch_facebook_page.sh The script helps retrieving the actual html of a public page on facebook.com, ignoring the pages which require the CAPTCHA. This allows to have a local copy of the page to test tweeper on. 
--- tests/fetch_facebook_page.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100755 tests/fetch_facebook_page.sh diff --git a/tests/fetch_facebook_page.sh b/tests/fetch_facebook_page.sh new file mode 100755 index 0000000..f25966e --- /dev/null +++ b/tests/fetch_facebook_page.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Facebook requires a CAPTCHA most of the times, so keep fetching the URL as +# long as needed, until the page is shown with no CAPTCHA. + +set -e + +USER_AGENT="Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0"; + +while true; +do + # Force language to en-us to make sure that the string matching works + OUTPUT=$(wget -nv --user-agent="$USER_AGENT" --header='Accept-Language: en-us' -O - -- "$1") + if echo $OUTPUT | grep -q -v "Security Check Required"; + then + echo "$OUTPUT" > facebook.html + break + fi + sleep 5 +done -- 2.1.4 From 128e753561ab887cbf4b6d04900d05b88fd4027a Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Thu, 9 Feb 2017 18:21:17 +0100 Subject: [PATCH 13/16] Add the helper script tests/tweeper_file The script allows to scrape a local file, this speeds up development and testing. --- tests/tweeper_file | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100755 tests/tweeper_file diff --git a/tests/tweeper_file b/tests/tweeper_file new file mode 100755 index 0000000..15de10c --- /dev/null +++ b/tests/tweeper_file @@ -0,0 +1,25 @@ +#!/usr/bin/env php + \n"; + +if ($argc < 3) { + fwrite(STDERR, $usage); + exit(1); +} + +$file_url = 'file://' . 
realpath($argv[1]); +$host = $argv[2]; + +$tweeper = new Tweeper(); +$output = $tweeper->tweep($file_url, $host, false); +if (is_null($output)) { + exit(1); +} +echo $output; -- 2.1.4 From fe993292dedb4b5e6444f778218b8a02fc84e69b Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Tue, 14 Feb 2017 09:41:35 +0100 Subject: [PATCH 14/16] HACKING: add instructions about installing the Drupal style in PHP_CodeSniffer --- HACKING | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/HACKING b/HACKING index 14fba7e..8345eaf 100644 --- a/HACKING +++ b/HACKING @@ -5,6 +5,13 @@ Style compliance can be checked using the Coder Sniffer extension to the PEAR PHP_CodeSniffer project, for instructions about how to install Coder Sniffer see https://www.drupal.org/node/1419988 -Use this command to check the style: +TL;DR: install drupal/coder and enable the Drupal coding standard in +PHP_CodeSniffer: + + $ composer global require drupal/coder + $ export PATH="$HOME/.config/composer/vendor/bin:$PATH" + $ phpcs --config-set installed_paths $HOME/.config/composer/vendor/drupal/coder/coder_sniffer/ + +And then use this command to check the style: $ phpcs --standard=Drupal . -- 2.1.4 From ad6d0cb44b8ea2b895658bf2b74d293b9a880ca1 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 8 Mar 2017 09:20:01 +0100 Subject: [PATCH 15/16] rss_converter_facebook.com.xsl: match both the new and the old wrapper class Facebook still seems to use the "userContentWrapper" sometimes, it's not clear if "fbUserContent" was only used for a short period of time or if both are actually used; when in doubt, support both. 
--- src/rss_converter_facebook.com.xsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rss_converter_facebook.com.xsl b/src/rss_converter_facebook.com.xsl index fc4f272..def8e69 100644 --- a/src/rss_converter_facebook.com.xsl +++ b/src/rss_converter_facebook.com.xsl @@ -52,7 +52,7 @@ name="page-id" select="substring-after(//meta[@property='al:android:url']/@content, 'fb://page/')"/> - + - - + + @@ -135,7 +135,7 @@ - + -- 2.1.4 From 6817108435b055272e0fff6883206ee1b386b9f6 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Thu, 8 Jun 2017 15:35:27 +0200 Subject: [PATCH 16/16] rss_converter_twitter.com.xsl: strip the style attribute from HTML elements Elements in an RSS item description are not supposed to have a style attribute, and they don't really need to anyways, so filter it out in the identity template. This also fixes an issue with Twitter images being shown with a offset in liferea. --- src/rss_converter_twitter.com.xsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rss_converter_twitter.com.xsl b/src/rss_converter_twitter.com.xsl index 58539ae..fc250e5 100644 --- a/src/rss_converter_twitter.com.xsl +++ b/src/rss_converter_twitter.com.xsl @@ -35,7 +35,7 @@ - + -- 2.1.4