X-Git-Url: https://git.ao2.it/tweeper.git/blobdiff_plain/b187bb677361d17a468abf749332d081a194b4bd..b247492bd16cd46b7697ee33dc114e874ba8b48d:/src/Tweeper.php diff --git a/src/Tweeper.php b/src/Tweeper.php index 73cbe81..d60e43b 100644 --- a/src/Tweeper.php +++ b/src/Tweeper.php @@ -6,7 +6,7 @@ namespace Tweeper; * @file * Tweeper - a Twitter to RSS web scraper. * - * Copyright (C) 2013-2015 Antonio Ospite + * Copyright (C) 2013-2016 Antonio Ospite * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,8 +25,6 @@ namespace Tweeper; use DOMDocument; use XSLTProcessor; -require_once 'Symfony/Component/Serializer/autoload.php'; - use Symfony\Component\Serializer\Serializer; use Symfony\Component\Serializer\Encoder\XmlEncoder; use Symfony\Component\Serializer\Normalizer\ObjectNormalizer; @@ -251,7 +249,10 @@ class Tweeper { return NULL; } - return Tweeper::jsonToXml($matches[1], 'instagram'); + // Filter elements which will result in invalid XML element names. + $json = str_replace('404_as_react', '_404_as_react', $matches[1]); + + return Tweeper::jsonToXml($json, 'instagram'); } /** @@ -317,21 +318,30 @@ class Tweeper { /** * Convert the site content to RSS. */ - public function tweep($src_url) { + public function tweep($src_url, $host=NULL, $validate_scheme=TRUE) { $url = parse_url($src_url); - if (FALSE === $url || empty($url["host"])) { + if (FALSE === $url) { trigger_error("Invalid URL: $src_url", E_USER_ERROR); return NULL; } - $scheme = $url["scheme"]; - if (!in_array($scheme, array("http", "https"))) { - trigger_error("unsupported scheme: $scheme", E_USER_ERROR); - return NULL; + if (TRUE === $validate_scheme) { + $scheme = $url["scheme"]; + if (!in_array($scheme, array("http", "https"))) { + trigger_error("unsupported scheme: $scheme", E_USER_ERROR); + return NULL; + } } - // Strip the leading www. to be more forgiving on input URLs. - $host = preg_replace('/^www\./', '', $url["host"]); + // if the host is not given derive it from the URL + if (NULL === $host) { + if (empty($url["host"])) { + trigger_error("Invalid host in URL: $src_url", E_USER_ERROR); + return NULL; + } + // Strip the leading www. to be more forgiving on input URLs. + $host = preg_replace('/^www\./', '', $url["host"]); + } $xsltProcessor = $this->loadStylesheet($host); if (NULL === $xsltProcessor) {