X-Git-Url: https://git.ao2.it/tweeper.git/blobdiff_plain/6c535395860b51d2c1debf92437192c6bf862b16..1e7a09621f810d54f9badc8bcf3f01a6c6ae2347:/src/Tweeper.php diff --git a/src/Tweeper.php b/src/Tweeper.php index d58b529..e98623b 100644 --- a/src/Tweeper.php +++ b/src/Tweeper.php @@ -36,7 +36,7 @@ date_default_timezone_set('UTC'); */ class Tweeper { - private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0"; + private static $userAgent = "Mozilla/5.0"; private static $maxConnectionTimeout = 5; private static $maxConnectionRetries = 5; @@ -49,10 +49,18 @@ class Tweeper { * Enables showing the username in front of the content for multi-user * sites (enabled by default). Only some stylesheets supports this * functionality (twitter, instagram, pump.io). + * @param bool $show_multimedia + * Enables showing multimedia content (images, videos) directly in the + * item description (enabled by default). Only some stylesheets supports + * this functionality (twitter, instagram, dilbert). + * @param bool $verbose_output + * Enables showing non-fatal errors like XML parsing errors. */ - public function __construct($generate_enclosure = FALSE, $show_usernames = TRUE) { + public function __construct($generate_enclosure = FALSE, $show_usernames = TRUE, $show_multimedia = TRUE, $verbose_output = TRUE) { $this->generate_enclosure = $generate_enclosure; $this->show_usernames = $show_usernames; + $this->show_multimedia = $show_multimedia; + $this->verbose_output = $verbose_output; } /** @@ -116,9 +124,8 @@ class Tweeper { CURLOPT_CONNECTTIMEOUT => Tweeper::$maxConnectionTimeout, // Follow http redirects to get the real URL. CURLOPT_FOLLOWLOCATION => TRUE, + CURLOPT_COOKIEFILE => "", CURLOPT_RETURNTRANSFER => TRUE, - CURLOPT_SSL_VERIFYHOST => FALSE, - CURLOPT_SSL_VERIFYPEER => FALSE, CURLOPT_HTTPHEADER => array('Accept-language: en'), CURLOPT_USERAGENT => Tweeper::$userAgent, )); @@ -140,8 +147,6 @@ class Tweeper { // Follow http redirects to get the real URL. CURLOPT_FOLLOWLOCATION => TRUE, CURLOPT_RETURNTRANSFER => TRUE, - CURLOPT_SSL_VERIFYHOST => FALSE, - CURLOPT_SSL_VERIFYPEER => FALSE, CURLOPT_USERAGENT => Tweeper::$userAgent, )); @@ -201,10 +206,17 @@ class Tweeper { // See http://sourceforge.net/p/feedvalidator/bugs/72/ $http_url = preg_replace("/^https/", "http", $url_info['url']); + // When the server does not provide a Content-Length header, + // curl_getinfo() would return a negative value for + // "download_content_length", however RSS recommends to use 0 when the + // enclosure's size cannot be determined. + // See: https://www.feedvalidator.org/docs/error/UseZeroForUnknown.html + $length = max($url_info['download_content_length'], 0); + $dom = new DOMDocument(); $enc = $dom->createElement('enclosure'); $enc->setAttribute('url', $http_url); - $enc->setAttribute('length', $url_info['download_content_length']); + $enc->setAttribute('length', $length); $enc->setAttribute('type', $url_info['content_type']); return $enc; @@ -330,8 +342,10 @@ class Tweeper { $xmlDoc->loadHTML($html); } - foreach (libxml_get_errors() as $xml_error) { - Tweeper::logXmlError($xml_error); + if ($this->verbose_output) { + foreach (libxml_get_errors() as $xml_error) { + Tweeper::logXmlError($xml_error); + } } libxml_clear_errors(); libxml_use_internal_errors($xml_errors_value); @@ -362,6 +376,7 @@ class Tweeper { $xsltProcessor->registerPHPFunctions(); $xsltProcessor->setParameter('', 'generate-enclosure', $this->generate_enclosure); $xsltProcessor->setParameter('', 'show-usernames', $this->show_usernames); + $xsltProcessor->setParameter('', 'show-multimedia', $this->show_multimedia); $xsltProcessor->importStylesheet($xslDoc); return $xsltProcessor; @@ -370,7 +385,7 @@ class Tweeper { /** * Convert the site content to RSS. */ - public function tweep($src_url, $host=NULL, $validate_scheme=TRUE) { + public function tweep($src_url, $host = NULL, $validate_scheme = TRUE) { $url = parse_url($src_url); if (FALSE === $url) { trigger_error("Invalid URL: $src_url", E_USER_WARNING); @@ -385,7 +400,7 @@ class Tweeper { } } - // if the host is not given derive it from the URL + // If the host is not given derive it from the URL. if (NULL === $host) { if (empty($url["host"])) { trigger_error("Invalid host in URL: $src_url", E_USER_WARNING);