X-Git-Url: https://git.ao2.it/tweeper.git/blobdiff_plain/4336e714ebb2cf1b07c59a4534b9d9acd244c6f9..da9250bb9572c83ed397d3bfda0e44fa13016efb:/src/Tweeper.php diff --git a/src/Tweeper.php b/src/Tweeper.php index 7318900..b75409b 100644 --- a/src/Tweeper.php +++ b/src/Tweeper.php @@ -37,6 +37,8 @@ date_default_timezone_set('UTC'); class Tweeper { private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0"; + private static $maxConnectionTimeout = 5; + private static $maxConnectionRetries = 5; /** * Create a new Tweeper object controlling optional settings. @@ -86,12 +88,32 @@ class Tweeper { } /** + * Perform a cURL session multiple times when it fails with a timeout. + * + * @param resource $ch + * a cURL session handle. + */ + private static function curlExec($ch) { + $ret = FALSE; + $attempt = 0; + do { + $ret = curl_exec($ch); + if (FALSE === $ret) { + trigger_error(curl_error($ch), E_USER_WARNING); + } + } while (curl_errno($ch) == CURLE_OPERATION_TIMEDOUT && ++$attempt < Tweeper::$maxConnectionRetries); + + return $ret; + } + + /** * Get the contents from a URL. */ private static function getUrlContents($url) { $ch = curl_init($url); curl_setopt_array($ch, array( CURLOPT_HEADER => FALSE, + CURLOPT_CONNECTTIMEOUT => Tweeper::$maxConnectionTimeout, // Follow http redirects to get the real URL. CURLOPT_FOLLOWLOCATION => TRUE, CURLOPT_RETURNTRANSFER => TRUE, @@ -100,10 +122,7 @@ class Tweeper { CURLOPT_HTTPHEADER => array('Accept-language: en'), CURLOPT_USERAGENT => Tweeper::$userAgent, )); - $contents = curl_exec($ch); - if (FALSE === $contents) { - trigger_error(curl_error($ch), E_USER_WARNING); - } + $contents = Tweeper::curlExec($ch); curl_close($ch); return $contents; @@ -117,6 +136,7 @@ class Tweeper { curl_setopt_array($ch, array( CURLOPT_HEADER => TRUE, CURLOPT_NOBODY => TRUE, + CURLOPT_CONNECTTIMEOUT => Tweeper::$maxConnectionTimeout, // Follow http redirects to get the real URL. CURLOPT_FOLLOWLOCATION => TRUE, CURLOPT_RETURNTRANSFER => TRUE, @@ -125,9 +145,8 @@ class Tweeper { CURLOPT_USERAGENT => Tweeper::$userAgent, )); - $ret = curl_exec($ch); + $ret = Tweeper::curlExec($ch); if (FALSE === $ret) { - trigger_error(curl_error($ch), E_USER_WARNING); curl_close($ch); return FALSE; } @@ -332,6 +351,7 @@ class Tweeper { $stylesheet_contents = Tweeper::getUrlContents($stylesheet); if (FALSE === $stylesheet_contents) { + trigger_error("Cannot open $stylesheet", E_USER_WARNING); return NULL; } @@ -350,7 +370,7 @@ class Tweeper { /** * Convert the site content to RSS. */ - public function tweep($src_url, $host=NULL, $validate_scheme=TRUE) { + public function tweep($src_url, $host = NULL, $validate_scheme = TRUE) { $url = parse_url($src_url); if (FALSE === $url) { trigger_error("Invalid URL: $src_url", E_USER_WARNING); @@ -365,7 +385,7 @@ class Tweeper { } } - // if the host is not given derive it from the URL + // If the host is not given derive it from the URL. if (NULL === $host) { if (empty($url["host"])) { trigger_error("Invalid host in URL: $src_url", E_USER_WARNING); @@ -382,6 +402,7 @@ class Tweeper { $html = Tweeper::getUrlContents($src_url); if (FALSE === $html) { + trigger_error("Failed to retrieve $src_url", E_USER_WARNING); return NULL; }