From: Antonio Ospite Date: Wed, 14 Nov 2018 16:03:06 +0000 (+0100) Subject: src/Tweeper.php: add a retry mechanism for cURL sessions X-Git-Tag: v1.4.0~6 X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/6c535395860b51d2c1debf92437192c6bf862b16?ds=inline src/Tweeper.php: add a retry mechanism for cURL sessions Sometimes the connection to a remote host may stall and a resource cannot be retrieved. This makes Tweeper hang for a very long time, which can be annoying for users. Setting a shorter timeout and a retry mechanism usually works around the problem, allowing the resource to be retrieved eventually. Implement such a mechanism by adding a curlExec() method and, while at it, move non-cURL-related messages outside of getUrlContents() and getUrlInfo() to give the user a better understanding of what actually failed when even the retry mechanism was not able to retrieve the resource. --- diff --git a/src/Tweeper.php b/src/Tweeper.php index 7318900..d58b529 100644 --- a/src/Tweeper.php +++ b/src/Tweeper.php @@ -37,6 +37,8 @@ date_default_timezone_set('UTC'); class Tweeper { private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0"; + private static $maxConnectionTimeout = 5; + private static $maxConnectionRetries = 5; /** * Create a new Tweeper object controlling optional settings. @@ -86,12 +88,32 @@ class Tweeper { } /** + * Perform a cURL session multiple times when it fails with a timeout. + * + * @param resource $ch + * a cURL session handle. + */ + private static function curlExec($ch) { + $ret = FALSE; + $attempt = 0; + do { + $ret = curl_exec($ch); + if (FALSE === $ret) { + trigger_error(curl_error($ch), E_USER_WARNING); + } + } while (curl_errno($ch) == CURLE_OPERATION_TIMEDOUT && ++$attempt < Tweeper::$maxConnectionRetries); + + return $ret; + } + + /** * Get the contents from a URL. 
*/ private static function getUrlContents($url) { $ch = curl_init($url); curl_setopt_array($ch, array( CURLOPT_HEADER => FALSE, + CURLOPT_CONNECTTIMEOUT => Tweeper::$maxConnectionTimeout, // Follow http redirects to get the real URL. CURLOPT_FOLLOWLOCATION => TRUE, CURLOPT_RETURNTRANSFER => TRUE, @@ -100,10 +122,7 @@ class Tweeper { CURLOPT_HTTPHEADER => array('Accept-language: en'), CURLOPT_USERAGENT => Tweeper::$userAgent, )); - $contents = curl_exec($ch); - if (FALSE === $contents) { - trigger_error(curl_error($ch), E_USER_WARNING); - } + $contents = Tweeper::curlExec($ch); curl_close($ch); return $contents; @@ -117,6 +136,7 @@ class Tweeper { curl_setopt_array($ch, array( CURLOPT_HEADER => TRUE, CURLOPT_NOBODY => TRUE, + CURLOPT_CONNECTTIMEOUT => Tweeper::$maxConnectionTimeout, // Follow http redirects to get the real URL. CURLOPT_FOLLOWLOCATION => TRUE, CURLOPT_RETURNTRANSFER => TRUE, @@ -125,9 +145,8 @@ class Tweeper { CURLOPT_USERAGENT => Tweeper::$userAgent, )); - $ret = curl_exec($ch); + $ret = Tweeper::curlExec($ch); if (FALSE === $ret) { - trigger_error(curl_error($ch), E_USER_WARNING); curl_close($ch); return FALSE; } @@ -332,6 +351,7 @@ class Tweeper { $stylesheet_contents = Tweeper::getUrlContents($stylesheet); if (FALSE === $stylesheet_contents) { + trigger_error("Cannot open $stylesheet", E_USER_WARNING); return NULL; } @@ -382,6 +402,7 @@ class Tweeper { $html = Tweeper::getUrlContents($src_url); if (FALSE === $html) { + trigger_error("Failed to retrieve $src_url", E_USER_WARNING); return NULL; }