From 2efcaf768f68d35872c0d06136279e673128c46f Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sat, 27 Jul 2019 22:06:15 +0200 Subject: [PATCH] src/Tweeper.php: enable cookie handling to fix scraping twitter.com When the user agent used by a client matches an actual browser, twitter.com enables content-security-policy and redirects the client on the first request to make it reload the content. After the redirection, the server assumes that the client sets cookies appropriately; however, cURL does not do that by default. Enable cookie handling in cURL to fix scraping twitter.com. NOTE: the CURLOPT_COOKIEFILE option is set to an empty string to enable in-memory handling of the cookies, removing the need for a temporary file on the filesystem; see: https://www.php.net/manual/en/function.curl-setopt.php --- src/Tweeper.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Tweeper.php b/src/Tweeper.php index 09bd7cc..7ecbf2f 100644 --- a/src/Tweeper.php +++ b/src/Tweeper.php @@ -121,6 +121,7 @@ class Tweeper { CURLOPT_CONNECTTIMEOUT => Tweeper::$maxConnectionTimeout, // Follow http redirects to get the real URL. CURLOPT_FOLLOWLOCATION => TRUE, + CURLOPT_COOKIEFILE => "", CURLOPT_RETURNTRANSFER => TRUE, CURLOPT_SSL_VERIFYHOST => FALSE, CURLOPT_SSL_VERIFYPEER => FALSE, -- 2.1.4