From da4568f5a2d24e0933d44b16b5ef180095c42dab Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Sun, 9 Feb 2020 23:31:43 +0100 Subject: [PATCH] src/Tweeper.php: use a minimal User-Agent string to fix scraping twitter.com Twitter.com has started serving the user timeline as JSON when the user agent is a modern browser; this breaks scraping in Tweeper, which expects HTML content. Remove any version info from the User-Agent header used by Tweeper to make twitter.com think it is talking to a very old browser, tricking it into serving HTML content. NOTE: Tweeper cannot just use the default User-Agent from the cURL library because this would break scraping Facebook.com; using a minimal but still browser-like User-Agent seems to be a viable common denominator for all sites currently supported by Tweeper. --- src/Tweeper.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tweeper.php b/src/Tweeper.php index 877e882..aedde4d 100644 --- a/src/Tweeper.php +++ b/src/Tweeper.php @@ -36,7 +36,7 @@ date_default_timezone_set('UTC'); */ class Tweeper { - private static $userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0"; + private static $userAgent = "Mozilla/5.0"; private static $maxConnectionTimeout = 5; private static $maxConnectionRetries = 5; -- 2.1.4