From b922824bc561f7f3e31c6f9962d96e9084497ced Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 10 Jun 2020 00:28:54 +0200 Subject: [PATCH 1/1] src/Tweeper.php: only override the User-Agent to a mobile one for twitter.com Using a mobile User-Agent made it possible to scrape twitter.com again, but it also had side effects: it was forcing facebook.com to serve the mobile version too. However, tweeper expected the desktop version of facebook.com, so this was breaking support for facebook.com. Scraping the mobile version of facebook.com would be inconvenient because the xsl would have to be rewritten extensively, and also the date of posts is not readily available as a timestamp in the mobile version. So override the User-Agent for twitter.com only; this makes the code a little uglier, but it works well enough for now. --- src/Tweeper.php | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Tweeper.php b/src/Tweeper.php index b794368..f1d579f 100644 --- a/src/Tweeper.php +++ b/src/Tweeper.php @@ -36,7 +36,7 @@ date_default_timezone_set('UTC'); */ class Tweeper { - private static $userAgent = "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J)"; + private static $userAgent = "Mozilla/5.0"; private static $maxConnectionTimeout = 5; private static $maxConnectionRetries = 5; @@ -455,7 +455,15 @@ class Tweeper { return NULL; } - $html = Tweeper::getUrlContents($src_url); + // Override User-Agent for twitter.com to force it to serve the mobile UI. + if ($host == "twitter.com") { + $user_agent = "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J)"; + } + else { + $user_agent = NULL; + } + + $html = Tweeper::getUrlContents($src_url, $user_agent); if (FALSE === $html) { trigger_error("Failed to retrieve $src_url", E_USER_WARNING); return NULL; -- 2.1.4