From b54460b67e0ad7a8b4c49b6ff15dba68ac575b1a Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ao2@ao2.it>
Date: Thu, 9 Feb 2017 18:15:54 +0100
Subject: [PATCH 1/1] Add the helper script tests/fetch_facebook_page.sh

The script helps retrieve the actual HTML of a public page on
facebook.com, skipping the responses which require a CAPTCHA.

This makes it possible to have a local copy of the page to test tweeper on.
---
 tests/fetch_facebook_page.sh | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100755 tests/fetch_facebook_page.sh

diff --git a/tests/fetch_facebook_page.sh b/tests/fetch_facebook_page.sh
new file mode 100755
index 0000000..f25966e
--- /dev/null
+++ b/tests/fetch_facebook_page.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+#
+# Facebook requires a CAPTCHA most of the times, so keep fetching the URL as
+# long as needed, until the page is shown with no CAPTCHA.
+
+set -e
+
+USER_AGENT="Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0";
+
+while true;
+do
+  # Force language to en-us to make sure that the string matching works
+  OUTPUT=$(wget -nv --user-agent="$USER_AGENT" --header='Accept-Language: en-us' -O - -- "$1")
+  if echo $OUTPUT | grep -q -v "Security Check Required";
+  then
+    echo "$OUTPUT" > facebook.html
+    break
+  fi
+  sleep 5
+done
-- 
2.1.4