From: Antonio Ospite Date: Wed, 1 Jul 2015 11:37:57 +0000 (+0200) Subject: tweeper.php: support host-specific methods for preprocessing the HTML data X-Git-Tag: v0.4~21 X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/7097a8ad2ef040bc81a8c5f7ed7cc02e0073eaab?ds=sidebyside tweeper.php: support host-specific methods for preprocessing the HTML data Some sites serve mangled HTML code, so a mechanism to clean it up before loading it as XML is needed. For instance, facebook.com puts some content inside HTML comments, and these must be stripped in order to make the content available to the HTML parser when loading the data into a DOMDocument. --- diff --git a/tweeper.php b/tweeper.php index 2444a39..efc0fd6 100644 --- a/tweeper.php +++ b/tweeper.php @@ -257,6 +257,11 @@ class Tweeper { return NULL; } + $preprocess_html_host_method = 'preprocess_html_' . str_replace(".", "_", $host); + if (method_exists($this, $preprocess_html_host_method)) { + $html = call_user_func_array(array($this, $preprocess_html_host_method), array($html)); + } + $xmlDoc = $this->html_to_xml($html, $host); if (NULL === $xmlDoc) { return NULL;