Some sites serve mangled HTML code, so a mechanism to clean it up before
loading it as XML is needed.
For instance, facebook.com puts some content inside HTML comments, and
these must be stripped in order to make the content available to the
HTML parser when loading the data into a DOMDocument.
+ $preprocess_html_host_method = 'preprocess_html_' . str_replace(".", "_", $host);
+ if (method_exists($this, $preprocess_html_host_method)) {
+ $html = call_user_func_array(array($this, $preprocess_html_host_method), array($html));
+ }
+
$xmlDoc = $this->html_to_xml($html, $host);
if (NULL === $xmlDoc) {
return NULL;
$xmlDoc = $this->html_to_xml($html, $host);
if (NULL === $xmlDoc) {
return NULL;