From: Antonio Ospite Date: Fri, 27 Feb 2015 16:08:31 +0000 (+0100) Subject: tweeper.php: factor out an html_to_xml() function from the tweep() method X-Git-Tag: v0.4~36 X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/f7fd5333da765ce6d5aa2916d779bbbbbb74da7d?ds=inline tweeper.php: factor out an html_to_xml() function from the tweep() method Split out the operation that gets some XML out of the web page, in preparation for adding more flexibility about what can be converted to XML for a subsequent transformation. --- diff --git a/tweeper.php b/tweeper.php index 17fff8b..524928a 100644 --- a/tweeper.php +++ b/tweeper.php @@ -164,6 +164,21 @@ class Tweeper { return $xsltProcessor; } + private function html_to_xml($html) { + $xmlDoc = new DOMDocument(); + + // Handle warnings and errors when loading invalid HTML. + $xml_errors_value = libxml_use_internal_errors(true); + $xmlDoc->loadHTML($html); + foreach (libxml_get_errors() as $xml_error) { + $this->log_xml_error($xml_error); + } + libxml_clear_errors(); + libxml_use_internal_errors($xml_errors_value); + + return $xmlDoc; + } + public function tweep($src_url) { $url = parse_url($src_url); if (FALSE === $url || empty($url["host"])) { @@ -176,18 +191,15 @@ class Tweeper { return NULL; } - $html = Tweeper::get_contents($src_url); - - $xmlDoc = new DOMDocument(); + $html = $this->get_contents($src_url); + if (FALSE === $html) { + return NULL; + } - // Handle warnings and errors when loading invalid HTML. - $xml_errors_value = libxml_use_internal_errors(true); - $xmlDoc->loadHTML($html); - foreach (libxml_get_errors() as $xml_error) { - $this->log_xml_error($xml_error); + $xmlDoc = $this->html_to_xml($html); + if (NULL === $xmlDoc) { + return NULL; } - libxml_clear_errors(); - libxml_use_internal_errors($xml_errors_value); $output = $xsltProcessor->transformToXML($xmlDoc);