From: Antonio Ospite Date: Fri, 27 Feb 2015 16:08:31 +0000 (+0100) Subject: tweeper.php: factor out an html_to_xml() function from the tweep() method X-Git-Tag: v0.4~36 X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/f7fd5333da765ce6d5aa2916d779bbbbbb74da7d?hp=50842e9d4c8ba313313710d0b4472548390cd77b tweeper.php: factor out an html_to_xml() function from the tweep() method Split out the operation that gets some XML out of the web page, in preparation for adding more flexibility about what can be converted to XML for a subsequent transformation. --- diff --git a/tweeper.php b/tweeper.php index 17fff8b..524928a 100644 --- a/tweeper.php +++ b/tweeper.php @@ -164,6 +164,21 @@ class Tweeper { return $xsltProcessor; } + private function html_to_xml($html) { + $xmlDoc = new DOMDocument(); + + // Handle warnings and errors when loading invalid HTML. + $xml_errors_value = libxml_use_internal_errors(true); + $xmlDoc->loadHTML($html); + foreach (libxml_get_errors() as $xml_error) { + $this->log_xml_error($xml_error); + } + libxml_clear_errors(); + libxml_use_internal_errors($xml_errors_value); + + return $xmlDoc; + } + public function tweep($src_url) { $url = parse_url($src_url); if (FALSE === $url || empty($url["host"])) { @@ -176,18 +191,15 @@ class Tweeper { return NULL; } - $html = Tweeper::get_contents($src_url); - - $xmlDoc = new DOMDocument(); + $html = $this->get_contents($src_url); + if (FALSE === $html) { + return NULL; + } - // Handle warnings and errors when loading invalid HTML. - $xml_errors_value = libxml_use_internal_errors(true); - $xmlDoc->loadHTML($html); - foreach (libxml_get_errors() as $xml_error) { - $this->log_xml_error($xml_error); + $xmlDoc = $this->html_to_xml($html); + if (NULL === $xmlDoc) { + return NULL; } - libxml_clear_errors(); - libxml_use_internal_errors($xml_errors_value); $output = $xsltProcessor->transformToXML($xmlDoc);