From f7fd5333da765ce6d5aa2916d779bbbbbb74da7d Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Fri, 27 Feb 2015 17:08:31 +0100 Subject: [PATCH 1/1] tweeper.php: factor out an html_to_xml() function from the tweep() method Split out the operation that gets some XML out of the web page, in preparation for adding more flexibility about what can be converted to XML for a subsequent transformation. --- tweeper.php | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/tweeper.php b/tweeper.php index 17fff8b..524928a 100644 --- a/tweeper.php +++ b/tweeper.php @@ -164,6 +164,21 @@ class Tweeper { return $xsltProcessor; } + private function html_to_xml($html) { + $xmlDoc = new DOMDocument(); + + // Handle warnings and errors when loading invalid HTML. + $xml_errors_value = libxml_use_internal_errors(true); + $xmlDoc->loadHTML($html); + foreach (libxml_get_errors() as $xml_error) { + $this->log_xml_error($xml_error); + } + libxml_clear_errors(); + libxml_use_internal_errors($xml_errors_value); + + return $xmlDoc; + } + public function tweep($src_url) { $url = parse_url($src_url); if (FALSE === $url || empty($url["host"])) { @@ -176,18 +191,15 @@ class Tweeper { return NULL; } - $html = Tweeper::get_contents($src_url); - - $xmlDoc = new DOMDocument(); + $html = $this->get_contents($src_url); + if (FALSE === $html) { + return NULL; + } - // Handle warnings and errors when loading invalid HTML. - $xml_errors_value = libxml_use_internal_errors(true); - $xmlDoc->loadHTML($html); - foreach (libxml_get_errors() as $xml_error) { - $this->log_xml_error($xml_error); + $xmlDoc = $this->html_to_xml($html); + if (NULL === $xmlDoc) { + return NULL; } - libxml_clear_errors(); - libxml_use_internal_errors($xml_errors_value); $output = $xsltProcessor->transformToXML($xmlDoc); -- 2.1.4