From 07a6e0c19e175603bde51f800b0b484a327c515c Mon Sep 17 00:00:00 2001
From: Antonio Ospite
Date: Fri, 8 Nov 2013 10:44:44 +0100
Subject: [PATCH] Handle errors and warnings from loadHTML()

When parsing invalid documents loadHTML() spits out warnings and errors
which may end up polluting the output of tweeper depending on the value
of the "display_errors" variable in the PHP configuration; this may
result in the output being invalid RSS.

Handling those messages explicitly makes tweeper more robust against
different PHP configurations.

Thanks-to: gregor herrmann
---
 Review notes (commentary only, not part of the commit message or diff):

 * log_xml_error($error) builds one log line from a libxml error object:
   a level prefix ("Warning <code>: ", "Error <code>: " or
   "Fatal Error <code>: "), the trimmed message, then " in <file>" when
   $error->file is set or " in Entity," otherwise, then " line <line>".
   The result is passed to error_log(); nothing is returned.
 * The switch has no default case, so an error whose level is none of
   the three handled constants is logged without a level prefix.
 * NOTE(review): the file branch emits no comma before " line" while the
   Entity branch does ("in Entity, line N" vs "in <file> line N").
   Confirm which form is intended against the message produced by
   php_libxml_ctx_error_level(), which this method says it mimics.
 * tweep() now turns on libxml internal error handling around
   loadHTML(), logs every collected error through log_xml_error(),
   clears the error buffer, and then restores the value that
   libxml_use_internal_errors(true) returned.

 tweeper.php | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/tweeper.php b/tweeper.php
index 923b303..e1483f1 100644
--- a/tweeper.php
+++ b/tweeper.php
@@ -122,11 +122,48 @@ class Tweeper {
     return $dom->saveXML($enc);
   }
 
+  /* Mimic the message from libxml.c::php_libxml_ctx_error_level() */
+  private function log_xml_error($error) {
+    $output = "";
+
+    switch ($error->level) {
+    case LIBXML_ERR_WARNING:
+      $output .= "Warning $error->code: ";
+      break;
+    case LIBXML_ERR_ERROR:
+      $output .= "Error $error->code: ";
+      break;
+    case LIBXML_ERR_FATAL:
+      $output .= "Fatal Error $error->code: ";
+      break;
+    }
+
+    $output .= trim($error->message);
+
+    if ($error->file) {
+      $output .= " in $error->file";
+    } else {
+      $output .= " in Entity,";
+    }
+
+    $output .=" line $error->line";
+
+    error_log($output);
+  }
+
   public function tweep($uri) {
     $html = Tweeper::get_contents($uri);
 
     $xmlDoc = new DOMDocument();
+
+    // Handle warnings and errors when loading invalid HTML.
+    $xml_errors_value = libxml_use_internal_errors(true);
     $xmlDoc->loadHTML($html);
+    foreach (libxml_get_errors() as $xml_error) {
+      $this->log_xml_error($xml_error);
+    }
+    libxml_clear_errors();
+    libxml_use_internal_errors($xml_errors_value);
 
     $output = $this->xsltProcessor->transformToXML($xmlDoc);
 
-- 
2.1.4