From: Antonio Ospite
Date: Fri, 8 Nov 2013 09:44:44 +0000 (+0100)
Subject: Handle errors and warnings from loadHTML()
X-Git-Tag: v0.1~8
X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/07a6e0c19e175603bde51f800b0b484a327c515c?ds=inline;hp=-c

Handle errors and warnings from loadHTML()

When parsing invalid documents loadHTML() spits out warnings and errors
which may end up polluting the output of tweeper depending on the value
of the "display_errors" variable in the PHP configuration; this may
result in the output being invalid RSS.

Handling those messages explicitly makes tweeper more robust against
different PHP configurations.

Thanks-to: gregor herrmann
---

07a6e0c19e175603bde51f800b0b484a327c515c
diff --git a/tweeper.php b/tweeper.php
index 923b303..e1483f1 100644
--- a/tweeper.php
+++ b/tweeper.php
@@ -122,11 +122,48 @@ class Tweeper {
     return $dom->saveXML($enc);
   }
 
+  /* Mimic the message from libxml.c::php_libxml_ctx_error_level() */
+  private function log_xml_error($error) {
+    $output = "";
+
+    switch ($error->level) {
+    case LIBXML_ERR_WARNING:
+      $output .= "Warning $error->code: ";
+      break;
+    case LIBXML_ERR_ERROR:
+      $output .= "Error $error->code: ";
+      break;
+    case LIBXML_ERR_FATAL:
+      $output .= "Fatal Error $error->code: ";
+      break;
+    }
+
+    $output .= trim($error->message);
+
+    if ($error->file) {
+      $output .= " in $error->file";
+    } else {
+      $output .= " in Entity,";
+    }
+
+    $output .=" line $error->line";
+
+    error_log($output);
+  }
+
   public function tweep($uri) {
     $html = Tweeper::get_contents($uri);
 
     $xmlDoc = new DOMDocument();
+
+    // Handle warnings and errors when loading invalid HTML.
+    $xml_errors_value = libxml_use_internal_errors(true);
     $xmlDoc->loadHTML($html);
+    foreach (libxml_get_errors() as $xml_error) {
+      $this->log_xml_error($xml_error);
+    }
+    libxml_clear_errors();
+    libxml_use_internal_errors($xml_errors_value);
 
     $output = $this->xsltProcessor->transformToXML($xmlDoc);
 