When parsing invalid documents, loadHTML() emits warnings and errors
which may end up polluting the output of tweeper, depending on the value
of the "display_errors" directive in the PHP configuration; this may
result in the output being invalid RSS.
Handling those messages explicitly makes tweeper more robust against
different PHP configurations.
Thanks-to: gregor herrmann <gregoa@debian.org>
return $dom->saveXML($enc);
}
return $dom->saveXML($enc);
}
+ /**
+  * Log a single libxml error through error_log().
+  *
+  * The text mimics the message built by
+  * libxml.c::php_libxml_ctx_error_level(): the trimmed libxml message,
+  * followed by the location ("in <file>," or "in Entity," when no file
+  * name is set) and the line number. A severity label and the libxml
+  * error code are prepended for the three known severity levels.
+  *
+  * @param LibXMLError $error
+  *   One entry of the array returned by libxml_get_errors().
+  */
+ private function log_xml_error($error) {
+   $output = "";
+
+   // Any level other than the three handled here gets no prefix.
+   switch ($error->level) {
+     case LIBXML_ERR_WARNING:
+       $output .= "Warning $error->code: ";
+       break;
+     case LIBXML_ERR_ERROR:
+       $output .= "Error $error->code: ";
+       break;
+     case LIBXML_ERR_FATAL:
+       $output .= "Fatal Error $error->code: ";
+       break;
+   }
+
+   // Drop surrounding whitespace so the log entry stays on one line.
+   $output .= trim($error->message);
+
+   // Both branches end with a comma so that the location is always
+   // separated from the line number in the same way.
+   if ($error->file) {
+     $output .= " in $error->file,";
+   } else {
+     $output .= " in Entity,";
+   }
+
+   $output .= " line $error->line";
+
+   error_log($output);
+ }
+
public function tweep($uri) {
$html = Tweeper::get_contents($uri);
$xmlDoc = new DOMDocument();
public function tweep($uri) {
$html = Tweeper::get_contents($uri);
$xmlDoc = new DOMDocument();
+
+ // Handle warnings and errors when loading invalid HTML.
+ $xml_errors_value = libxml_use_internal_errors(true);
$xmlDoc->loadHTML($html);
$xmlDoc->loadHTML($html);
+ foreach (libxml_get_errors() as $xml_error) {
+ $this->log_xml_error($xml_error);
+ }
+ libxml_clear_errors();
+ libxml_use_internal_errors($xml_errors_value);
$output = $this->xsltProcessor->transformToXML($xmlDoc);
$output = $this->xsltProcessor->transformToXML($xmlDoc);