From: Antonio Ospite <ospite@studenti.unina.it>
Date: Fri, 8 Nov 2013 09:44:44 +0000 (+0100)
Subject: Handle errors and warnings from loadHTML()
X-Git-Tag: v0.1~8
X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/07a6e0c19e175603bde51f800b0b484a327c515c?ds=inline

Handle errors and warnings from loadHTML()

When parsing invalid documents, loadHTML() emits warnings and errors
which may end up polluting the output of tweeper, depending on the value
of the "display_errors" variable in the PHP configuration; this may
result in the output being invalid RSS.

Handling those messages explicitly makes tweeper more robust against
different PHP configurations.

Thanks-to: gregor herrmann <gregoa@debian.org>
---

diff --git a/tweeper.php b/tweeper.php
index 923b303..e1483f1 100644
--- a/tweeper.php
+++ b/tweeper.php
@@ -122,11 +122,48 @@ class Tweeper {
     return $dom->saveXML($enc);
   }
 
+  /**
+   * Log one libxml error through error_log(), mimicking the message built
+   * by libxml.c::php_libxml_ctx_error_level().
+   *
+   * @param object $error Entry from libxml_get_errors(); the level, code,
+   *                      message, file and line properties are read.
+   */
+  private function log_xml_error($error) {
+    $output = "";
+
+    // Levels other than the three handled here get no severity prefix.
+    switch ($error->level) {
+    case LIBXML_ERR_WARNING:
+      $output .= "Warning $error->code: ";
+      break;
+    case LIBXML_ERR_ERROR:
+      $output .= "Error $error->code: ";
+      break;
+    case LIBXML_ERR_FATAL:
+      $output .= "Fatal Error $error->code: ";
+      break;
+    }
+
+    // Same ", line N" suffix whether or not a file name is available.
+    $location = $error->file ? $error->file : "Entity";
+    $output .= trim($error->message) . " in $location, line $error->line";
+    error_log($output);
+  }
+
   public function tweep($uri) {
     $html = Tweeper::get_contents($uri);
 
     $xmlDoc = new DOMDocument();
+
+    // Handle warnings and errors when loading invalid HTML.
+    $xml_errors_value = libxml_use_internal_errors(true);
     $xmlDoc->loadHTML($html);
+    foreach (libxml_get_errors() as $xml_error) {
+      $this->log_xml_error($xml_error);
+    }
+    libxml_clear_errors();
+    libxml_use_internal_errors($xml_errors_value);
 
     $output = $this->xsltProcessor->transformToXML($xmlDoc);