From: Antonio Ospite <ao2@ao2.it>
Date: Fri, 27 Feb 2015 16:08:31 +0000 (+0100)
Subject: tweeper.php: factor out an html_to_xml() function from the tweep() method
X-Git-Tag: v0.4~36
X-Git-Url: https://git.ao2.it/tweeper.git/commitdiff_plain/f7fd5333da765ce6d5aa2916d779bbbbbb74da7d

tweeper.php: factor out an html_to_xml() function from the tweep() method

Split out the operation that gets some XML out of the web page, in
preparation for adding more flexibility about what can be converted to
XML for a subsequent transformation.
---

diff --git a/tweeper.php b/tweeper.php
index 17fff8b..524928a 100644
--- a/tweeper.php
+++ b/tweeper.php
@@ -164,6 +164,21 @@ class Tweeper {
     return $xsltProcessor;
   }
 
+  private function html_to_xml($html) {
+    $xmlDoc = new DOMDocument();
+    // Collect libxml errors internally while loading possibly invalid HTML,
+    // pass each one to log_xml_error(), then restore the previous setting.
+    $xml_errors_value = libxml_use_internal_errors(true);
+    $xmlDoc->loadHTML($html);
+    foreach (libxml_get_errors() as $xml_error) {
+      $this->log_xml_error($xml_error);
+    }
+    libxml_clear_errors();
+    libxml_use_internal_errors($xml_errors_value);
+    // The document is returned even when errors were logged above.
+    return $xmlDoc;
+  }
+
   public function tweep($src_url) {
     $url = parse_url($src_url);
     if (FALSE === $url || empty($url["host"])) {
@@ -176,18 +191,15 @@ class Tweeper {
       return NULL;
     }
 
-    $html = Tweeper::get_contents($src_url);
-
-    $xmlDoc = new DOMDocument();
+    $html = $this->get_contents($src_url);
+    if (FALSE === $html) {
+      return NULL;
+    }
 
-    // Handle warnings and errors when loading invalid HTML.
-    $xml_errors_value = libxml_use_internal_errors(true);
-    $xmlDoc->loadHTML($html);
-    foreach (libxml_get_errors() as $xml_error) {
-      $this->log_xml_error($xml_error);
+    $xmlDoc = $this->html_to_xml($html);
+    if (NULL === $xmlDoc) {
+      return NULL;
     }
-    libxml_clear_errors();
-    libxml_use_internal_errors($xml_errors_value);
 
     $output = $xsltProcessor->transformToXML($xmlDoc);