summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
50842e9)
Split out the operation that gets some XML out of the web page, in
preparation for adding more flexibility about what can be converted
to XML for a subsequent transformation.
+ /**
+  * Parse an HTML string into a DOMDocument.
+  *
+  * libxml parse errors are collected internally and passed one by one to
+  * log_xml_error(); the libxml error-handling mode that was active on entry
+  * is restored before returning.
+  *
+  * @param string $html
+  *   The HTML markup to parse.
+  *
+  * @return DOMDocument|null
+  *   The parsed document, or NULL when the markup is empty or could not be
+  *   loaded.
+  */
+ private function html_to_xml($html) {
+   // loadHTML() rejects empty input (with a warning or a ValueError,
+   // depending on the PHP version), so report the failure up front.
+   if (NULL === $html || '' === $html) {
+     return NULL;
+   }
+
+   $xmlDoc = new DOMDocument();
+
+   // Handle warnings and errors when loading invalid HTML.
+   $xml_errors_value = libxml_use_internal_errors(true);
+   $loaded = $xmlDoc->loadHTML($html);
+   foreach (libxml_get_errors() as $xml_error) {
+     $this->log_xml_error($xml_error);
+   }
+   libxml_clear_errors();
+   libxml_use_internal_errors($xml_errors_value);
+
+   // The caller checks the result against NULL, so a failed load must not
+   // hand back a half-initialised document.
+   if (FALSE === $loaded) {
+     return NULL;
+   }
+
+   return $xmlDoc;
+ }
+
public function tweep($src_url) {
$url = parse_url($src_url);
if (FALSE === $url || empty($url["host"])) {
public function tweep($src_url) {
$url = parse_url($src_url);
if (FALSE === $url || empty($url["host"])) {
- $html = Tweeper::get_contents($src_url);
-
- $xmlDoc = new DOMDocument();
+ $html = $this->get_contents($src_url);
+ if (FALSE === $html) {
+ return NULL;
+ }
- // Handle warnings and errors when loading invalid HTML.
- $xml_errors_value = libxml_use_internal_errors(true);
- $xmlDoc->loadHTML($html);
- foreach (libxml_get_errors() as $xml_error) {
- $this->log_xml_error($xml_error);
+ $xmlDoc = $this->html_to_xml($html);
+ if (NULL === $xmlDoc) {
+ return NULL;
- libxml_clear_errors();
- libxml_use_internal_errors($xml_errors_value);
$output = $xsltProcessor->transformToXML($xmlDoc);
$output = $xsltProcessor->transformToXML($xmlDoc);