$ch = curl_init($url);
curl_setopt_array($ch, array(
CURLOPT_HEADER => FALSE,
+ CURLOPT_FOLLOWLOCATION => TRUE, // follow http redirects to get the real URL
CURLOPT_RETURNTRANSFER => TRUE,
CURLOPT_SSL_VERIFYHOST => FALSE,
CURLOPT_SSL_VERIFYPEER => FALSE,
return $dom->saveXML($enc);
}
+ /**
+  * Write a single libxml error to the PHP error log.
+  *
+  * The entry mimics the message built by
+  * libxml.c::php_libxml_ctx_error_level() and has the shape
+  * "<severity> <code>: <message> in <file|Entity>, line <n>".
+  *
+  * @param LibXMLError $error One entry as returned by libxml_get_errors().
+  */
+ private function log_xml_error($error) {
+     $output = "";
+
+     // Severity prefix; a level outside these three gets no prefix at all.
+     switch ($error->level) {
+         case LIBXML_ERR_WARNING:
+             $output .= "Warning $error->code: ";
+             break;
+         case LIBXML_ERR_ERROR:
+             $output .= "Error $error->code: ";
+             break;
+         case LIBXML_ERR_FATAL:
+             $output .= "Fatal Error $error->code: ";
+             break;
+     }
+
+     // trim() drops surrounding whitespace (libxml messages usually end
+     // with a newline) so the log entry stays on one line.
+     $output .= trim($error->message);
+
+     // Both branches end with a comma so the location is punctuated the
+     // same way whether or not a file name is available.
+     if ($error->file) {
+         $output .= " in $error->file,";
+     } else {
+         $output .= " in Entity,";
+     }
+
+     $output .= " line $error->line";
+
+     error_log($output);
+ }
+
public function tweep($uri) {
$html = Tweeper::get_contents($uri);
$xmlDoc = new DOMDocument();
+
+ // Handle warnings and errors when loading invalid HTML.
+ $xml_errors_value = libxml_use_internal_errors(true);
$xmlDoc->loadHTML($html);
+ foreach (libxml_get_errors() as $xml_error) {
+ $this->log_xml_error($xml_error);
+ }
+ libxml_clear_errors();
+ libxml_use_internal_errors($xml_errors_value);
$output = $this->xsltProcessor->transformToXML($xmlDoc);