+ /**
+  * Get transfer information about a URL without downloading its body.
+  *
+  * A body-less request is performed, following HTTP redirects. The raw
+  * response headers are discarded; the value returned is the array produced
+  * by curl_getinfo() for the request.
+  *
+  * @param string $url
+  *   The URL to query.
+  *
+  * @return array
+  *   The curl_getinfo() array. When the cURL handle cannot be created, an
+  *   array holding only the 'url', 'content_type' and
+  *   'download_content_length' keys is returned, with values marking the
+  *   information as unknown.
+  */
+ private static function getUrlInfo($url) {
+   $ch = curl_init($url);
+   if ($ch === FALSE) {
+     error_log("Cannot initialize cURL for URL: " . $url);
+
+     // Keep the keys read by generateEnclosure() available, so that the
+     // caller does not have to special-case this failure.
+     return array(
+       'url' => $url,
+       'content_type' => NULL,
+       'download_content_length' => -1,
+     );
+   }
+
+   curl_setopt_array($ch, array(
+     CURLOPT_HEADER => TRUE,
+     CURLOPT_NOBODY => TRUE,
+     // Follow http redirects to get the real URL.
+     CURLOPT_FOLLOWLOCATION => TRUE,
+     // Do not print the response, it is not needed.
+     CURLOPT_RETURNTRANSFER => TRUE,
+     // NOTE(review): certificate verification is disabled here, so the
+     // information returned for https URLs comes from an unauthenticated
+     // peer. Left unchanged on purpose; confirm this is still wanted.
+     CURLOPT_SSL_VERIFYHOST => FALSE,
+     CURLOPT_SSL_VERIFYPEER => FALSE,
+     CURLOPT_USERAGENT => Tweeper::$userAgent,
+   ));
+
+   // Report transfer failures instead of ignoring them silently. The info
+   // array is still returned below, it just lacks the response details.
+   if (curl_exec($ch) === FALSE) {
+     error_log("Cannot get info for URL: " . $url . " (" . curl_error($ch) . ")");
+   }
+
+   $url_info = curl_getinfo($ch);
+   curl_close($ch);
+
+   return $url_info;
+ }
+
+ /**
+  * Generate an RSS <enclosure/> element.
+  *
+  * The URL is queried with getUrlInfo() to find out its final location,
+  * content type and size; an element is only generated for the media types
+  * listed below.
+  *
+  * @param string $url
+  *   The URL of the resource to enclose.
+  *
+  * @return string
+  *   The serialized <enclosure/> element, or an empty string when the
+  *   content type of the resource is not supported.
+  */
+ public static function generateEnclosure($url) {
+   $supported_content_types = array(
+     "application/ogg",
+     "audio/aac",
+     "audio/mp4",
+     "audio/mpeg",
+     "audio/ogg",
+     "audio/vorbis",
+     "audio/wav",
+     "audio/webm",
+     "audio/x-midi",
+     "image/gif",
+     "image/jpeg",
+     "video/avi",
+     "video/mp4",
+     "video/mpeg",
+     "video/ogg",
+   );
+
+   // The RSS specification says that the enclosure element URL must be http.
+   // See http://sourceforge.net/p/feedvalidator/bugs/72/
+   // The scheme is matched case-insensitively and up to the colon, so that
+   // only an actual "https:" scheme is rewritten.
+   // NOTE(review): getUrlInfo() follows redirects, so the URL used below may
+   // still end up being https if the server redirects there.
+   $http_url = preg_replace("/^https:/i", "http:", $url);
+
+   $url_info = Tweeper::getUrlInfo($http_url);
+
+   // The Content-Type header may carry parameters and use any letter case,
+   // e.g. "Image/JPEG; charset=binary"; compare only the bare media type.
+   $content_type_parts = explode(';', (string) $url_info['content_type']);
+   $content_type = strtolower(trim($content_type_parts[0]));
+
+   $supported = in_array($content_type, $supported_content_types, TRUE);
+   if (!$supported) {
+     error_log("Unsupported enclosure content type \"" . $url_info['content_type'] . "\" for URL: " . $url_info['url']);
+     return '';
+   }
+
+   // cURL reports -1 when the size is not known, but the length attribute
+   // must not be negative; 0 is the conventional value for an unknown size.
+   $length = $url_info['download_content_length'];
+   if (!is_numeric($length) || $length < 0) {
+     $length = 0;
+   }
+
+   $dom = new DOMDocument();
+   $enc = $dom->createElement('enclosure');
+   $enc->setAttribute('url', $url_info['url']);
+   $enc->setAttribute('length', $length);
+   $enc->setAttribute('type', $content_type);
+
+   $dom->appendChild($enc);
+
+   // Serialize only the element, without the XML declaration.
+   return $dom->saveXML($enc);
+ }
+
+ /**
+  * Log a libxml error in the format used by PHP itself.
+  *
+  * Mimic the message from libxml.c::php_libxml_ctx_error_level()
+  *
+  * @param object $error
+  *   The error to log; its level, code, message, file and line properties
+  *   are used.
+  */
+ private static function logXmlError($error) {
+   // Map the severity to its label; unknown levels get no prefix at all.
+   $level = $error->level;
+   if ($level == LIBXML_ERR_WARNING) {
+     $label = "Warning";
+   }
+   elseif ($level == LIBXML_ERR_ERROR) {
+     $label = "Error";
+   }
+   elseif ($level == LIBXML_ERR_FATAL) {
+     $label = "Fatal Error";
+   }
+   else {
+     $label = NULL;
+   }
+
+   $prefix = ($label === NULL) ? "" : $label . " " . $error->code . ": ";
+
+   // Errors not tied to a file are reported as coming from "Entity".
+   $location = $error->file ? " in " . $error->file : " in Entity,";
+
+   error_log($prefix . trim($error->message) . $location . " line " . $error->line);
+ }
+
+ /**
+  * Convert json to XML.
+  *
+  * @param string $json
+  *   The JSON text to convert.
+  * @param string $root_node_name
+  *   The name to give to the root element of the generated XML.
+  *
+  * @return string|null
+  *   The XML text, or NULL when the JSON cannot be decoded, decodes to an
+  *   empty value, or cannot be serialized.
+  */
+ private static function jsonToXml($json, $root_node_name) {
+   // Apparently the ObjectNormalizer used afterwards is not able to handle
+   // the stdClass object created by json_decode() with the default setting
+   // of returning objects; so ask for associative arrays instead.
+   $data = json_decode($json, TRUE);
+
+   // Make decoding failures visible instead of silently returning NULL.
+   if ($data === NULL && json_last_error() !== JSON_ERROR_NONE) {
+     error_log("Cannot decode JSON data: " . json_last_error_msg());
+   }
+
+   // Empty data is treated like a failure too: there is nothing to convert.
+   if (!$data) {
+     return NULL;
+   }
+
+   $encoder = new XmlEncoder();
+   $normalizer = new ObjectNormalizer();
+   $serializer = new Serializer(array($normalizer), array($encoder));
+
+   $serializer_options = array(
+     'xml_encoding' => "UTF-8",
+     'xml_format_output' => TRUE,
+     'xml_root_node_name' => $root_node_name,
+   );
+
+   $xml_data = $serializer->serialize($data, 'xml', $serializer_options);
+   if (!$xml_data) {
+     trigger_error("Cannot serialize data", E_USER_ERROR);
+     return NULL;
+   }
+
+   return $xml_data;
+ }
+
+ /**
+  * Convert the Instagram content to XML.
+  *
+  * The page data is taken from the JSON value assigned to
+  * window._sharedData in the HTML code.
+  *
+  * @param string $html
+  *   The HTML code of the Instagram page.
+  *
+  * @return string|null
+  *   The XML text with an 'instagram' root element, or NULL when the data
+  *   cannot be found or converted.
+  */
+ private function getXmlInstagramCom($html) {
+   // Extract the json data from the html code.
+   // The dot is escaped so that only the literal "window._sharedData" name
+   // is matched. The capture is greedy: it runs up to the last semicolon on
+   // the line, as the JSON itself may contain semicolons.
+   $json_match_expr = '/window\._sharedData = (.*);/';
+   $ret = preg_match($json_match_expr, $html, $matches);
+   if ($ret !== 1) {
+     trigger_error("Cannot match expression: $json_match_expr\n", E_USER_ERROR);
+     return NULL;
+   }
+
+   return Tweeper::jsonToXml($matches[1], 'instagram');
+ }
+
+ /**
+  * Make the Facebook HTML processable.
+  *
+  * The HTML comment delimiters are stripped, which exposes whatever markup
+  * was wrapped inside comments.
+  *
+  * @param string $html
+  *   The HTML code of the Facebook page.
+  *
+  * @return string
+  *   The HTML code without any comment opening or closing delimiter.
+  */
+ private function preprocessHtmlFacebookCom($html) {
+   // The delimiters are removed in order: first every opening one, then
+   // every closing one from the result.
+   $comment_delimiters = array('<!--', '-->');
+
+   return str_replace($comment_delimiters, '', $html);
+ }