-// direct_download_links - Add direct download links for (video) files
-// version 0.2
-// 2011-11-14
-// Copyright (C) 2011 Antonio Ospite <ospite@studenti.unina.it>
+// direct_download_links - Add direct download links
+// version 0.4
+// 2012-02-12
+// Copyright (C) 2011,2012 Antonio Ospite <ospite@studenti.unina.it>
// Released under the GPL license
// http://www.gnu.org/copyleft/gpl.html
//
// ==UserScript==
// @name Direct Download Links
// @namespace http://git.ao2.it/GM_direct_download_links.git
-// @description Add direct download links for (video) files
+// @description Add direct download links
+// @grant GM_log
+// @grant GM_xmlhttpRequest
// @include http://video.repubblica.it/*
// @include http://tv.repubblica.it/*
// @include http://trovacinema.repubblica.it/*
// @include http://www.kataweb.it/tvzap/*
+// @include http://www.rai.tv/*
+// @include http://soundcloud.com/*
+// @include http://www.telecinco.es/*
+// @include http://slideshare.net/*
+// @include http://www.slideshare.net/*
// ==/UserScript==
+//
/*
* TODO:
* - find a way to use the same string as in the @include lines to match the
- * current window.location
- * - use xpath instead of regexp like in http://a32.me/2009/11/greasemonkey/
+ * current window.location. Look for something like GM_testUrl() which builds
+ * the regexp starting from a glob line.
* - use jquery, like shown in http://a32.me/2009/11/greasemonkey/
+ * - Support the "download" attribute for anchors:
+ * http://www.whatwg.org/specs/web-apps/current-work/multipage/links.html#downloading-resources
*/
+/* Fields supported by the "site" object.
+ *
+ * Mandatory fields:
+ * locationRegexp: the regexp describing the URL of the page we are modifying
+ * urlContainerXPath: the XPath of the element containing the URL to link
+ * urlRegexp: the regular expression for finding the URL, the first
+ * sub-pattern is taken as the URL
+ * linkDestXPath: the XPath of the element where to place the Direct Download link
+ *
+ *
+ * Optional fields:
+ *
+ * initCommand: a function called before the regExp is matched, this can
+ * be useful in cases when some action needs to be done in
+ * order to make the element containing the regExp be actually
+ * rendered. It must accept a 'site' parameter.
+ *
+ * onEvent: used to delay the urlRegexp matching to a certain event like
+ * 'DOMNodeInserted' useful when the URL is added by some javascript
+ * library. It has two fields:
+ *
+ * evt: the event we want to wait for (e.g. 'DOMNodeInserted')
+ *
+ * targetElement: the lowercase tag name the event target must have
+ * for the urlRegexp matching to be performed.
+ *
+ * processURL: a function to process the URL before adding the Direct
+ * Download Link to the page; it must accept 'site' and 'URL'
+ * parameters and dispatch the UrlFetched event to pass the
+ * modified URL to _add_link().
+ *
+ */
+// Table of the supported sites: every entry is a 'site' object. The dots of
+// the host names in locationRegexp are escaped so that they only match a
+// literal '.'.
var supported_sites = [
{
locationRegexp: /^http:\/\/video\.repubblica\.it\/.*$/,
- fileElem: 'contA',
- fileRegexp: /'pcUrl', '((http|mms):\/\/[^']*)'/,
- linkDest: 'contA',
+ urlContainerXPath: '//div[@id="contA"]',
+ urlRegexp: /[^\/]addParam\('format', '[^']*', '((http|mms):\/\/[^']*)'/,
+ linkDestXPath: '//div[@id="contA"]',
},
{
locationRegexp: /^http:\/\/tv\.repubblica\.it\/.*$/,
- fileElem: 'boxPlayer',
- fileRegexp: /'pcUrl', '((http|mms):\/\/[^']*)'/,
+ urlContainerXPath: '//div[@id="boxPlayer"]',
+ urlRegexp: /'pcUrl', '((http|mms):\/\/[^']*)'/,
linkDest: 'box_embed',
+ linkDestXPath: '//div[@id="box_embed"]',
},
{
locationRegexp: /^http:\/\/trovacinema\.repubblica\.it\/.*$/,
- fileElem: 'col-center',
- fileRegexp: /'flvUrl', '((http|mms):\/\/[^']*)'/,
- linkDest: 'col-center',
+ urlContainerXPath: '//div[@id="col-center"]',
+ urlRegexp: /'flvUrl', '((http|mms):\/\/[^']*)'/,
+ linkDestXPath: '//div[@id="col-center"]',
},
{
locationRegexp: /^http:\/\/www\.kataweb\.it\/tvzap\/.*$/,
- fileElem: 'tvzap_video',
- fileRegexp: /'pcUrl', '((http|mms):\/\/[^']*)'/,
- linkDest: 'playerCont',
+ urlContainerXPath: '//div[@id="contAB"]',
+ urlRegexp: /[^\/]addParam\('pcUrl', '((http|mms):\/\/[^']*)'/,
+ linkDestXPath: '//div[@id="contAB"]',
+ },
+ {
+ locationRegexp: /^http:\/\/www\.rai\.tv\/.*$/,
+ // Make Silverlight.isInstalled() always answer true; presumably this lets
+ // the page render the player markup the URL is taken from even when the
+ // plugin is missing -- TODO confirm.
+ initCommand: function(site) {
+ unsafeWindow.Silverlight.isInstalled = function(version) {
+ return true;
+ };
+ },
+ urlContainerXPath: '//div[@id="silverlightControlHost" or @id="SilverlightPlayer"]',
+ urlRegexp: /mediaUri=(http:\/\/[^,"]*)/,
+ onEvent: { evt: 'DOMNodeInserted', targetElement: 'object' },
+ processURL: _rai_get_actual_url,
+ linkDestXPath: '//div[@id="silverlightControlHost" or @id="SilverlightPlayer"]',
+ },
+ {
+ locationRegexp: /^http:\/\/soundcloud\.com\/.*$/,
+ urlContainerXPath: '//div[@id="main-content-inner"]',
+ urlRegexp: /"streamUrl":"([^"]*)"/,
+ linkDestXPath: '//div[@id="main-content-inner"]',
+ },
+ {
+ locationRegexp: /^http:\/\/www\.telecinco\.es\/.*$/,
+ urlContainerXPath: '//video[@class="video-js"]',
+ urlRegexp: /src="([^"]*)"/,
+ linkDestXPath: '//div[@class="pg-bd"]',
+ },
+ {
+ locationRegexp: /^http:\/\/(www\.|)slideshare\.net\/.*$/,
+ urlContainerXPath: '//script[@id="page-json"]',
+ urlRegexp: /"ppt_location":"([^"]*)"/,
+ // The text matched by urlRegexp is an object id, not a URL: build the
+ // actual URL from it and hand it over through the UrlFetched event.
+ processURL: function(site, object_id) {
+ var URL = 'http://s3.amazonaws.com/slideshare/' + object_id + '.xml';
+ var evt = document.createEvent('Event');
+ evt.initEvent('UrlFetched', true, true);
+ evt.site = site;
+ evt.URL = URL;
+ document.dispatchEvent(evt);
+ return;
+ },
+ linkDestXPath: '//div[@class="playerWrapper"]',
},
];
var result = window.location.href.match(site.locationRegexp);
if (result) {
- var ret = direct_download_link_add(window.location.href, site.fileElem, site.fileRegexp, site.linkDest);
- if (!ret) {
- alert('Cannot add the link');
+ if (site.initCommand) {
+ site.initCommand(site);
}
+ direct_download_link_add(window.location.href, site);
}
}
+/* Evaluate the XPath expression 'query' relative to 'root' (the whole
+ * document when 'root' is omitted) and return a single matching node; the
+ * result is null when nothing matches. */
+function getElementByXPath(query, root) {
+  var contextNode = root || document;
+  var xpathResult = document.evaluate(query, contextNode, null,
+      XPathResult.ANY_UNORDERED_NODE_TYPE, null);
+  return xpathResult.singleNodeValue;
+}
+
+/* from http://stackoverflow.com/questions/1912501 */
+/* Decode the HTML character references (e.g. '&amp;') found in 'input' and
+ * return the resulting plain string; an empty input gives "".
+ *
+ * A <textarea> is used as the scratch element because its content is parsed
+ * as text only: markup embedded in the page-supplied string is never turned
+ * into live elements, and 'value' holds the whole decoded string rather than
+ * just the first child node. */
+function htmlDecode(input){
+  var scratch = document.createElement('textarea');
+  scratch.innerHTML = input;
+  return scratch.value;
+}
+
/**
- * Add a Direct Download link on the page for the specified file
+ * Add a Direct Download link on the page for the specified URL
*
- * @param pageURL: the URL of the page we are modifying
- * @param fileElem: the element containing the file URL
- * @param fileRegexp: the regular expression for finding the file, the first
- * sub-pattern is taken as the file URL
- * @param linkDest: the element where to place the Direct Download link
+ * @param pageURL: the URL of the page we are modifying; it is stored in
+ * site.pageURL and used in the log messages
+ * @param site: a 'site' object described above.
*
- * @return: null on error, true on success
+ * @return: null when the element containing the URL cannot be found,
+ * undefined otherwise (the link is added by the UrlFetched handler)
*/
-function direct_download_link_add(pageURL, fileElem, fileRegexp, linkDest) {
- var element = document.getElementById(fileElem);
+function direct_download_link_add(pageURL, site) {
+  site.pageURL = pageURL;
+  var element = getElementByXPath(site.urlContainerXPath);
if (!element) {
- alert('DirectDL (' + pageURL + '): Cannot find the element ' + fileElem + ' containing the file URL.');
+  DDL_log('DirectDL (' + site.pageURL + '): Cannot find the element ' + site.urlContainerXPath + ' containing the URL.');
return null;
}
+  // _add_link() creates or updates the link each time a URL is dispatched.
+  document.addEventListener('UrlFetched', _add_link, true);
+
+  // This is used for sites adding the URL to the DOM after DOMContentLoaded,
+  // for example by some javascript library (like Silverlight.js on rai.tv).
+  if (site.onEvent) {
+    element.addEventListener(site.onEvent.evt, function(e) {
+      // nodeName is defined for every kind of node, while tagName only
+      // exists on elements: the event target can be a text or comment node
+      // and tagName.toLowerCase() would throw on those.
+      var nodeName = e.target.nodeName;
+      if (site.onEvent.targetElement &&
+          nodeName.toLowerCase() !== site.onEvent.targetElement) {
+        DDL_log('DirectDL (' + site.pageURL + '): skipping element ' + nodeName);
+        return;
+      }
+      _get_URL(site, element);
+    }, false);
+    return;
+  }
+
+  _get_URL(site, element);
+}
+
+/* Look for the URL in the markup of 'element' using site.urlRegexp (the
+ * first sub-pattern is the URL) and pass it on: to site.processURL() when
+ * the site defines one, otherwise straight to the UrlFetched listeners after
+ * decoding its HTML entities. Failures are only logged. */
+function _get_URL(site, element) {
var content = element.innerHTML;
if (!content) {
- alert('DirectDL (' + pageURL + '): content is null, cannot find file URL.');
- return null;
+  DDL_log('DirectDL (' + site.pageURL + '): content is null, cannot find URL.');
+  return;
}
- var matches = content.match(fileRegexp);
+  var matches = content.match(site.urlRegexp);
if (!matches || matches.length < 2 || !matches[1]) {
- alert('DirectDL (' + pageURL + '): file URL not found, check the fileRegexp');
- return null;
+  DDL_log('DirectDL (' + site.pageURL + '): URL not found, check the urlRegexp');
+  return;
+  }
+  // matches[1] is known to be non-empty here, no further check is needed.
+  var URL = matches[1];
- }
- var fileURL = matches[1];
- var links = document.getElementById(linkDest);
- if (!links) {
- alert('DirectDl (' + pageURL + '): Cannot add the direct download link.');
- return null;
+  if (site.processURL) {
+    // processURL() is in charge of dispatching UrlFetched itself.
+    site.processURL(site, URL);
+    return;
+  }
+
+  var evt = document.createEvent('Event');
+  evt.initEvent('UrlFetched', true, true);
+  evt.site = site;
+  evt.URL = htmlDecode(URL);
+  document.dispatchEvent(evt);
+}
+
+/* UrlFetched handler: e.site is the 'site' object and e.URL the URL to link.
+ * The link is inserted as first child of the element selected by
+ * site.linkDestXPath; when the link is already on the page only its href is
+ * refreshed. */
+function _add_link(e) {
+  var site = e.site;
+  var href = e.URL;
+
+  var destination = getElementByXPath(site.linkDestXPath);
+  if (!destination) {
+    DDL_log('DirectDl (' + site.pageURL + '): Cannot add the direct download link.');
+    return;
}
- var download_link = document.createElement('a');
- download_link.textContent = 'Direct Link';
- download_link.setAttribute('href', fileURL);
- var style = 'background-color: white; color: blue;';
- style += ' border: 2px solid red;'
- style += ' float: right; font-size: large;';
- style += ' padding: .5em; margin: 1em;'
- download_link.setAttribute('style', style);
+  var linkId = 'GM_direct_downaload_link';
+
+  // The link may be there already when _get_URL() runs on async events:
+  // in that case updating the href attribute is all that is needed.
+  var existing = document.getElementById(linkId);
+  if (existing) {
+    existing.setAttribute('href', href);
+    return;
+  }
+
+  var styleRules = [
+    'background-color: white; color: blue;',
+    'border: 2px solid red;',
+    'float: right; font-size: large;',
+    'padding: .5em; margin: 1em;',
+    'position: relative; z-index: 1000;'
+  ];
+
+  var link = document.createElement('a');
+  link.textContent = 'Direct Link';
+  link.setAttribute('id', linkId);
+  link.setAttribute('href', href);
+  link.setAttribute('style', styleRules.join(' '));
+
+  destination.insertBefore(link, destination.firstChild);
+}
+
+/* Report 'message' through GM_log(); flipping the local 'debug' flag to true
+ * shows it in an alert() box instead. */
+function DDL_log(message) {
+  var debug = false;
+  if (!debug) {
+    GM_log(message);
+    return;
+  }
+  alert(message);
+}
+
+/* processURL implementation for rai.tv: 'URL' is the address found in the
+ * page. A URL ending in '.csm' is passed on unchanged; any other one is
+ * downloaded and the stream location is read from the XML it returns (HREF
+ * of the first REF element and/or mediaSource of the first playListItem).
+ * Every resulting URL is handed to _add_link() through a UrlFetched event
+ * carrying 'site'. */
+function _rai_get_actual_url(site, URL) {
+
+  // Pass 'href' on to the UrlFetched listeners.
+  function dispatchURL(href) {
+    var evt = document.createEvent('Event');
+    evt.initEvent('UrlFetched', true, true);
+    evt.site = site;
+    evt.URL = href;
+    document.dispatchEvent(evt);
+  }
+
+  // SmoothStreaming manifest files get added without processing, for now:
+  if (URL.match(/.*\.csm$/)) {
+    dispatchURL(URL);
+    return;
+  }
+
+  // The request is deferred with setTimeout() because of the problem
+  // described at:
+  // http://www.neaveru.com/wordpress/index.php/2008/05/09/greasemonkey-bug-domnodeinserted-event-doesnt-allow-gm_xmlhttprequest/
+  setTimeout(function() {
+    GM_xmlhttpRequest({
+      method: "GET",
+      // XXX A custom header. This is the "clever" trick Rai uses to ensure
+      // the content is accessed by www.rai.tv only...
+      headers: {'viaurl': 'www.rai.tv'},
+      url: URL,
+      onload: function(response) {
+        // Escape the ampersands which do not start a character reference,
+        // a bare '&' (as found in query strings) is rejected by the XML
+        // parser.
+        var text = response.responseText.replace(
+            /&(?!(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#x[0-9a-fA-F]+);)/g, '&amp;');
+        var xmlDoc = new DOMParser().parseFromString(text, "text/xml");
+
+        // MMS streams
+        var elems = xmlDoc.getElementsByTagName('REF');
+        if (elems.length > 0) {
+          dispatchURL(elems[0].getAttribute('HREF'));
+        }
+        // SmoothStreaming streams
+        elems = xmlDoc.getElementsByTagName('playListItem');
+        if (elems.length > 0) {
+          dispatchURL(elems[0].getAttribute('mediaSource'));
+        }
+      }
+    });
+  }, 0);
- links.insertBefore(download_link, links.firstChild);
- return true;
}