direct_download_links.user.js

   1 // direct_download_links - Add direct download links
   2 // version 0.4
   3 // 2012-02-12
   4 // Copyright (C) 2011,2012  Antonio Ospite <ospite@studenti.unina.it>
   5 // Released under the GPL license
   6 // http://www.gnu.org/copyleft/gpl.html
   7 //
   8 // --------------------------------------------------------------------
   9 //
  10 // This is a Greasemonkey user script.
  11 //
  12 // To install, you need Greasemonkey: https://addons.mozilla.org/en-US/firefox/addon/748
  13 // Then restart Firefox and revisit this script.
  14 // Under Tools, there will be a new menu item to "Install User Script".
  15 // Accept the default configuration and install.
  16 //
  17 // To uninstall, go to Tools/Manage User Scripts,
  18 // select "Direct Download Links", and click Uninstall.
  19 //
  20 // --------------------------------------------------------------------
  21 //
  22 // ==UserScript==
  23 // @name           Direct Download Links
  24 // @namespace      http://git.ao2.it/GM_direct_download_links.git
  25 // @description    Add direct download links
  26 // @grant          GM_log
  27 // @grant          GM_xmlhttpRequest
  28 // @include        http://video.repubblica.it/*
  29 // @include        http://tv.repubblica.it/*
  30 // @include        http://trovacinema.repubblica.it/*
  31 // @include        http://www.kataweb.it/tvzap/*
  32 // @include        http://www.rai.tv/*
  33 // @include        http://soundcloud.com/*
  34 // @include        http://www.telecinco.es/*
  35 // ==/UserScript==
  36 //
  37
  38 /*
  39  * TODO:
  40  *  - find a way to use the same string as in the @include lines to match the
  41  *    current window.location. Look for something like GM_testUrl() which builds
  42  *    the regexp starting from a glob line.
  43  *  - use jquery, like shown in http://a32.me/2009/11/greasemonkey/
  44  *  - Support the "download" attribute for anchors:
  45  *    http://www.whatwg.org/specs/web-apps/current-work/multipage/links.html#downloading-resources
  46  */
  47
  48 /* Fields supported by the "site" object.
  49  *
  50  * Manadatory fields:
  51  *   locationRegExp: the regexp describing the URL of the page we are modifying
  52  *   urlContainerXPath: the XPath of the element containing the URL to link
  53  *   urlRegexp: the regular expression for finding the URL, the first
  54  *              sub-pattern is taken as the URL
  55  *   linkDestXPath: the XPath of the element where to place the Direct Download link
  56  *
  57  *
  58  * Optional fields:
  59  *
  60  *   initCommand: a function called before the regExp is matched, this can
  61  *                be useful in cases when some action needs to be done in
  62  *                order to make the element containing the regExp be actually
  63  *                rendered. It must accept  a 'site' parameter.
  64  *
  65  *   onEvent: used to delay the urlRegexp matching to a certain event like
  66  *            'DOMNodeInserted' useful when the URL is added by some javascript
  67  *            library. It has two fields:
  68  *
  69  *              evt: the event we want to wait for (e.g. 'DOMNodeInserted')
  70  *
  71  *              targetElement: the element in the event handler we want the
  72  *                urlRegexp is performed on.
  73  *
  74  *  processURL: a function to process the URL before adding the Direct
  75  *              Downdload Link to the page, it must accept  a 'site' and a
  76  *              'URL' parameters and dispatch the UrlFetched to pass the
  77  *              modified URL to _add_link().
  78  *
  79  */
  80 var supported_sites = [
  81   {
  82     locationRegexp: /^http:\/\/video\.repubblica\.it\/.*$/,
  83     urlContainerXPath: '//div[@id="contA"]',
  84     urlRegexp: /[^\/]addParam\('format', '[^']*', '((http|mms):\/\/[^']*)'/,
  85     linkDestXPath: '//div[@id="contA"]',
  86   },
  87   {
  88     locationRegexp: /^http:\/\/tv\.repubblica\.it\/.*$/,
  89     urlContainerXPath: '//div[@id="boxPlayer"]',
  90     urlRegexp: /'pcUrl', '((http|mms):\/\/[^']*)'/,
  91     linkDest: 'box_embed',
  92     linkDestXPath: '//div[@id="box_embed"]',
  93   },
  94   {
  95     locationRegexp: /^http:\/\/trovacinema\.repubblica\.it\/.*$/,
  96     urlContainerXPath: '//div[@id="col-center"]',
  97     urlRegexp: /'flvUrl', '((http|mms):\/\/[^']*)'/,
  98     linkDestXPath: '//div[@id="col-center"]',
  99   },
 100   {
 101     locationRegexp: /^http:\/\/www\.kataweb\.it\/tvzap\/.*$/,
 102     urlContainerXPath: '//div[@id="tvzap_video"]',
 103     urlRegexp: /'pcUrl', '((http|mms):\/\/[^']*)'/,
 104     linkDestXPath: '//div[@id="tvzap_video"]',
 105   },
 106   {
 107     locationRegexp: /^http:\/\/www\.rai\.tv\/.*$/,
 108     initCommand: function(site) {
 109       unsafeWindow.Silverlight.isInstalled = function(version) {
 110         return true;
 111       };
 112     },
 113     urlContainerXPath: '//div[@id="silverlightControlHost" or @id="SilverlightPlayer"]',
 114     urlRegexp: /mediaUri=(http:\/\/[^,]*)/,
 115     onEvent: { evt: 'DOMNodeInserted', targetElement: 'object' },
 116     processURL: _rai_get_actual_url,
 117     linkDestXPath: '//div[@id="silverlightControlHost" or @id="SilverlightPlayer"]',
 118   },
 119   {
 120     locationRegexp: /^http:\/\/soundcloud.com\/.*$/,
 121     urlContainerXPath: '//div[@id="main-content-inner"]',
 122     urlRegexp: /"streamUrl":"([^"]*)"/,
 123     linkDestXPath: '//div[@id="main-content-inner"]',
 124   },
 125   {
 126     locationRegexp: /^http:\/\/www\.telecinco.es\/.*$/,
 127     urlContainerXPath: '//video[@class="video-js"]',
 128     urlRegexp: /src="([^"]*)"/,
 129     linkDestXPath: '//div[@class="pg-bd"]',
 130   },
 131 ];
 132
 133 /* Apply different rules to different sites */
 134 for (i = 0; i < supported_sites.length; i++) {
 135   var site = supported_sites[i];
 136
 137   var result = window.location.href.match(site.locationRegexp);
 138   if (result) {
 139     if (site.initCommand) {
 140       site.initCommand(site);
 141     }
 142     direct_download_link_add(window.location.href, site);
 143   }
 144 }
 145
 146 function getElementByXPath(query, root) {
 147   return document.evaluate(query, root || document, null, XPathResult.ANY_UNORDERED_NODE_TYPE, null).singleNodeValue;
 148 }
 149
 150 /* from http://stackoverflow.com/questions/1912501 */
 151 function htmlDecode(input){
 152   var e = document.createElement('div');
 153   e.innerHTML = input;
 154   return e.childNodes.length === 0 ? "" : e.childNodes[0].nodeValue;
 155 }
 156
 157 /**
 158  * Add a Direct Download link on the page for the specified URL
 159  *
 160  * @param: a 'site' object described above.
 161  *
 162  * @return: null on error, true on success
 163  */
 164 function direct_download_link_add(pageURL, site) {
 165   site.pageURL = pageURL;
 166   var element = getElementByXPath(site.urlContainerXPath);
 167   if (!element) {
 168     DDL_log('DirectDL (' + site.pageURL  + '): Cannot find the element ' + site.urlContainerXPath + ' containing the URL.');
 169     return null;
 170   }
 171
 172   document.addEventListener('UrlFetched', _add_link, true);
 173
 174   // This is used for sites adding the URL to the DOM after DOMContentLoaded,
 175   // for example by some javascript library (like Silverlight.js on rai.tv).
 176   if (site.onEvent) {
 177     element.addEventListener(site.onEvent.evt, function(e) {
 178       if (site.onEvent.targetElement &&
 179           e.target.tagName.toLowerCase() != site.onEvent.targetElement) {
 180         DDL_log('DirectDL (' + site.pageURL  + '): skipping element ' + e.target.tagName);
 181         return;
 182       }
 183      _get_URL(site, element);
 184     }, false);
 185     return;
 186   }
 187
 188   _get_URL(site, element);
 189 }
 190
 191 function _get_URL(site, element) {
 192   var content = element.innerHTML;
 193   if (!content) {
 194     DDL_log('DirectDL (' + site.pageURL + '): content is null, cannot find URL.');
 195     return;
 196   }
 197
 198   var matches = content.match(site.urlRegexp);
 199   if (!matches || matches.length < 2 || !matches[1]) {
 200       DDL_log('DirectDL (' + site.pageURL + '): URL not found, check the urlRegexp');
 201       return;
 202   }
 203   var URL = matches[1];
 204   if (!URL) {
 205     DDL_log('DirectDL (' + site.pageURL + '): cannot get the URL.');
 206     return;
 207   }
 208
 209   if (site.processURL) {
 210     site.processURL(site, URL);
 211     return;
 212   }
 213
 214   var evt = document.createEvent('Event');
 215   evt.initEvent('UrlFetched', true, true);
 216   evt.site = site;
 217   evt.URL = htmlDecode(URL);
 218   document.dispatchEvent(evt);
 219 }
 220
 221 function _add_link(e) {
 222   var site = e.site;
 223   var URL = e.URL;;
 224
 225   var destination = getElementByXPath(site.linkDestXPath);
 226   if (!destination) {
 227     DDL_log('DirectDl (' + site.pageURL + '): Cannot add the direct download link.');
 228     return;
 229   }
 230
 231   // Check if we added the link already, if so just update the href attribute.
 232   // This is useful when _get_URL() is called on async events.
 233   var download_link = document.getElementById('GM_direct_downaload_link');
 234   if (download_link) {
 235     download_link.setAttribute('href', URL);
 236   } else {
 237     download_link = document.createElement('a');
 238     download_link.textContent = 'Direct Link';
 239     download_link.setAttribute('id', 'GM_direct_downaload_link');
 240     download_link.setAttribute('href', URL);
 241     var style = 'background-color: white; color: blue;';
 242     style += ' border: 2px solid red;'
 243     style += ' float: right; font-size: large;';
 244     style += ' padding: .5em; margin: 1em;'
 245     style += ' position: relative; z-index: 1000;'
 246     download_link.setAttribute('style', style);
 247
 248     destination.insertBefore(download_link, destination.firstChild);
 249   }
 250 }
 251
 252 function DDL_log(message) {
 253   var debug = false;
 254   if (debug) {
 255     alert(message)
 256   } else {
 257     GM_log(message);
 258   }
 259 }
 260
 261 function _rai_get_actual_url(site, URL) {
 262
 263   // SmoothStreaming manifest files get added without processing, for now:
 264   if (URL.match(/.*\.csm$/)) {
 265     var evt = document.createEvent('Event');
 266     evt.initEvent('UrlFetched', true, true);
 267     evt.site = site;
 268     evt.URL = URL;
 269     document.dispatchEvent(evt);
 270     return;
 271   }
 272
 273   // http://www.neaveru.com/wordpress/index.php/2008/05/09/greasemonkey-bug-domnodeinserted-event-doesnt-allow-gm_xmlhttprequest/
 274   setTimeout( function() {
 275     GM_xmlhttpRequest({
 276       method: "GET",
 277       // XXX A custom header. This is the "clever" trick Rai uses to ensure
 278       // the content is accessed by www.rai.tv only...
 279       headers: {'viaurl': 'www.rai.tv'},
 280       url: URL,
 281       onload: function(response) {
 282         text = response.responseText;
 283         text = text.replace(/&/g, '&amp;')
 284         parser = new DOMParser();
 285         xmlDoc = parser.parseFromString(text, "text/xml");
 286
 287         // MMS streams
 288         elems = xmlDoc.getElementsByTagName('REF');
 289         if (elems.length > 0) {
 290           href = elems[0].getAttribute('HREF');;
 291
 292           var evt = document.createEvent('Event');
 293           evt.initEvent('UrlFetched', true, true);
 294           evt.site = site;
 295           evt.URL = href;
 296           document.dispatchEvent(evt);
 297         }
 298         // SmoothStreaming streams
 299         elems = xmlDoc.getElementsByTagName('playListItem');
 300         if (elems.length > 0) {
 301           href = elems[0].getAttribute('mediaSource');;
 302
 303           var evt = document.createEvent('Event');
 304           evt.initEvent('UrlFetched', true, true);
 305           evt.site = site;
 306           evt.URL = href;
 307           document.dispatchEvent(evt);
 308         }
 309       }
 310     });
 311   }, 0);
 312 }