direct_download_links.user.js

   1 // direct_download_links - Add direct download links
   2 // version 0.4
   3 // 2012-02-12
   4 // Copyright (C) 2011,2012  Antonio Ospite <ospite@studenti.unina.it>
   5 // Released under the GPL license
   6 // http://www.gnu.org/copyleft/gpl.html
   7 //
   8 // --------------------------------------------------------------------
   9 //
  10 // This is a Greasemonkey user script.
  11 //
  12 // To install, you need Greasemonkey: https://addons.mozilla.org/en-US/firefox/addon/748
  13 // Then restart Firefox and revisit this script.
  14 // Under Tools, there will be a new menu item to "Install User Script".
  15 // Accept the default configuration and install.
  16 //
  17 // To uninstall, go to Tools/Manage User Scripts,
  18 // select "Direct Download Links", and click Uninstall.
  19 //
  20 // --------------------------------------------------------------------
  21 //
  22 // ==UserScript==
  23 // @name           Direct Download Links
  24 // @namespace      http://git.ao2.it/GM_direct_download_links.git
  25 // @description    Add direct download links
  26 // @include        http://video.repubblica.it/*
  27 // @include        http://tv.repubblica.it/*
  28 // @include        http://trovacinema.repubblica.it/*
  29 // @include        http://www.kataweb.it/tvzap/*
  30 // @include        http://www.rai.tv/*
  31 // @include        http://soundcloud.com/*
  32 // ==/UserScript==
  33 //
  34
  35 /*
  36  * TODO:
  37  *  - find a way to use the same string as in the @include lines to match the
  38  *    current window.location. Look for something like GM_testUrl() which builds
  39  *    the regexp starting from a glob line.
  40  *  - use jquery, like shown in http://a32.me/2009/11/greasemonkey/
  41  *  - Support the "download" attribute for anchors:
  42  *    http://www.whatwg.org/specs/web-apps/current-work/multipage/links.html#downloading-resources
  43  */
  44
  45 /* Fields supported by the "site" object.
  46  *
  47  * Mandatory fields:
  48  *   locationRegexp: the regexp describing the URL of the page we are modifying
  49  *   urlContainerXPath: the XPath of the element containing the URL to link
  50  *   urlRegexp: the regular expression for finding the URL, the first
  51  *              sub-pattern is taken as the URL
  52  *   linkDestXPath: the XPath of the element where to place the Direct Download link
  53  *
  54  *
  55  * Optional fields:
  56  *
  57  *   initCommand: a function called before the regExp is matched, this can
  58  *                be useful in cases when some action needs to be done in
  59  *                order to make the element containing the regExp be actually
  60  *                rendered. It must accept  a 'site' parameter.
  61  *
  62  *   onEvent: used to delay the urlRegexp matching to a certain event like
  63  *            'DOMNodeInserted' useful when the URL is added by some javascript
  64  *            library. It has two fields:
  65  *
  66  *              evt: the event we want to wait for (e.g. 'DOMNodeInserted')
  67  *
  68  *              targetElement: the lowercase tag name the event target must
  69  *                have for the urlRegexp matching to be performed.
  70  *
  71  *  processURL: a function to process the URL before adding the Direct
  72  *              Download Link to the page, it must accept a 'site' and a
  73  *              'URL' parameter and dispatch the UrlFetched event to pass
  74  *              the modified URL to _add_link().
  75  *
  76  */
  77 var supported_sites = [
  78   {
  79     locationRegexp: /^http:\/\/video\.repubblica\.it\/.*$/,
  80     urlContainerXPath: '//div[@id="contA"]',
  81     urlRegexp: /[^\/]addParam\('format', '[^']*', '((http|mms):\/\/[^']*)'/,
  82     linkDestXPath: '//div[@id="contA"]',
  83   },
  84   {
  85     locationRegexp: /^http:\/\/tv\.repubblica\.it\/.*$/,
  86     urlContainerXPath: '//div[@id="boxPlayer"]',
  87     urlRegexp: /'pcUrl', '((http|mms):\/\/[^']*)'/,
  88     linkDest: 'box_embed',
  89     linkDestXPath: '//div[@id="box_embed"]',
  90   },
  91   {
  92     locationRegexp: /^http:\/\/trovacinema\.repubblica\.it\/.*$/,
  93     urlContainerXPath: '//div[@id="col-center"]',
  94     urlRegexp: /'flvUrl', '((http|mms):\/\/[^']*)'/,
  95     linkDestXPath: '//div[@id="col-center"]',
  96   },
  97   {
  98     locationRegexp: /^http:\/\/www\.kataweb\.it\/tvzap\/.*$/,
  99     urlContainerXPath: '//div[@id="tvzap_video"]',
 100     urlRegexp: /'pcUrl', '((http|mms):\/\/[^']*)'/,
 101     linkDestXPath: '//div[@id="tvzap_video"]',
 102   },
 103   {
 104     locationRegexp: /^http:\/\/www\.rai\.tv\/.*$/,
 105     initCommand: function(site) {
 106       unsafeWindow.Silverlight.isInstalled = function(version) {
 107         return true;
 108       };
 109     },
 110     urlContainerXPath: '//div[@id="silverlightControlHost" or @id="SilverlightPlayer"]',
 111     urlRegexp: /mediaUri=(http:\/\/[^,]*)/,
 112     onEvent: { evt: 'DOMNodeInserted', targetElement: 'object' },
 113     processURL: _rai_get_actual_url,
 114     linkDestXPath: '//div[@id="silverlightControlHost" or @id="SilverlightPlayer"]',
 115   },
 116   {
 117     locationRegexp: /^http:\/\/soundcloud.com\/.*$/,
 118     urlContainerXPath: '//div[@id="main-content-inner"]',
 119     urlRegexp: /"streamUrl":"([^"]*)"/,
 120     linkDestXPath: '//div[@id="main-content-inner"]',
 121   },
 122 ];
 123
 124 /* Apply different rules to different sites */
 125 for (i = 0; i < supported_sites.length; i++) {
 126   var site = supported_sites[i];
 127
 128   var result = window.location.href.match(site.locationRegexp);
 129   if (result) {
 130     if (site.initCommand) {
 131       site.initCommand(site);
 132     }
 133     direct_download_link_add(window.location.href, site);
 134   }
 135 }
 136
 137 function getElementByXPath(query, root) {
 138   return document.evaluate(query, root || document, null, XPathResult.ANY_UNORDERED_NODE_TYPE, null).singleNodeValue;
 139 }
 140
 141 /* from http://stackoverflow.com/questions/1912501 */
 142 function htmlDecode(input){
 143   var e = document.createElement('div');
 144   e.innerHTML = input;
 145   return e.childNodes.length === 0 ? "" : e.childNodes[0].nodeValue;
 146 }
 147
 148 /**
 149  * Add a Direct Download link on the page for the specified URL
 150  *
 151  * @param: a 'site' object described above.
 152  *
 153  * @return: null on error, true on success
 154  */
 155 function direct_download_link_add(pageURL, site) {
 156   site.pageURL = pageURL;
 157   var element = getElementByXPath(site.urlContainerXPath);
 158   if (!element) {
 159     DDL_log('DirectDL (' + site.pageURL  + '): Cannot find the element ' + site.urlContainerXPath + ' containing the URL.');
 160     return null;
 161   }
 162
 163   document.addEventListener('UrlFetched', _add_link, true);
 164
 165   // This is used for sites adding the URL to the DOM after DOMContentLoaded,
 166   // for example by some javascript library (like Silverlight.js on rai.tv).
 167   if (site.onEvent) {
 168     element.addEventListener(site.onEvent.evt, function(e) {
 169       if (site.onEvent.targetElement &&
 170           e.target.tagName.toLowerCase() != site.onEvent.targetElement) {
 171         DDL_log('DirectDL (' + site.pageURL  + '): skipping element ' + e.target.tagName);
 172         return;
 173       }
 174      _get_URL(site, element);
 175     }, false);
 176     return;
 177   }
 178
 179   _get_URL(site, element);
 180 }
 181
 182 function _get_URL(site, element) {
 183   var content = element.innerHTML;
 184   if (!content) {
 185     DDL_log('DirectDL (' + site.pageURL + '): content is null, cannot find URL.');
 186     return;
 187   }
 188
 189   var matches = content.match(site.urlRegexp);
 190   if (!matches || matches.length < 2 || !matches[1]) {
 191       DDL_log('DirectDL (' + site.pageURL + '): URL not found, check the urlRegexp');
 192       return;
 193   }
 194   var URL = matches[1];
 195   if (!URL) {
 196     DDL_log('DirectDL (' + site.pageURL + '): cannot get the URL.');
 197     return;
 198   }
 199
 200   if (site.processURL) {
 201     site.processURL(site, URL);
 202     return;
 203   }
 204
 205   var evt = document.createEvent('Event');
 206   evt.initEvent('UrlFetched', true, true);
 207   evt.site = site;
 208   evt.URL = htmlDecode(URL);
 209   document.dispatchEvent(evt);
 210 }
 211
 212 function _add_link(e) {
 213   var site = e.site;
 214   var URL = e.URL;;
 215
 216   var destination = getElementByXPath(site.linkDestXPath);
 217   if (!destination) {
 218     DDL_log('DirectDl (' + site.pageURL + '): Cannot add the direct download link.');
 219     return;
 220   }
 221
 222   // Check if we added the link already, if so just update the href attribute.
 223   // This is useful when _get_URL() is called on async events.
 224   var download_link = document.getElementById('GM_direct_downaload_link');
 225   if (download_link) {
 226     download_link.setAttribute('href', URL);
 227   } else {
 228     download_link = document.createElement('a');
 229     download_link.textContent = 'Direct Link';
 230     download_link.setAttribute('id', 'GM_direct_downaload_link');
 231     download_link.setAttribute('href', URL);
 232     var style = 'background-color: white; color: blue;';
 233     style += ' border: 2px solid red;'
 234     style += ' float: right; font-size: large;';
 235     style += ' padding: .5em; margin: 1em;'
 236     style += ' position: relative; z-index: 1000;'
 237     download_link.setAttribute('style', style);
 238
 239     destination.insertBefore(download_link, destination.firstChild);
 240   }
 241 }
 242
 243 function DDL_log(message) {
 244   var debug = false;
 245   if (debug) {
 246     alert(message)
 247   } else {
 248     GM_log(message);
 249   }
 250 }
 251
 252 function _rai_get_actual_url(site, URL) {
 253
 254   // SmoothStreaming manifest files get added without processing, for now:
 255   if (URL.match(/.*\.csm$/)) {
 256     var evt = document.createEvent('Event');
 257     evt.initEvent('UrlFetched', true, true);
 258     evt.site = site;
 259     evt.URL = URL;
 260     document.dispatchEvent(evt);
 261     return;
 262   }
 263
 264   // http://www.neaveru.com/wordpress/index.php/2008/05/09/greasemonkey-bug-domnodeinserted-event-doesnt-allow-gm_xmlhttprequest/
 265   setTimeout( function() {
 266     GM_xmlhttpRequest({
 267       method: "GET",
 268       // XXX A custom header. This is the "clever" trick Rai uses to ensure
 269       // the content is accessed by www.rai.tv only...
 270       headers: {'viaurl': 'www.rai.tv'},
 271       url: URL,
 272       onload: function(response) {
 273         text = response.responseText;
 274         text = text.replace(/&/g, '&amp;')
 275         parser = new DOMParser();
 276         xmlDoc = parser.parseFromString(text, "text/xml");
 277
 278         // MMS streams
 279         elems = xmlDoc.getElementsByTagName('REF');
 280         if (elems.length > 0) {
 281           href = elems[0].getAttribute('HREF');;
 282
 283           var evt = document.createEvent('Event');
 284           evt.initEvent('UrlFetched', true, true);
 285           evt.site = site;
 286           evt.URL = href;
 287           document.dispatchEvent(evt);
 288         }
 289         // SmoothStreaming streams
 290         elems = xmlDoc.getElementsByTagName('playListItem');
 291         if (elems.length > 0) {
 292           href = elems[0].getAttribute('mediaSource');;
 293
 294           var evt = document.createEvent('Event');
 295           evt.initEvent('UrlFetched', true, true);
 296           evt.site = site;
 297           evt.URL = href;
 298           document.dispatchEvent(evt);
 299         }
 300       }
 301     });
 302   }, 0);
 303 }