4fdbca1600b521202ef4d44e966ed55acf0fcc24
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 # TODO:
22 #  - Handle HTTP errors:
23 #       "Connection reset by peer"
24 #       "Resource not  available"
25 #       "Gateway Time-out"
26 # - Support more Manifest formats:
27 #       WaveFormatEx attribute instead of PrivateCodecdata
28 #       'd' and other attributes in chunk element ('i', 's', 'q')
29 #
30 # basically, write a proper implementation of manifest parsing and chunk
31 # downloading
32
33 import os
34 import re
35 import sys
36 import xml.etree.ElementTree as etree
37 import urllib2
38 import struct
39 import tempfile
40 from optparse import OptionParser
41 from urlparse import urlparse, urlunparse
42
# Module metadata; options_parser() feeds these to OptionParser as the
# help description, the --version string and the help epilog.
__description__ = 'Download videos served using Smooth Streaming technology'
__version__ = '0.x'
__author_info__ = 'Written by Antonio Ospite http://ao2.it'
46
47
def get_chunk_data(data):
    """Return the payload of the second box of a downloaded chunk.

    The chunk is expected to consist of exactly two boxes, each starting
    with a 32-bit big-endian size followed by a 4 character type: a first
    box (assumed to be 'moof') and a final box (assumed to be 'mdat') that
    carries the media samples.  The box types themselves are not checked.

    Args:
        data: the raw bytes of one chunk.

    Returns:
        The bytes following the header of the second box.

    Raises:
        ValueError: if the chunk is truncated, or if the size declared by
            the second box does not match the amount of data that is
            actually present after its header.
    """
    box_header_len = 4 + len('mdat')  # 32-bit size + 4 character type

    if len(data) < 4:
        raise ValueError("chunk too short: no room for the first box size")
    moof_size = struct.unpack(">L", data[0:4])[0]

    if len(data) < moof_size + box_header_len:
        raise ValueError("chunk too short: no box header found at offset %d"
                         % moof_size)
    mdat_size = struct.unpack(">L", data[moof_size:moof_size + 4])[0]

    data_start = moof_size + box_header_len
    data_size = mdat_size - box_header_len

    # Explicit check instead of an assert, so that it is still performed
    # when python runs with -O.
    if len(data) - data_start != data_size:
        raise ValueError("chunk payload is %d bytes, but %d were declared"
                         % (len(data) - data_start, data_size))

    return data[data_start:data_start + data_size]
62
63
def hexstring_to_bytes(hex_string):
    """Convert a string of hexadecimal digits into the bytes it encodes.

    The digits are consumed in pairs; if the string has an odd length the
    last, lone digit is converted on its own (so "abc" gives the two bytes
    0xab 0x0c).

    Args:
        hex_string: text such as "000001ff".

    Returns:
        A byte string (str on Python 2, bytes on Python 3).

    Raises:
        ValueError: if a pair is not a valid base-16 number.
    """
    # Collect the values in a bytearray instead of growing a string with
    # "+=", which is quadratic in the worst case.
    octets = bytearray(int(hex_string[i:i + 2], 16)
                       for i in range(0, len(hex_string), 2))
    return bytes(octets)
70
71
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header, up to and including the 'data' chunk size.

    The caller is expected to append data_len bytes of audio right after
    the header.

    Args:
        out_file: file object opened for binary writing.
        fmt: dict with the keys 'wFormatTag', 'nChannels',
            'nSamplesPerSec', 'nAvgBytesPerSec', 'nBlockAlign' and
            'wBitsPerSample'.  Its 'cbSize' entry is overwritten with the
            length of the decoded codec private data.
        codec_private_data: hexadecimal string, decoded with
            hexstring_to_bytes() and appended to the 'fmt ' chunk.
        data_len: number of audio bytes that will follow the header.
    """
    extradata = hexstring_to_bytes(codec_private_data)

    # The caller's dict is updated, as it always was.
    fmt['cbSize'] = len(extradata)

    # 18 bytes of fixed fields followed by cbSize bytes of extra data.
    fmt_chunk = struct.pack('<HHLLHHH',
                            fmt['wFormatTag'],
                            fmt['nChannels'],
                            fmt['nSamplesPerSec'],
                            fmt['nAvgBytesPerSec'],
                            fmt['nBlockAlign'],
                            fmt['wBitsPerSample'],
                            fmt['cbSize']) + extradata

    # The RIFF size counts everything after the size field itself: the
    # "WAVE" tag, both chunk headers (4 byte id + 4 byte size each), the
    # 'fmt ' chunk body and the audio data.  The audio data used to be
    # left out, which made the header declare a file with no samples.
    chunk_header_len = 4 + 4
    riff_len = (len(b"WAVE") + chunk_header_len + len(fmt_chunk) +
                chunk_header_len + data_len)

    out_file.write(b"RIFF")
    out_file.write(struct.pack('<L', riff_len))
    out_file.write(b"WAVEfmt ")
    out_file.write(struct.pack('<L', len(fmt_chunk)))
    out_file.write(fmt_chunk)
    out_file.write(b"data")
    out_file.write(struct.pack('<L', data_len))
94
95
def download_file(src_url, dest_file, mode):
    """Download src_url and return its content.

    Args:
        src_url: the URL to fetch.
        dest_file: path where the content is also saved; pass a false
            value (None, "") to skip saving.
        mode: mode passed to open() for dest_file; unused when dest_file
            is false.

    Returns:
        The downloaded data, as returned by the response's read().

    Raises:
        urllib2.URLError: (HTTPError included) when the request fails; a
            message naming the URL is written to stderr before the
            exception is re-raised.
    """
    try:
        response = urllib2.urlopen(src_url)
        try:
            data = response.read()
        finally:
            # Release the connection even if reading fails midway.
            response.close()
    except urllib2.URLError:
        # HTTPError is a subclass of URLError, so both are reported here.
        sys.stderr.write("Error while downloading URL: %s\n" % src_url)
        raise

    if dest_file:
        with open(dest_file, mode) as f:
            f.write(data)

    return data
110
111
def get_manifest(url, dest_dir=tempfile.gettempdir()):
    """Load a Smooth Streaming manifest and work out the chunks base URL.

    Args:
        url: http(s) URL of the manifest (the query string is dropped and
            '/Manifest' is appended when the URL does not already end in
            '/manifest', '.ismc' or '.csm'), or the path of a local
            manifest file.
        dest_dir: directory, created if missing, where a downloaded
            manifest is saved as 'Manifest'.

    Returns:
        A (manifest, base_url) tuple: manifest is an ElementTree and
        base_url is the manifest URL with a trailing '/manifest' removed.
        When the manifest has a <Clip Url="..."> child, the manifest that
        Url points to is downloaded and returned instead, and base_url is
        derived from that Url.

    Raises:
        Exception: if the MajorVersion of the first manifest is not "2".
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    if manifest_url.startswith(('http://', 'https://')):
        local_manifest_path = os.path.join(dest_dir, 'Manifest')
        # Binary mode: the data must be saved exactly as it was received.
        download_file(manifest_url, local_manifest_path, "wb")
    else:
        local_manifest_path = url

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # If some intermediate client Manifest is used, like in Rai Replay,
    # follow it.  "is not None" is required because an Element without
    # children evaluates as false.
    clip = manifest.find("Clip")
    if clip is not None:
        manifest_url = clip.attrib["Url"]
        manifest_data = download_file(manifest_url, None, None)
        # Callers use findall() and getroot() on the result, so the
        # downloaded text has to be parsed into an ElementTree too.
        manifest = etree.ElementTree(etree.fromstring(manifest_data))

    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
    base_url = manifest_pattern.sub("", manifest_url)

    return (manifest, base_url)
148
149
def print_manifest_info(manifest):
    """Print the streams of a manifest and their quality levels to stdout.

    For every StreamIndex element the index and Type are printed, followed
    by one line per QualityLevel for "video" and "audio" streams.  An
    attribute that is missing from the manifest is shown as "?" instead of
    aborting the listing.  A blank line ends the output.

    Args:
        manifest: parsed manifest offering findall(), e.g. an ElementTree.
    """
    missing = "?"
    out = sys.stdout.write

    streams = manifest.findall('.//StreamIndex')
    for stream_index, stream in enumerate(streams):
        stream_type = stream.attrib.get("Type", missing)

        out("Stream: %s Type: %s\n" % (stream_index, stream_type))
        out("\tQuality Levels:\n")

        qualities = stream.findall("QualityLevel")
        # A separate index name, so the stream index is not overwritten.
        for level_index, quality in enumerate(qualities):
            attrs = quality.attrib
            bitrate = attrs.get("Bitrate", missing)
            fourcc = attrs.get("FourCC", missing)

            if stream_type == "video":
                size = "%sx%s" % (attrs.get("MaxWidth", missing),
                                  attrs.get("MaxHeight", missing))
                out("\t%2s: %4s %10s @ %7s bps\n" %
                    (level_index, fourcc, size, bitrate))
            if stream_type == "audio":
                out("\t%2s: %4s %sHz %sbits %sch @ %7s bps\n" %
                    (level_index, fourcc,
                     attrs.get("SamplingRate", missing),
                     attrs.get("BitsPerSample", missing),
                     attrs.get("Channels", missing),
                     bitrate))

    out("\n")
177
178
def get_chunk_quality_string(stream, quality_level):
    """Build the quality part of the chunk URLs of a stream.

    The part of the stream's Url template before the first '/' is taken
    and its "{bitrate}" and "{CustomAttributes}" placeholders are filled
    in from the QualityLevel element selected by quality_level.

    Args:
        stream: a StreamIndex element.
        quality_level: index into the stream's QualityLevel children.

    Returns:
        The expanded string, e.g. "QualityLevels(1000)".
    """
    level = stream.findall("QualityLevel")[quality_level]
    bitrate = level.attrib["Bitrate"]

    name_value_pairs = ["%s=%s" % (attribute.attrib["Name"],
                                   attribute.attrib["Value"])
                        for attribute
                        in level.findall("CustomAttributes/Attribute")]
    # Trailing commas are trimmed from the joined list, which also removes
    # any comma ending the last value.
    custom_attributes = ",".join(name_value_pairs).rstrip(',')

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    quality_template = stream.attrib["Url"].split('/')[0]

    with_bitrate = quality_template.replace("{bitrate}", bitrate)
    return with_bitrate.replace("{CustomAttributes}", custom_attributes)
200
201
def get_chunk_name_string(stream, chunk_time):
    """Build the fragment part of a chunk URL.

    The second '/'-separated component of the stream's Url template is
    taken and its "{start time}" placeholder is replaced with chunk_time.

    Args:
        stream: a StreamIndex element.
        chunk_time: start time of the chunk; converted with str().

    Returns:
        The expanded string, e.g. "Fragments(video=0)".
    """
    fragment_template = stream.attrib["Url"].split('/')[1]
    return fragment_template.replace("{start time}", str(chunk_time))
207
208
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download all the chunks of one stream at one quality level.

    Chunks are saved in a sub-directory of dest_dir named after the
    quality string of the stream; chunks that are already there are not
    downloaded again.  Progress is reported on stdout.

    Args:
        base_url: URL the chunk paths are appended to.
        manifest: parsed manifest offering findall().
        stream_index: index of the StreamIndex element to download.
        quality_level: index of the QualityLevel to download.
        dest_dir: destination directory, created if missing.
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")
    data_size = 0
    sys.stdout.write("\nDownloading Stream %d\n" % stream_index)
    sys.stdout.write("\tChunks %10d/%-10d \r" % (0, len(chunks)))
    sys.stdout.flush()

    # Start time expected for the next chunk; it is what a chunk without
    # an explicit "t" attribute gets.
    next_time = 0
    for i, chunk in enumerate(chunks):

        # An explicit start time wins, otherwise the chunk starts where
        # the previous one ended.
        if "t" in chunk.attrib:
            chunk_time = chunk.attrib["t"]
        else:
            chunk_time = next_time

        # Advance from the actual start time of this chunk, so that "d"
        # only chunks following a chunk with "t" get the right time.
        if "d" in chunk.attrib:
            next_time = int(chunk_time) + int(chunk.attrib["d"])

        chunk_name = get_chunk_name_string(stream, chunk_time)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        if not os.path.exists(chunk_file):
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            data_size += len(download_file(chunk_url, chunk_file, "wb"))
        else:
            # Already downloaded: only its size is needed here.
            data_size += os.path.getsize(chunk_file)

        sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, len(chunks)))
        sys.stdout.flush()
    sys.stdout.write("\tDownloaded size: %s\n" % data_size)
252
253
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
                   dest_file_name, final_dest_file=None):
    """Concatenate the payload of the downloaded chunks of a stream.

    The chunks are read from the sub-directory of src_dir named after the
    quality string of the stream, and their payloads are written in
    manifest order to dest_file_name.  For a stream of Type "audio" a
    second file, final_dest_file, is then written: a WAV header followed
    by a copy of dest_file_name.  Progress is reported on stdout.

    Args:
        manifest: parsed manifest offering findall().
        stream_index: index of the StreamIndex element to rebuild.
        quality_level: index of the QualityLevel that was downloaded.
        src_dir: directory containing the chunk directories.
        dest_file_name: path of the raw output file.
        final_dest_file: path of the WAV file written for audio streams;
            defaults to dest_file_name.  NOTE: for audio streams pass a
            path different from dest_file_name, because the WAV file is
            opened for writing before the raw data is copied into it.
    """
    import shutil  # only needed here; keeps the file level imports as is

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    chunks = stream.findall("c")
    data_size = 0

    # The with statement closes the output for every stream type, also
    # when a chunk is missing or malformed.
    with open(dest_file_name, "wb") as dest_file:
        sys.stdout.write("\nRebuilding Stream %d\n" % stream_index)
        sys.stdout.write("\tChunks %10d/%-10d \r" % (0, len(chunks)))
        sys.stdout.flush()

        # Start time expected for the next chunk; it is what a chunk
        # without an explicit "t" attribute gets.
        next_time = 0
        for i, chunk in enumerate(chunks):

            if "t" in chunk.attrib:
                chunk_time = chunk.attrib["t"]
            else:
                chunk_time = next_time

            # Advance from the actual start time of this chunk, so that
            # "d" only chunks following a chunk with "t" are found.
            if "d" in chunk.attrib:
                next_time = int(chunk_time) + int(chunk.attrib["d"])

            chunk_name = get_chunk_name_string(stream, chunk_time)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, len(chunks)))
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        quality = stream.findall("QualityLevel")[quality_level]
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        # "//" keeps the value an integer on every python version.
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        with open(final_dest_file, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            with open(dest_file_name, "rb") as raw_file:
                # Copy in blocks rather than loading the whole stream.
                shutil.copyfileobj(raw_file, wav_file)

    sys.stdout.write("\n")
    sys.stdout.write("Stream %d, actual data size: %d\n\n" %
                     (stream_index, data_size))
318
319
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return how much later the second stream starts, in seconds.

    The start time of each stream is the "t" attribute of its first "c"
    element, divided by the TimeScale attribute of the stream.

    Args:
        manifest: parsed manifest offering findall().
        stream1_index: index of the reference StreamIndex element.
        stream2_index: index of the StreamIndex element to compare.

    Returns:
        start of stream 2 minus start of stream 1 as a float, or 0 when
        either stream lacks a TimeScale attribute, has no chunks, or its
        first chunk has no "t" attribute.
    """
    streams = manifest.findall('.//StreamIndex')

    s1 = streams[stream1_index]
    s2 = streams[stream2_index]

    # The test must be made on .attrib: "in" applied to an Element looks
    # at its children, so it could never find the attribute.
    if "TimeScale" not in s1.attrib or "TimeScale" not in s2.attrib:
        return 0

    s1_start_chunk = s1.find("c")
    s2_start_chunk = s2.find("c")

    # find() returns None for a stream without chunks.
    if s1_start_chunk is None or s2_start_chunk is None:
        return 0

    if "t" not in s1_start_chunk.attrib \
       or "t" not in s2_start_chunk.attrib:
        return 0

    s1_start_time = int(s1_start_chunk.attrib['t'])
    s2_start_time = int(s2_start_chunk.attrib['t'])

    s1_timescale = float(s1.attrib['TimeScale'])
    s2_timescale = float(s2.attrib['TimeScale'])

    # calc difference in seconds
    return s2_start_time / s2_timescale - s1_start_time / s1_timescale
347
348
def get_clip_duration(manifest):
    """Return the Duration attribute of the manifest root, in seconds.

    The value is divided by the default timescale of 10000000 units per
    second.
    """
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    default_timescale = 10000000
    root = manifest.getroot()

    return float(root.attrib['Duration']) / default_timescale
354
355
def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
                    video_stream_index=0, audio_stream_index=1,
                    video_quality_level=0, audio_quality_level=0,
                    chunks_dir=None, download=True,
                    out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    The chunks of both streams are downloaded (unless download is false)
    into chunks_dir, then each stream is rebuilt into a file in dest_dir;
    the audio stream is also written as out_audio_file + '.wav'.  Finally
    an ffmpeg command line that muxes the two files is printed.

    Args:
        url: base URL of the chunks.
        manifest: parsed manifest.
        dest_dir: directory for the rebuilt files.
        video_stream_index, audio_stream_index: streams to process.
        video_quality_level, audio_quality_level: quality levels to use.
        chunks_dir: directory holding the chunks; defaults to dest_dir.
        download: when false, chunks are expected to be in chunks_dir.
        out_video_file, out_audio_file: names of the rebuilt files.
    """
    if chunks_dir is None:
        chunks_dir = dest_dir

    if download:
        selected_streams = ((video_stream_index, video_quality_level),
                            (audio_stream_index, audio_quality_level))
        for stream_index, quality_level in selected_streams:
            download_chunks(url, manifest, stream_index, quality_level,
                            chunks_dir)

    video_path = os.path.join(dest_dir, out_video_file)
    raw_audio_path = os.path.join(dest_dir, out_audio_file)
    wav_audio_path = raw_audio_path + '.wav'

    rebuild_stream(manifest, video_stream_index, video_quality_level,
                   chunks_dir, video_path)
    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
                   chunks_dir, raw_audio_path, wav_audio_path)

    delay = calc_tracks_delay(manifest, video_stream_index,
                              audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_template = "".join(["ffmpeg -i %s \\\n",
                            "  -itsoffset %f -async 1 -i %s \\\n",
                            "  -vcodec copy -acodec copy ffout.mkv"])
    mux_command = mux_template % (video_path, delay, wav_audio_path)

    sys.stdout.write(mux_command + "\n")
392
393
def options_parser():
    """Create the OptionParser describing the command line interface.

    Returns:
        An OptionParser with the boolean switches (-i, -m, -n, -s), the
        directory options (-d, -c) and the integer stream and quality
        selectors (-v, -a, -q, -Q) registered in this order.
    """
    parser = OptionParser(usage="usage: %prog [options] <manifest URL or file>",
                          version="%%prog %s" % __version__,
                          description=__description__,
                          epilog=__author_info__)

    # (short, long, action, dest, default, help)
    switches = (
        ("-i", "--info", "store_true", "info_only", False,
         "print Manifest info and exit"),
        ("-m", "--manifest-only", "store_true", "manifest_only", False,
         "download Manifest file and exit"),
        ("-n", "--no-download", "store_false", "download", True,
         "disable downloading chunks"),
        ("-s", "--sync-delay", "store_true", "sync_delay", False,
         "show the sync delay between the given streams and exit"),
    )
    for short, long_name, action, dest, default, help_text in switches:
        parser.add_option(short, long_name, action=action, dest=dest,
                          default=default, help=help_text)

    # (short, long, dest, default, help)
    directories = (
        ("-d", "--dest-dir", "dest_dir", tempfile.gettempdir(),
         "destination directory"),
        ("-c", "--chunks-dir", "chunks_dir", None,
         "directory containing chunks, if different from destination dir"),
    )
    for short, long_name, dest, default, help_text in directories:
        parser.add_option(short, long_name, metavar="<dir>", dest=dest,
                          default=default, help=help_text)

    # (short, long, dest, default, help)
    indexes = (
        ("-v", "--video-stream", "video_stream_index", 0,
         "index of the video stream"),
        ("-a", "--audio-stream", "audio_stream_index", 1,
         "index of the audio stream"),
        ("-q", "--video-quality", "video_quality_level", 0,
         "index of the video quality level"),
        ("-Q", "--audio-quality", "audio_quality_level", 0,
         "index of the audio quality level"),
    )
    for short, long_name, dest, default, help_text in indexes:
        parser.add_option(short, long_name, metavar="<n>", type="int",
                          dest=dest, default=default, help=help_text)

    return parser
431
432
def main():
    """Command line entry point.

    Exactly one argument, the manifest URL or file, is required; without
    it the help is printed and the program exits with status 1.  The
    manifest is always loaded; depending on the options the program then
    stops (-m), prints the delay between the selected streams (-s),
    prints the manifest info (-i), or prints the info and goes on to
    download and rebuild the selected streams.
    """
    parser = options_parser()
    options, args = parser.parse_args()

    if len(args) != 1:
        parser.print_help()
        parser.exit(1)

    manifest, url = get_manifest(args[0], options.dest_dir)

    if options.manifest_only:
        parser.exit(0)

    if options.sync_delay:
        delay = calc_tracks_delay(manifest,
                                  options.video_stream_index,
                                  options.audio_stream_index)
        sys.stdout.write("%s\n" % (delay,))
        parser.exit(0)

    # The info is shown in both the remaining cases; with -i nothing else
    # is done.
    print_manifest_info(manifest)
    if options.info_only:
        parser.exit(0)

    smooth_download(url, manifest,
                    dest_dir=options.dest_dir,
                    video_stream_index=options.video_stream_index,
                    audio_stream_index=options.audio_stream_index,
                    video_quality_level=options.video_quality_level,
                    audio_quality_level=options.audio_quality_level,
                    chunks_dir=options.chunks_dir,
                    download=options.download)
463
464
# Run the command line interface only when the file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()