41dfc4c057d90cc287977cff180bf196eae7e300
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
# TODO:
#  - Handle HTTP errors:
#       "Connection reset by peer"
#       "Resource not  available"
#       "Gateway Time-out"
#  - Support more Manifest formats:
#       WaveFormatEx attribute instead of CodecPrivateData
#       'd' and other attributes in chunk element ('i', 's', 'q')
#
# Basically, write a proper implementation of manifest parsing and chunk
# downloading.
32
33 import os
34 import re
35 import sys
36 import xml.etree.ElementTree as etree
37 import urllib2
38 import struct
39 import tempfile
40 from optparse import OptionParser
41 from urlparse import urlparse, urlunparse
42
# Program metadata; options_parser() feeds these to OptionParser as the
# description, the version string and the epilog respectively.
__description__ = 'Download videos served using Smooth Streaming technology'
__version__ = '0.x'
__author_info__ = 'Written by Antonio Ospite http://ao2.it'
46
47
def get_chunk_data(data):
    """Return the payload carried by a downloaded chunk.

    The chunk is read as two consecutive boxes, each starting with a
    32-bit big-endian size followed by a 4-byte type tag.  The first box
    (named 'moof' by this code) is skipped entirely; everything after the
    8-byte header of the second box (named 'mdat') is the payload.  The
    type tags themselves are not checked.

    data -- the raw chunk content, as a byte string

    Raises ValueError when the chunk is too short to hold the two box
    headers, or when the size declared by the second box does not match
    the number of bytes actually left in the chunk.
    """
    box_header_len = 4 + len('mdat')  # size field + type tag

    if len(data) < 4:
        raise ValueError("chunk too short: no room for the first box size")
    moof_size = struct.unpack(">L", data[0:4])[0]

    if len(data) < moof_size + box_header_len:
        raise ValueError("chunk too short: no room for the 'mdat' header")
    mdat_size = struct.unpack(">L", data[moof_size:moof_size + 4])[0]

    payload = data[moof_size + box_header_len:]
    expected_size = mdat_size - box_header_len

    # An explicit check instead of an assert: asserts are stripped when
    # Python runs with -O, which would let corrupt chunks slip through.
    if len(payload) != expected_size:
        raise ValueError("chunk payload is %d bytes, 'mdat' declares %d" %
                         (len(payload), expected_size))

    return payload
62
63
def hexstring_to_bytes(hex_string):
    """Convert a string of hexadecimal digits into the bytes it encodes.

    Digits are consumed two at a time; a trailing single digit (odd
    length input) is converted on its own.  An empty string gives an
    empty result.

    Returns a byte string (str on Python 2, bytes on Python 3).
    Raises ValueError if the input contains non-hexadecimal characters.
    """
    # Going through bytearray avoids the quadratic cost of growing a
    # string one character at a time and yields real bytes everywhere.
    return bytes(bytearray(int(hex_string[i:i + 2], 16)
                           for i in range(0, len(hex_string), 2)))
70
71
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header, up to and including the 'data' chunk size.

    out_file           -- file object opened for binary writing
    fmt                -- dict with the integer fields 'wFormatTag',
                          'nChannels', 'nSamplesPerSec', 'nAvgBytesPerSec',
                          'nBlockAlign' and 'wBitsPerSample'; its 'cbSize'
                          entry is overwritten with the extra data length
    codec_private_data -- hexadecimal string, decoded with
                          hexstring_to_bytes() and stored as the extra
                          bytes that follow the fixed 'fmt ' fields
    data_len           -- number of audio bytes the caller will append
                          after the header

    All the integers are written little-endian.
    """
    extradata = hexstring_to_bytes(codec_private_data)

    fmt['cbSize'] = len(extradata)
    fmt_len = 18 + fmt['cbSize']  # 18 bytes of fixed fields + extra data

    # The RIFF size counts everything after the size field itself: the
    # 'WAVE' tag, the 'fmt ' chunk (8 byte header + body) and the 'data'
    # chunk (8 byte header + audio payload).
    riff_len = len("WAVE") + (8 + fmt_len) + (8 + data_len)

    fixed_fields = struct.pack('<HHLLHHH',
                               fmt['wFormatTag'],
                               fmt['nChannels'],
                               fmt['nSamplesPerSec'],
                               int(fmt['nAvgBytesPerSec']),
                               fmt['nBlockAlign'],
                               fmt['wBitsPerSample'],
                               fmt['cbSize'])

    # Assemble with byte literals so that the header can be written to a
    # binary file regardless of the Python version.
    out_file.write(b"".join((
        b"RIFF", struct.pack('<L', riff_len),
        b"WAVE",
        b"fmt ", struct.pack('<L', fmt_len),
        fixed_fields,
        extradata,
        b"data", struct.pack('<L', data_len),
    )))
94
95
def download_file(src_url, dest_file, mode):
    """Fetch src_url and return its content, optionally saving it to disk.

    src_url   -- the URL to retrieve
    dest_file -- path where the content is stored; when it is empty or
                 None nothing is written and 'mode' is ignored
    mode      -- mode passed to open() for dest_file (e.g. "w" or "wb")

    Returns the downloaded data.

    Any urllib2.URLError (urllib2.HTTPError included) is reported on
    stderr and then re-raised unchanged.
    """
    try:
        response = urllib2.urlopen(src_url)
        try:
            data = response.read()
        finally:
            # release the connection even if reading fails half way
            response.close()
    except urllib2.URLError:
        sys.stderr.write("Error while downloading URL: %s\n" % src_url)
        raise

    if dest_file:
        with open(dest_file, mode) as f:
            f.write(data)

    return data
110
111
def get_manifest(url, dest_dir=tempfile.gettempdir()):
    """Load a Smooth Streaming manifest from a URL or from a local file.

    url      -- manifest location; the query string is dropped and
                '/Manifest' is appended unless the address already ends
                with '/manifest', '.ismc' or '.csm' (case insensitive)
    dest_dir -- directory (created if missing) where a remote manifest is
                saved as 'Manifest' before being parsed

    Addresses starting with http:// or https:// are downloaded; anything
    else is parsed as a local file, using 'url' exactly as given.

    If the manifest contains a <Clip> element, the manifest its 'Url'
    attribute points to is downloaded and used instead.

    Returns a (manifest, base_url) tuple: the parsed ElementTree and the
    manifest address with a trailing '/manifest' removed.

    Raises Exception if the MajorVersion of the manifest is not "2".
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    if manifest_url.lower().startswith(('http://', 'https://')):
        local_manifest_path = os.path.join(dest_dir, 'Manifest')
        download_file(manifest_url, local_manifest_path, "w")
    else:
        local_manifest_path = url

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay
    clip = manifest.find("Clip")
    if clip is not None:
        manifest_url = clip.attrib["Url"]
        # Parse the downloaded text: callers expect an ElementTree, just
        # like the one returned when there is no <Clip> indirection.
        manifest_data = download_file(manifest_url, None, None)
        manifest = etree.ElementTree(etree.fromstring(manifest_data))

    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
    base_url = manifest_pattern.sub("", manifest_url)

    return (manifest, base_url)
148
149
def print_manifest_info(manifest):
    """Print the streams of a manifest and their quality levels.

    For every StreamIndex element the stream position and its 'Type' are
    shown, followed by one line per QualityLevel: FourCC, frame size and
    bitrate for "video" streams; FourCC, sampling rate, sample size,
    channels and bitrate for "audio" streams.  Quality levels of any
    other stream type are not listed.  An empty line closes the report.

    The printed positions are the indexes expected by the stream and
    quality command line options.
    """

    streams = manifest.findall('.//StreamIndex')

    for stream_index, stream in enumerate(streams):
        stream_type = stream.attrib["Type"]

        print("Stream: %s Type: %s" % (stream_index, stream_type))

        print("\tQuality Levels:")
        qualities = stream.findall("QualityLevel")
        # a separate name for the inner index, so the stream index is
        # not clobbered while listing the quality levels
        for quality_index, q in enumerate(qualities):
            bitrate = q.attrib["Bitrate"]
            fourcc = q.attrib["FourCC"]

            if stream_type == "video":
                size = "%sx%s" % (q.attrib["MaxWidth"], q.attrib["MaxHeight"])
                print("\t%2s: %4s %10s @ %7s bps" %
                      (quality_index, fourcc, size, bitrate))
            elif stream_type == "audio":
                channels = q.attrib["Channels"]
                sampling_rate = q.attrib["SamplingRate"]
                bits_per_sample = q.attrib["BitsPerSample"]
                print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
                      (quality_index, fourcc, sampling_rate, bits_per_sample,
                       channels, bitrate))

    print("")
177
178
def get_chunk_quality_string(stream, quality_level):
    """Build the quality part of the chunk URLs of a stream.

    stream        -- a StreamIndex element
    quality_level -- position of the wanted QualityLevel inside 'stream'

    The part of the stream 'Url' attribute before the first '/' is used
    as a template: the bitrate placeholder is replaced by the 'Bitrate'
    of the quality level, and "{CustomAttributes}" by the comma separated
    "Name=Value" pairs found under CustomAttributes/Attribute.

    Returns the resulting string, e.g. "QualityLevels(1000)".
    Raises IndexError if the stream has no such quality level.
    """
    quality = stream.findall("QualityLevel")[quality_level]
    bitrate = quality.attrib["Bitrate"]

    # join() rather than appending "," and stripping it afterwards, which
    # would also remove commas belonging to the last attribute value
    custom_attributes = ",".join(
        "%s=%s" % (attribute.attrib["Name"], attribute.attrib["Value"])
        for attribute in quality.findall("CustomAttributes/Attribute"))

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    chunks_quality = stream.attrib["Url"].split('/')[0]

    # both spellings of the bitrate placeholder are in use
    for placeholder in ("{bitrate}", "{Bitrate}"):
        chunks_quality = chunks_quality.replace(placeholder, bitrate)

    return chunks_quality.replace("{CustomAttributes}", custom_attributes)
200
201
def get_chunk_name_string(stream, chunk):
    """Build the fragment part of the URL of a chunk.

    stream -- a StreamIndex element; the part of its 'Url' attribute
              between the first and the second '/' is the name template
    chunk  -- a 'c' element of the stream; its 't' attribute replaces
              the start time placeholder of the template

    Returns the resulting string, e.g. "Fragments(video=0)".
    Raises KeyError if the chunk has no 't' attribute and IndexError if
    the 'Url' attribute contains no '/'.
    """
    start_time = chunk.attrib["t"]
    chunk_name = stream.attrib["Url"].split('/')[1]

    # both spellings of the start time placeholder are in use
    for placeholder in ("{start time}", "{start_time}"):
        chunk_name = chunk_name.replace(placeholder, start_time)

    return chunk_name
208
209
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download all the chunks of one stream at one quality level.

    base_url      -- address the chunk URLs are built from, as
                     base_url/<quality string>/<chunk name>
    manifest      -- the parsed manifest
    stream_index  -- position of the stream among the StreamIndex elements
    quality_level -- position of the quality level inside the stream
    dest_dir      -- directory (created if missing) receiving the chunks,
                     stored in a sub-directory named after the quality
                     string

    Chunks already present on disk are not downloaded again; they are
    read back so that they still count towards the reported size.
    Progress and the total size are printed on stdout.
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")
    total_chunks = len(chunks)
    data_size = 0
    print("\nDownloading Stream %d" % stream_index)
    # the trailing "\r" keeps the progress counter on a single line
    sys.stdout.write("\tChunks %10d/%-10d \r" % (0, total_chunks))
    sys.stdout.flush()
    for i, c in enumerate(chunks):

        chunk_name = get_chunk_name_string(stream, c)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        if not os.path.exists(chunk_file):
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            data = download_file(chunk_url, chunk_file, "wb")
        else:
            with open(chunk_file, "rb") as f:
                data = f.read()

        data_size += len(data)
        sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, total_chunks))
        sys.stdout.flush()
    print("\tDownloaded size: %d" % data_size)
245
246
def _append_file_contents(src_path, out_file, block_size=1024 * 1024):
    """Copy the content of src_path to the open binary file out_file."""
    with open(src_path, "rb") as src:
        while True:
            block = src.read(block_size)
            if not block:
                break
            out_file.write(block)


def rebuild_stream(manifest, stream_index, quality_level, src_dir,
                   dest_file_name, final_dest_file=None):
    """Join the downloaded chunks of a stream into a single file.

    manifest        -- the parsed manifest
    stream_index    -- position of the stream among the StreamIndex
                       elements
    quality_level   -- position of the quality level inside the stream
    src_dir         -- directory holding the chunks, in a sub-directory
                       named after the quality string
    dest_file_name  -- file receiving the payload of all the chunks, in
                       manifest order
    final_dest_file -- for "audio" streams only: file receiving a WAV
                       header followed by the content of dest_file_name;
                       defaults to dest_file_name itself

    Progress and the total payload size are printed on stdout.
    """

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    chunks = stream.findall("c")
    total_chunks = len(chunks)
    data_size = 0

    # The with block guarantees the raw file is flushed and closed for
    # every stream type, also when a chunk turns out to be unreadable.
    with open(dest_file_name, "wb") as dest_file:
        print("\nRebuilding Stream %d" % stream_index)
        # the trailing "\r" keeps the progress counter on a single line
        sys.stdout.write("\tChunks %10d/%-10d \r" % (0, total_chunks))
        sys.stdout.flush()
        for i, c in enumerate(chunks):

            chunk_name = get_chunk_name_string(stream, c)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, total_chunks))
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        quality = stream.findall("QualityLevel")[quality_level]
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        # floor division: the header field must be an integer
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        # When the WAV file replaces the raw file, build it aside first:
        # opening the same path for writing would truncate the raw data
        # before it has been copied.
        in_place = (os.path.abspath(final_dest_file) ==
                    os.path.abspath(dest_file_name))
        wav_path = final_dest_file + ".tmp" if in_place else final_dest_file

        with open(wav_path, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            _append_file_contents(dest_file_name, wav_file)

        if in_place:
            # remove first: rename does not overwrite on every platform
            os.remove(final_dest_file)
            os.rename(wav_path, final_dest_file)

    print("")
    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
303
304
def _stream_start_seconds(manifest, stream):
    """Return the start time, in seconds, of the first chunk of a stream."""
    first_chunk = stream.find("c")
    if first_chunk is None:
        raise ValueError("stream has no chunks, cannot tell its start time")

    # Use the stream's own TimeScale, then the one of the manifest root,
    # then the default value also used by get_clip_duration().
    timescale = stream.attrib.get('TimeScale')
    if timescale is None:
        timescale = manifest.getroot().attrib.get('TimeScale', 10000000)

    return int(first_chunk.attrib['t']) / float(timescale)


def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return the start time difference, in seconds, between two streams.

    manifest      -- the parsed manifest
    stream1_index -- position of the reference stream among the
                     StreamIndex elements
    stream2_index -- position of the stream compared against it

    The start time of a stream is the 't' attribute of its first chunk
    divided by the stream time scale.  The result is positive when the
    second stream starts after the first one.

    Raises ValueError if one of the streams has no chunks.
    """
    streams = manifest.findall('.//StreamIndex')

    s1_start = _stream_start_seconds(manifest, streams[stream1_index])
    s2_start = _stream_start_seconds(manifest, streams[stream2_index])

    # calc difference in seconds
    return s2_start - s1_start
325
326
def get_clip_duration(manifest):
    """Return the duration of the clip in seconds.

    The 'Duration' attribute of the manifest root is divided by the
    'TimeScale' attribute of the root, or by the default time scale of
    10000000 units per second when that attribute is missing.

    Raises KeyError if the manifest root has no 'Duration' attribute.
    """
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    root = manifest.getroot()
    duration = root.attrib['Duration']

    # 10000000 is the default timescale
    timescale = float(root.attrib.get('TimeScale', 10000000))

    return float(duration) / timescale
332
333
def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
                    video_stream_index=0, audio_stream_index=1,
                    video_quality_level=0, audio_quality_level=0,
                    chunks_dir=None, download=True,
                    out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    url            -- base address of the chunks
    manifest       -- the parsed manifest
    dest_dir       -- directory receiving the rebuilt files
    *_stream_index -- positions of the video and audio streams
    *_quality_level -- positions of the quality levels to use
    chunks_dir     -- directory holding the chunks; dest_dir when None
    download       -- when false the chunks are expected to be already
                      in chunks_dir and nothing is fetched
    out_video_file, out_audio_file -- names of the rebuilt files inside
                      dest_dir; the audio is also written, with a WAV
                      header, to the audio file name plus '.wav'

    Once done, an ffmpeg command line muxing the two files, with the
    audio shifted by the delay between the streams, is printed.
    """
    if chunks_dir is None:
        chunks_dir = dest_dir

    if download:
        selected = ((video_stream_index, video_quality_level),
                    (audio_stream_index, audio_quality_level))
        for stream_index, quality_level in selected:
            download_chunks(url, manifest, stream_index, quality_level,
                            chunks_dir)

    video_path = os.path.join(dest_dir, out_video_file)
    raw_audio_path = os.path.join(dest_dir, out_audio_file)
    wav_audio_path = raw_audio_path + '.wav'

    rebuild_stream(manifest, video_stream_index, video_quality_level,
                   chunks_dir, video_path)
    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
                   chunks_dir, raw_audio_path, wav_audio_path)

    audio_delay = calc_tracks_delay(manifest, video_stream_index,
                                    audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    command_template = ("ffmpeg -i %s \\\n"
                        "  -itsoffset %f -async 1 -i %s \\\n"
                        "  -vcodec copy -acodec copy ffout.mkv")

    print(command_template % (video_path, audio_delay, wav_audio_path))
370
371
def options_parser():
    """Create the OptionParser describing the command line interface.

    The options come in three groups: boolean flags, directories and
    integer stream/quality indexes.  They are registered in the order
    they appear in the help output.
    """
    parser = OptionParser(
        usage="usage: %prog [options] <manifest URL or file>",
        version="%%prog %s" % __version__,
        description=__description__,
        epilog=__author_info__)

    # (short, long, action, dest, default, help)
    flags = (
        ("-i", "--info", "store_true", "info_only", False,
         "print Manifest info and exit"),
        ("-m", "--manifest-only", "store_true", "manifest_only", False,
         "download Manifest file and exit"),
        ("-n", "--no-download", "store_false", "download", True,
         "disable downloading chunks"),
        ("-s", "--sync-delay", "store_true", "sync_delay", False,
         "show the sync delay between the given streams and exit"),
    )
    for short_name, long_name, action, dest, default, help_text in flags:
        parser.add_option(short_name, long_name, action=action, dest=dest,
                          default=default, help=help_text)

    # (short, long, dest, default, help)
    directories = (
        ("-d", "--dest-dir", "dest_dir", tempfile.gettempdir(),
         "destination directory"),
        ("-c", "--chunks-dir", "chunks_dir", None,
         "directory containing chunks, if different from destination dir"),
    )
    for short_name, long_name, dest, default, help_text in directories:
        parser.add_option(short_name, long_name, metavar="<dir>", dest=dest,
                          default=default, help=help_text)

    # (short, long, dest, default, help)
    indexes = (
        ("-v", "--video-stream", "video_stream_index", 0,
         "index of the video stream"),
        ("-a", "--audio-stream", "audio_stream_index", 1,
         "index of the audio stream"),
        ("-q", "--video-quality", "video_quality_level", 0,
         "index of the video quality level"),
        ("-Q", "--audio-quality", "audio_quality_level", 0,
         "index of the audio quality level"),
    )
    for short_name, long_name, dest, default, help_text in indexes:
        parser.add_option(short_name, long_name, metavar="<n>", type="int",
                          dest=dest, default=default, help=help_text)

    return parser
409
410
def main():
    """Command line entry point.

    Expects exactly one positional argument, the manifest URL or file;
    otherwise the help text is shown and the program exits with status 1.
    After loading the manifest, the first matching option decides what
    happens: --manifest-only exits right away, --sync-delay prints the
    delay between the selected streams and exits, --info prints the
    manifest summary and exits.  With none of them the summary is
    printed and the selected streams are downloaded and rebuilt.
    """
    parser = options_parser()
    options, args = parser.parse_args()

    if len(args) != 1:
        parser.print_help()
        parser.exit(1)

    manifest, url = get_manifest(args[0], options.dest_dir)

    if options.manifest_only:
        parser.exit(0)

    if options.sync_delay:
        delay = calc_tracks_delay(manifest,
                                  options.video_stream_index,
                                  options.audio_stream_index)
        print(delay)
        parser.exit(0)

    # the summary is shown both for --info and before a download
    print_manifest_info(manifest)
    if options.info_only:
        parser.exit(0)

    smooth_download(url, manifest, options.dest_dir,
                    options.video_stream_index, options.audio_stream_index,
                    options.video_quality_level, options.audio_quality_level,
                    options.chunks_dir, options.download)


if __name__ == "__main__":
    main()