39785a4028db72f0560215edbaa9edf02a24dfb2
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 # TODO:
22 #  - Handle HTTP errors:
23 #       "Connection reset by peer"
24 #       "Resource not  available"
25 #       "Gateway Time-out"
26 # - Support more Manifest formats:
27 #       WaveFormatEx attribute instead of PrivateCodecdata
28 #       'd' and other attributes in chunk element ('i', 's', 'q')
29 #
30 # basically, write a proper implementation of manifest parsing and chunk
31 # downloading
32
33 import os
34 import re
35 import sys
36 import xml.etree.ElementTree as etree
37 import urllib2
38 import struct
39 import tempfile
40 from optparse import OptionParser
41 from urlparse import urlparse, urlunparse
42
# Program metadata used by options_parser(): the description and the author
# info go into the help text, the version string into the "--version" output.
__description__ = "Download videos served using Smooth Streaming technology"
__version__ = "0.x"
__author_info__ = "Written by Antonio Ospite http://ao2.it"
46
47
def get_chunk_data(data):
    """Extract the media payload from the raw content of a chunk.

    The chunk is read as two consecutive size-prefixed boxes:

      - the first 4 bytes hold the big-endian size of the first box
        (referred to as 'moof' here);
      - the 4 bytes right after the first box hold the big-endian size of
        the second box (referred to as 'mdat' here).

    Only the size fields are inspected, the box type tags are not checked.

    Returns everything that follows the 8-byte header (size + type tag) of
    the second box.

    Raises ValueError if the chunk is too short to contain the two size
    fields, or if the payload size declared by the second box does not match
    the amount of data actually present.
    """
    size_field_len = 4
    box_header_len = size_field_len + len('mdat')

    moof_size_field = data[0:size_field_len]
    if len(moof_size_field) != size_field_len:
        raise ValueError("chunk too short: missing moof size field")
    moof_size = struct.unpack(">L", moof_size_field)[0]

    mdat_size_field = data[moof_size:moof_size + size_field_len]
    if len(mdat_size_field) != size_field_len:
        raise ValueError("chunk too short: missing mdat size field")
    mdat_size = struct.unpack(">L", mdat_size_field)[0]

    data_start = moof_size + box_header_len
    data_size = mdat_size - box_header_len

    payload = data[data_start:]

    # Explicit check rather than an assert statement, so that the validation
    # is still performed when running with "python -O".
    if len(payload) != data_size:
        raise ValueError("unexpected chunk payload size: got %d, expected %d"
                         % (len(payload), data_size))

    return payload
62
63
def hexstring_to_bytes(hex_string):
    """Convert a string of hexadecimal digits to the characters it encodes.

    The input is consumed two digits at a time, each pair giving the code of
    one output character; a trailing unpaired digit is converted on its own.

    Raises ValueError (from int()) if the string contains non-hex digits.
    """
    # Join the converted pairs in one go instead of growing the result with
    # repeated string concatenations.
    return "".join(chr(int(hex_string[i:i + 2], 16))
                   for i in range(0, len(hex_string), 2))
70
71
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header to out_file.

    out_file            file object opened for binary writing
    fmt                 dict with the 'wFormatTag', 'nChannels',
                        'nSamplesPerSec', 'nAvgBytesPerSec', 'nBlockAlign'
                        and 'wBitsPerSample' values of the format chunk;
                        its 'cbSize' entry is overwritten with the length of
                        the decoded codec private data
    codec_private_data  hex string, decoded and appended to the format chunk
    data_len            size in bytes of the audio data that the caller is
                        going to write right after the header

    Only the header is written: "RIFF" and "fmt " chunks, followed by the
    "data" chunk id and size.
    """
    extradata = hexstring_to_bytes(codec_private_data)

    fmt['cbSize'] = len(extradata)

    # 18 is the packed size of the seven fixed format fields written below
    fmt_len = 18 + fmt['cbSize']

    # The RIFF chunk size counts everything after the "RIFF" id and the size
    # field itself: the "WAVE" form type, the whole "fmt " chunk and the
    # whole "data" chunk, audio data included.
    riff_len = (len("WAVE") +
                len("fmt ") + 4 + fmt_len +
                len("data") + 4 + data_len)

    out_file.write("RIFF")
    out_file.write(struct.pack('<L', riff_len))
    out_file.write("WAVEfmt ")
    out_file.write(struct.pack('<L', fmt_len))
    out_file.write(struct.pack('<HHLLHHH',
                               fmt['wFormatTag'],
                               fmt['nChannels'],
                               fmt['nSamplesPerSec'],
                               fmt['nAvgBytesPerSec'],
                               fmt['nBlockAlign'],
                               fmt['wBitsPerSample'],
                               fmt['cbSize']))
    out_file.write(extradata)
    out_file.write("data")
    out_file.write(struct.pack('<L', data_len))
94
95
def download_file(src_url, dest_file, mode):
    """Download src_url and return its content.

    If dest_file is set, the content is also saved to that path, opening the
    file with the given mode (e.g. "wb"); otherwise mode is not used.

    When the download fails a message is printed on stderr and the original
    urllib2 exception is re-raised.
    """
    try:
        response = urllib2.urlopen(src_url)
        try:
            data = response.read()
        finally:
            response.close()
    except urllib2.URLError:
        # URLError is the base class of HTTPError, so connection problems
        # are reported too, not only HTTP error statuses.
        sys.stderr.write("Error while downloading URL: %s\n" % src_url)
        raise

    if dest_file:
        with open(dest_file, mode) as f:
            f.write(data)

    return data
110
111
def get_manifest(url, dest_dir):
    """Returns the manifest element tree and the base content URL

    url can be the URL of a manifest or the path of a local manifest file.
    A remote manifest is saved as 'Manifest' in dest_dir before being parsed.

    If the manifest has a "Clip" child with an "Url" attribute, the manifest
    that it points to is downloaded and returned instead, and the base URL is
    calculated from that address.

    Raises Exception if the MajorVersion of the manifest is not "2".
    """

    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    if os.path.exists(url):
        local_manifest_path = url
    else:
        local_manifest_path = os.path.join(dest_dir, 'Manifest')
        # binary mode: save the manifest exactly as it was received
        download_file(manifest_url, local_manifest_path, "wb")

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay
    clip = manifest.find("Clip")
    if clip is not None:
        manifest_url = clip.attrib["Url"]
        manifest_data = download_file(manifest_url, None, None)
        # Callers use findall() and getroot() on the returned manifest, so
        # wrap the parsed data in an ElementTree like in the direct case.
        manifest = etree.ElementTree(etree.fromstring(manifest_data))

    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
    base_url = manifest_pattern.sub("", manifest_url)

    return (manifest, base_url)
145
146
def print_manifest_info(manifest):
    """Print the streams in the manifest and their quality levels.

    For each StreamIndex element the index and the Type are printed,
    followed by one line for each of its QualityLevel elements: codec, frame
    size and bitrate for "video" streams; codec, sampling rate, sample size,
    channels and bitrate for "audio" streams. Quality levels of streams of
    any other type are not printed.

    Raises KeyError if one of the expected attributes is missing.
    """

    streams = manifest.findall('.//StreamIndex')

    for stream_index, stream in enumerate(streams):
        stream_type = stream.attrib["Type"]

        print("Stream: %s Type: %s" % (stream_index, stream_type))

        print("\tQuality Levels:")
        qualities = stream.findall("QualityLevel")
        # Separate index name, in order not to clobber the stream index
        for quality_index, q in enumerate(qualities):
            bitrate = q.attrib["Bitrate"]
            fourcc = q.attrib["FourCC"]

            if stream_type == "video":
                size = "%sx%s" % (q.attrib["MaxWidth"], q.attrib["MaxHeight"])
                print("\t%2s: %4s %10s @ %7s bps" %
                      (quality_index, fourcc, size, bitrate))
            elif stream_type == "audio":
                channels = q.attrib["Channels"]
                sampling_rate = q.attrib["SamplingRate"]
                bits_per_sample = q.attrib["BitsPerSample"]
                print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
                      (quality_index, fourcc, sampling_rate, bits_per_sample,
                       channels, bitrate))

    # Empty line after the listing
    print("")
174
175
def get_chunk_quality_string(stream, quality_level):
    """Return the quality part of the chunk URLs of a stream.

    stream          StreamIndex element
    quality_level   index of the QualityLevel element to use

    The part before the first '/' of the stream "Url" attribute is taken,
    replacing "{bitrate}" with the Bitrate of the quality level and
    "{CustomAttributes}" with its custom attributes, as a comma separated
    list of Name=Value pairs.

    Raises IndexError if the stream has no such quality level.
    """
    quality = stream.findall("QualityLevel")[quality_level]
    bitrate = quality.attrib["Bitrate"]

    quality_attributes = quality.findall("CustomAttributes/Attribute")
    # Joining the pairs puts commas only between them, leaving any comma
    # which is part of an attribute value untouched.
    custom_attributes = ",".join(
        "%s=%s" % (attribute.attrib["Name"], attribute.attrib["Value"])
        for attribute in quality_attributes)

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    url = stream.attrib["Url"]

    chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
    chunks_quality = chunks_quality.replace("{CustomAttributes}",
                                            custom_attributes)

    return chunks_quality
197
198
def get_chunk_name_string(stream, chunk_time):
    """Return the name part of the URL of the chunk starting at chunk_time.

    The second '/'-separated component of the stream "Url" attribute is used
    as a template, with "{start time}" replaced by chunk_time.
    """
    name_template = stream.attrib["Url"].split('/')[1]
    start_time = str(chunk_time)

    return name_template.replace("{start time}", start_time)
204
205
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download all the chunks of a stream at the given quality level.

    base_url        URL the chunk URLs are relative to
    manifest        parsed manifest
    stream_index    index of the StreamIndex element to download
    quality_level   index of the QualityLevel element to use
    dest_dir        directory where the chunks are saved, in a subdirectory
                    named after the quality string

    Chunks which are already present in the destination directory are not
    downloaded again. Progress and total size are printed on stdout.
    """
    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")
    data_size = 0
    print("\nDownloading Stream %d" % stream_index)
    sys.stdout.write("\tChunks %10d/%-10d \r" % (0, len(chunks)))
    sys.stdout.flush()

    # Start time to use for a chunk which has no "t" attribute: the start
    # time of the previous chunk plus its duration, 0 for the first chunk.
    next_chunk_time = 0
    for i, chunk in enumerate(chunks):

        if "t" in chunk.attrib:
            # Use the attribute text as is in the chunk name
            chunk_time = chunk.attrib["t"]
            chunk_start = int(chunk_time)
        else:
            chunk_start = next_chunk_time
            chunk_time = chunk_start

        if "d" in chunk.attrib:
            next_chunk_time = chunk_start + int(chunk.attrib["d"])

        chunk_name = get_chunk_name_string(stream, chunk_time)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        if not os.path.exists(chunk_file):
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            data = download_file(chunk_url, chunk_file, "wb")
            data_size += len(data)
        else:
            # Only the size is needed here, no need to read the content
            data_size += os.path.getsize(chunk_file)

        sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, len(chunks)))
        sys.stdout.flush()
    print("\tDownloaded size: %d" % data_size)
245
246
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
                   dest_file_name, final_dest_file=None):
    """Rebuild a stream by concatenating the payload of its chunks.

    manifest        parsed manifest
    stream_index    index of the StreamIndex element to rebuild
    quality_level   index of the QualityLevel element to use
    src_dir         directory containing the downloaded chunks, in a
                    subdirectory named after the quality string
    dest_file_name  file where the raw stream data is written
    final_dest_file for "audio" streams, file where the raw data is written
                    again preceded by a WAV header; defaults to
                    dest_file_name

    Progress and the total data size are printed on stdout.
    """

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    chunks = stream.findall("c")
    data_size = 0
    print("\nRebuilding Stream %d" % stream_index)
    sys.stdout.write("\tChunks %10d/%-10d \r" % (0, len(chunks)))
    sys.stdout.flush()

    # The with statement makes sure that the raw data is flushed and the file
    # closed for any type of stream, and also when a chunk is not valid.
    with open(dest_file_name, "wb") as dest_file:
        # Start time to use for a chunk which has no "t" attribute: the start
        # time of the previous chunk plus its duration, 0 for the first one.
        next_chunk_time = 0
        for i, chunk in enumerate(chunks):

            if "t" in chunk.attrib:
                # Use the attribute text as is in the chunk name
                chunk_time = chunk.attrib["t"]
                chunk_start = int(chunk_time)
            else:
                chunk_start = next_chunk_time
                chunk_time = chunk_start

            if "d" in chunk.attrib:
                next_chunk_time = chunk_start + int(chunk.attrib["d"])

            chunk_name = get_chunk_name_string(stream, chunk_time)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, len(chunks)))
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        quality = stream.findall("QualityLevel")[quality_level]
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        # Bitrate is in bits per second; keep the result an integer
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        # Read the raw data back before opening the final file for writing:
        # the two paths are the same when final_dest_file is not given, and
        # opening the file for writing truncates it.
        with open(dest_file_name, "rb") as raw_file:
            raw_data = raw_file.read()

        with open(final_dest_file, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            wav_file.write(raw_data)

    print("")
    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
311
312
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return the delay in seconds of the second stream from the first one.

    The delay is the difference between the start times of the first chunk
    of the two streams (the "t" attribute of their first "c" element), each
    divided by the "TimeScale" attribute of its stream.

    Returns 0 when the delay cannot be calculated: a stream has no
    "TimeScale" attribute, has no chunks, or its first chunk has no "t"
    attribute.
    """
    streams = manifest.findall('.//StreamIndex')

    s1 = streams[stream1_index]
    s2 = streams[stream2_index]

    # Look in .attrib: the "in" operator applied to the element itself checks
    # its child elements, not its attributes.
    if "TimeScale" not in s1.attrib or "TimeScale" not in s2.attrib:
        return 0

    s1_start_chunk = s1.find("c")
    s2_start_chunk = s2.find("c")

    if s1_start_chunk is None or s2_start_chunk is None:
        return 0

    if "t" not in s1_start_chunk.attrib \
       or "t" not in s2_start_chunk.attrib:
        return 0

    s1_start_time = int(s1_start_chunk.attrib['t'])
    s2_start_time = int(s2_start_chunk.attrib['t'])

    s1_timescale = float(s1.attrib['TimeScale'])
    s2_timescale = float(s2.attrib['TimeScale'])

    # calc difference in seconds
    delay = s2_start_time / s2_timescale - \
        s1_start_time / s1_timescale

    return delay
340
341
def get_clip_duration(manifest):
    """Return the "Duration" attribute of the manifest root, divided by the
    default timescale."""
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    default_timescale = 10000000
    root = manifest.getroot()

    return float(root.attrib['Duration']) / default_timescale
347
348
def smooth_download(url, manifest, dest_dir,
                    video_stream_index=0, audio_stream_index=1,
                    video_quality_level=0, audio_quality_level=0,
                    chunks_dir=None, download=True,
                    out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    The chunks of the two streams are downloaded to chunks_dir (dest_dir when
    not given) unless download is False, then the streams are rebuilt in
    dest_dir as out_video_file and out_audio_file; the audio stream is also
    written to out_audio_file + '.wav'.

    Finally an ffmpeg command line for muxing the two files is printed; the
    command is only printed, not executed.
    """

    if chunks_dir is None:
        chunks_dir = dest_dir

    if download:
        tracks = ((video_stream_index, video_quality_level),
                  (audio_stream_index, audio_quality_level))
        for stream_index, quality_level in tracks:
            download_chunks(url, manifest, stream_index, quality_level,
                            chunks_dir)

    video_path = os.path.join(dest_dir, out_video_file)
    audio_path = os.path.join(dest_dir, out_audio_file)
    wav_path = audio_path + '.wav'

    rebuild_stream(manifest, video_stream_index, video_quality_level,
                   chunks_dir, video_path)
    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
                   chunks_dir, audio_path, wav_path)

    delay = calc_tracks_delay(manifest, video_stream_index,
                              audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_template = ("ffmpeg -i %s \\\n"
                    "  -itsoffset %f -async 1 -i %s \\\n"
                    "  -vcodec copy -acodec copy ffout.mkv")

    print(mux_template % (video_path, delay, wav_path))
385
386
def options_parser():
    """Build the command line parser and return it.

    The options are: four boolean flags (info_only, manifest_only, download,
    sync_delay), two directories (dest_dir, chunks_dir) and four integer
    indexes (video/audio stream and video/audio quality level).
    """
    parser = OptionParser(
        usage="usage: %prog [options] <manifest URL or file>",
        version="%%prog %s" % __version__,
        description=__description__,
        epilog=__author_info__)

    # (short name, long name, action, dest, default, help)
    flag_options = (
        ("-i", "--info", "store_true", "info_only", False,
         "print Manifest info and exit"),
        ("-m", "--manifest-only", "store_true", "manifest_only", False,
         "download Manifest file and exit"),
        ("-n", "--no-download", "store_false", "download", True,
         "disable downloading chunks"),
        ("-s", "--sync-delay", "store_true", "sync_delay", False,
         "show the sync delay between the given streams and exit"),
    )
    for short_name, long_name, action, dest, default, help_text in flag_options:
        parser.add_option(short_name, long_name, action=action, dest=dest,
                          default=default, help=help_text)

    # (short name, long name, dest, default, help)
    dir_options = (
        ("-d", "--dest-dir", "dest_dir", tempfile.gettempdir(),
         "destination directory"),
        ("-c", "--chunks-dir", "chunks_dir", None,
         "directory containing chunks, if different from destination dir"),
    )
    for short_name, long_name, dest, default, help_text in dir_options:
        parser.add_option(short_name, long_name, metavar="<dir>", dest=dest,
                          default=default, help=help_text)

    # (short name, long name, dest, default, help)
    index_options = (
        ("-v", "--video-stream", "video_stream_index", 0,
         "index of the video stream"),
        ("-a", "--audio-stream", "audio_stream_index", 1,
         "index of the audio stream"),
        ("-q", "--video-quality", "video_quality_level", 0,
         "index of the video quality level"),
        ("-Q", "--audio-quality", "audio_quality_level", 0,
         "index of the audio quality level"),
    )
    for short_name, long_name, dest, default, help_text in index_options:
        parser.add_option(short_name, long_name, metavar="<n>", type="int",
                          dest=dest, default=default, help=help_text)

    return parser
424
425
def main():
    """Command line entry point.

    Parses the options, gets the manifest given as the only argument and
    then, depending on the options: exits right away (--manifest-only),
    prints the delay between the streams (--sync-delay), prints the manifest
    info (--info), or prints the manifest info and goes on to download and
    rebuild the streams.
    """
    parser = options_parser()
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.print_help()
        parser.exit(1)

    if not os.path.exists(options.dest_dir):
        # makedirs, so that a nested destination directory can be used even
        # when its parent directories do not exist yet
        os.makedirs(options.dest_dir, 0o755)

    url = args[0]
    manifest, url = get_manifest(url, options.dest_dir)

    if options.manifest_only:
        parser.exit(0)

    if options.sync_delay:
        print(calc_tracks_delay(manifest,
                                options.video_stream_index,
                                options.audio_stream_index))
        parser.exit(0)

    if options.info_only:
        print_manifest_info(manifest)
        parser.exit(0)

    print_manifest_info(manifest)

    smooth_download(url, manifest, options.dest_dir,
                    options.video_stream_index, options.audio_stream_index,
                    options.video_quality_level, options.audio_quality_level,
                    options.chunks_dir, options.download)


if __name__ == "__main__":
    main()