smooth-dl.py: if there is a Clip element use its manifest
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 # TODO:
22 #  - Handle HTTP errors:
23 #       "Connection reset by peer"
24 #       "Resource not  available"
25 #       "Gateway Time-out"
26 # - Support more Manifest formats:
27 #       WaveFormatEx attribute instead of CodecPrivateData
28 #       'd' and other attributes in chunk element ('i', 's', 'q')
29 #
30 # basically, write a proper implementation of manifest parsing and chunk
31 # downloading
32
33
34 __description = "Download videos served using Smooth Streaming technology"
35 __version = "0.x"
36 __author_info = "Written by Antonio Ospite http://ao2.it"
37
38 import os
39 import re
40 import sys
41 import xml.etree.ElementTree as etree
42 import urllib2
43 import struct
44 import tempfile
45 from optparse import OptionParser
46 from urlparse import urlparse, urlunparse
47
48
def get_chunk_data(data):
    """Extract the media payload from a downloaded chunk.

    The chunk is read as two consecutive boxes, each starting with a 32-bit
    big-endian size field: the first one (the 'moof' box) is skipped, and
    the payload of the second one (the 'mdat' box) is returned, i.e.
    everything after its 8-byte header (size field + 4-character tag).
    The box tags themselves are not verified.

    The payload must extend exactly to the end of data.

    Raises ValueError if data is too short to hold the two box headers, or
    if the size declared by the second box does not match the amount of
    data actually present.
    """
    size_field_len = 4
    box_header_len = size_field_len + len('mdat')

    if len(data) < size_field_len:
        raise ValueError("chunk too short: %d bytes, no room for a box size"
                         % len(data))
    moof_size = struct.unpack(">L", data[0:size_field_len])[0]

    if len(data) < moof_size + box_header_len:
        raise ValueError("chunk too short: %d bytes, 'mdat' header expected "
                         "at offset %d" % (len(data), moof_size))
    mdat_size = struct.unpack(
        ">L", data[moof_size:moof_size + size_field_len])[0]

    data_start = moof_size + box_header_len
    data_size = mdat_size - box_header_len

    payload = data[data_start:]
    # An explicit check instead of assert: it must not vanish under -O
    if len(payload) != data_size:
        raise ValueError("chunk payload is %d bytes but 'mdat' declares %d"
                         % (len(payload), data_size))

    return payload
63
64
def hexstring_to_bytes(hex_string):
    """Decode a string of hexadecimal digits into a string of characters.

    The digits are consumed two at a time and each pair becomes the
    character with that code; a trailing unpaired digit is decoded on its
    own.  int() raises ValueError on anything that is not a hex digit.
    """
    pairs = (hex_string[pos:pos + 2] for pos in range(0, len(hex_string), 2))
    return "".join(chr(int(pair, 16)) for pair in pairs)
71
72
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header, up to and including the 'data' size field.

    out_file            file object opened for binary writing
    fmt                 dict with the keys wFormatTag, nChannels,
                        nSamplesPerSec, nAvgBytesPerSec, nBlockAlign and
                        wBitsPerSample; its 'cbSize' entry is overwritten
                        with the length of the decoded codec private data
    codec_private_data  hex string, decoded and appended to the 'fmt ' chunk
    data_len            size in bytes of the audio payload the caller will
                        write right after the header
    """
    extradata = hexstring_to_bytes(codec_private_data)

    fmt['cbSize'] = len(extradata)
    # 18 bytes are taken by the seven fixed fields packed below
    fmt_len = 18 + fmt['cbSize']

    # The RIFF size covers everything after the 8-byte "RIFF" + size
    # preamble: "WAVE", the whole 'fmt ' chunk, the 'data' chunk header AND
    # the payload itself.
    riff_len = (len("WAVEfmt ") + 4 + fmt_len +
                len('data') + 4 + data_len)

    fixed_fields = struct.pack('<HHLLHHH',
                               fmt['wFormatTag'],
                               fmt['nChannels'],
                               fmt['nSamplesPerSec'],
                               fmt['nAvgBytesPerSec'],
                               fmt['nBlockAlign'],
                               fmt['wBitsPerSample'],
                               fmt['cbSize'])

    out_file.write("RIFF")
    out_file.write(struct.pack('<L', riff_len))
    out_file.write("WAVEfmt ")
    out_file.write(struct.pack('<L', fmt_len))
    out_file.write(fixed_fields)
    out_file.write(extradata)
    out_file.write("data")
    out_file.write(struct.pack('<L', data_len))
95
def download_file(src_url, dest_file, mode):
    """Fetch src_url and return the response body.

    If dest_file is not empty the body is also saved there, opening the
    file with the given mode (e.g. "wb"); otherwise mode is ignored.

    Any urllib2.URLError (which includes urllib2.HTTPError) is reported on
    stderr and then re-raised.
    """
    try:
        response = urllib2.urlopen(src_url)
        try:
            data = response.read()
        finally:
            # Release the connection even if reading fails half way
            response.close()
    except urllib2.URLError:
        sys.stderr.write("Error while downloading URL: %s\n" % src_url)
        raise

    if dest_file:
        with open(dest_file, mode) as f:
            f.write(data)

    return data
110
def get_manifest(url, dest_dir=tempfile.gettempdir()):
    """Load the Manifest for url and return it with its base URL.

    The query string is dropped from url and '/Manifest' is appended unless
    the result already ends in '/manifest', '.ismc' or '.csm' (case
    insensitive).  An http:// or https:// Manifest is downloaded to
    dest_dir/Manifest (dest_dir is created if missing); anything else is
    treated as the path of a local file and url is parsed as is.

    If the Manifest has a Clip element (an intermediate client Manifest,
    like in Rai Replay), the Manifest at the Clip's Url is downloaded and
    used in its place.

    Returns a (manifest, base_url) tuple where manifest is the parsed
    ElementTree and base_url is the URL of the Manifest in use without a
    trailing '/manifest'.

    Raises Exception if the MajorVersion of the first Manifest is not "2".
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    if manifest_url.startswith(('http://', 'https://')):
        local_manifest_path = os.path.join(dest_dir, 'Manifest')
        # Binary mode: store the bytes exactly as they were received
        download_file(manifest_url, local_manifest_path, "wb")
    else:
        local_manifest_path = url

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay
    clip = manifest.find("Clip")
    if clip is not None:
        manifest_url = clip.attrib["Url"]
        clip_manifest_data = download_file(manifest_url, None, None)
        # Callers need a parsed tree (findall(), getroot()), not the raw
        # downloaded text.
        manifest = etree.ElementTree(etree.fromstring(clip_manifest_data))

    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
    base_url = manifest_pattern.sub("", manifest_url)

    return (manifest, base_url)
147
148
def print_manifest_info(manifest):
    """Print every stream of the manifest together with its quality levels.

    For each StreamIndex element its position and Type are printed,
    followed by one line per QualityLevel: FourCC, frame size and bitrate
    for "video" streams; FourCC, sampling rate, bits per sample, channels
    and bitrate for "audio" streams.  Quality levels of other stream types
    are not listed.  A blank line closes the listing.
    """

    streams = manifest.findall('.//StreamIndex')

    for stream_number, s in enumerate(streams):
        stream_type = s.attrib["Type"]

        print("Stream: %s Type: %s" % (stream_number, stream_type))

        print("\tQuality Levels:")
        for level, q in enumerate(s.findall("QualityLevel")):
            bitrate = q.attrib["Bitrate"]
            fourcc = q.attrib["FourCC"]

            if stream_type == "video":
                size = "%sx%s" % (q.attrib["MaxWidth"], q.attrib["MaxHeight"])
                print("\t%2s: %4s %10s @ %7s bps" %
                      (level, fourcc, size, bitrate))
            elif stream_type == "audio":
                channels = q.attrib["Channels"]
                sampling_rate = q.attrib["SamplingRate"]
                bits_per_sample = q.attrib["BitsPerSample"]
                print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
                      (level, fourcc, sampling_rate, bits_per_sample,
                       channels, bitrate))

    print("")
176
177
def get_chunk_quality_string(stream, quality_level):
    """Return the quality part of the chunk URL for the given quality level.

    The first path component of the stream's Url template is taken and its
    {bitrate} and {CustomAttributes} placeholders are filled in with the
    Bitrate of the selected QualityLevel and with its custom attributes,
    rendered as comma separated Name=Value pairs.
    """
    level = stream.findall("QualityLevel")[quality_level]
    bitrate = level.attrib["Bitrate"]

    pairs = ["%s=%s" % (attribute.attrib["Name"], attribute.attrib["Value"])
             for attribute in level.findall("CustomAttributes/Attribute")]
    # Commas at the very end of the list are dropped as well
    custom_attributes = ",".join(pairs).rstrip(',')

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    quality_template = stream.attrib["Url"].split('/')[0]

    return quality_template.replace("{bitrate}", bitrate).replace(
        "{CustomAttributes}", custom_attributes)
198
199
def get_chunk_name_string(stream, chunk):
    """Return the fragment part of the chunk URL for the given chunk.

    The second path component of the stream's Url template is taken and
    its {start time} placeholder is replaced with the chunk's 't' attribute.
    """
    start_time = chunk.attrib["t"]
    name_template = stream.attrib["Url"].split('/')[1]

    return name_template.replace("{start time}", start_time)
206
207
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download all the chunks of one stream at the given quality level.

    Chunks are saved as dest_dir/<quality string>/<chunk name>; both
    directories are created if they do not exist.  A chunk whose file is
    already there is not downloaded again.  Progress and the total size of
    the chunks (downloaded now or found on disk) are printed on stdout.
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")
    # The trailing "\r" makes every update overwrite the previous one
    progress = "\tChunks %10d/%-10d \r"
    data_size = 0
    print("\nDownloading Stream %d" % stream_index)
    sys.stdout.write(progress % (0, len(chunks)))
    sys.stdout.flush()
    for i, c in enumerate(chunks):

        chunk_name = get_chunk_name_string(stream, c)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        if os.path.exists(chunk_file):
            # Already on disk: only its size is needed, no reason to load
            # the whole file in memory.
            data_size += os.path.getsize(chunk_file)
        else:
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            data_size += len(download_file(chunk_url, chunk_file, "wb"))

        sys.stdout.write(progress % (i + 1, len(chunks)))
        sys.stdout.flush()
    print("\tDownloaded size: %s" % data_size)
243
244
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
        dest_file_name, final_dest_file=None):
    """Concatenate the payloads of the chunks of a stream into one file.

    The chunks are read from src_dir/<quality string>/ and the data
    extracted from each of them is appended to dest_file_name.

    For a stream of Type "audio", final_dest_file is then written as a WAV
    header (built from the attributes of the selected QualityLevel)
    followed by a copy of dest_file_name.  final_dest_file defaults to
    dest_file_name, but for audio streams it has to be a different file:
    dest_file_name is read back after final_dest_file has been opened for
    writing.

    Progress and the size of the rebuilt data are printed on stdout.
    """
    import shutil  # only needed here, for the streamed copy of audio data

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    chunks = stream.findall("c")
    # The trailing "\r" makes every update overwrite the previous one
    progress = "\tChunks %10d/%-10d \r"
    data_size = 0

    # The with blocks make sure the files are closed (and the output
    # flushed) for every stream type, and on errors too.
    with open(dest_file_name, "wb") as dest_file:
        print("\nRebuilding Stream %d" % stream_index)
        sys.stdout.write(progress % (0, len(chunks)))
        sys.stdout.flush()
        for i, c in enumerate(chunks):

            chunk_name = get_chunk_name_string(stream, c)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            sys.stdout.write(progress % (i + 1, len(chunks)))
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        quality = stream.findall("QualityLevel")[quality_level]
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        # Bitrate is in bits per second; the field must stay an integer
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        with open(final_dest_file, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            with open(dest_file_name, "rb") as raw_file:
                # Copied in blocks, the raw file can be large
                shutil.copyfileobj(raw_file, wav_file)

    print("")
    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
301
302
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return the delay in seconds of stream 2 with respect to stream 1.

    The start time of a stream is the 't' attribute of its first chunk
    divided by the TimeScale attribute of the stream; the result is the
    start time of stream 2 minus the start time of stream 1.
    """
    streams = manifest.findall('.//StreamIndex')
    first, second = streams[stream1_index], streams[stream2_index]

    first_chunk, second_chunk = first.find("c"), second.find("c")

    first_ticks = int(first_chunk.attrib['t'])
    second_ticks = int(second_chunk.attrib['t'])

    # floats, so that the divisions below are never truncated
    first_scale = float(first.attrib['TimeScale'])
    second_scale = float(second.attrib['TimeScale'])

    return second_ticks / second_scale - first_ticks / first_scale
323
324
def get_clip_duration(manifest):
    """Return the Duration attribute of the manifest root, in seconds."""
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    root = manifest.getroot()
    ticks = float(root.attrib['Duration'])

    # 10000000 is the default timescale
    return ticks / 10000000
330
331
def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
        video_stream_index=0, audio_stream_index=1,
        video_quality_level=0, audio_quality_level=0,
        chunks_dir=None, download=True,
        out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    Unless download is false, the chunks of both streams are fetched from
    url into chunks_dir (dest_dir when chunks_dir is None).  The streams
    are then rebuilt into dest_dir as out_video_file and out_audio_file,
    the latter also with a '.wav' twin, and an ffmpeg command line to mux
    the two files, including the delay between the tracks, is printed.
    """
    if chunks_dir is None:
        chunks_dir = dest_dir

    # (stream index, quality level): video goes first, then audio
    video = (video_stream_index, video_quality_level)
    audio = (audio_stream_index, audio_quality_level)

    if download:
        for stream_index, quality_level in (video, audio):
            download_chunks(url, manifest, stream_index, quality_level,
                            chunks_dir)

    video_path = os.path.join(dest_dir, out_video_file)
    audio_path = os.path.join(dest_dir, out_audio_file)
    wav_path = audio_path + '.wav'

    rebuild_stream(manifest, video[0], video[1], chunks_dir, video_path)
    rebuild_stream(manifest, audio[0], audio[1], chunks_dir, audio_path,
                   wav_path)

    #duration = get_clip_duration(manifest)

    delay = calc_tracks_delay(manifest, video_stream_index,
                              audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_template = ("ffmpeg -i %s \\\n"
                    "  -itsoffset %f -async 1 -i %s \\\n"
                    "  -vcodec copy -acodec copy ffout.mkv")

    print(mux_template % (video_path, delay, wav_path))
368
369
def options_parser():
    """Build and return the OptionParser for the command line interface."""
    parser = OptionParser(
        usage="usage: %prog [options] <manifest URL or file>",
        version="%%prog %s" % __version,
        description=__description,
        epilog=__author_info)

    # On/off switches: (short, long, action, dest, default, help)
    switches = (
        ("-i", "--info", "store_true", "info_only", False,
         "print Manifest info and exit"),
        ("-m", "--manifest-only", "store_true", "manifest_only", False,
         "download Manifest file and exit"),
        ("-n", "--no-download", "store_false", "download", True,
         "disable downloading chunks"),
        ("-s", "--sync-delay", "store_true", "sync_delay", False,
         "show the sync delay between the given streams and exit"),
    )
    for short_flag, long_flag, action, dest, default, help_text in switches:
        parser.add_option(short_flag, long_flag,
                          action=action, dest=dest,
                          default=default, help=help_text)

    # Options taking a directory: (short, long, dest, default, help)
    directories = (
        ("-d", "--dest-dir", "dest_dir", tempfile.gettempdir(),
         "destination directory"),
        ("-c", "--chunks-dir", "chunks_dir", None,
         "directory containing chunks, if different from destination dir"),
    )
    for short_flag, long_flag, dest, default, help_text in directories:
        parser.add_option(short_flag, long_flag, metavar="<dir>",
                          dest=dest, default=default,
                          help=help_text)

    # Options taking an integer index: (short, long, dest, default, help)
    indexes = (
        ("-v", "--video-stream", "video_stream_index", 0,
         "index of the video stream"),
        ("-a", "--audio-stream", "audio_stream_index", 1,
         "index of the audio stream"),
        ("-q", "--video-quality", "video_quality_level", 0,
         "index of the video quality level"),
        ("-Q", "--audio-quality", "audio_quality_level", 0,
         "index of the audio quality level"),
    )
    for short_flag, long_flag, dest, default, help_text in indexes:
        parser.add_option(short_flag, long_flag, metavar="<n>",
                          type="int", dest=dest, default=default,
                          help=help_text)

    return parser
407
408
if __name__ == "__main__":

    parser = options_parser()
    options, args = parser.parse_args()

    # Exactly one positional argument is accepted: the manifest URL or file
    if len(args) != 1:
        parser.print_help()
        parser.exit(1)

    # get_manifest() also gives back the base URL to fetch the chunks from
    manifest, url = get_manifest(args[0], options.dest_dir)

    if options.manifest_only:
        parser.exit(0)

    if options.sync_delay:
        print(calc_tracks_delay(manifest,
                                options.video_stream_index,
                                options.audio_stream_index))
        parser.exit(0)

    # The manifest info is shown both with --info and before a download
    print_manifest_info(manifest)
    if options.info_only:
        parser.exit(0)

    smooth_download(url, manifest,
                    dest_dir=options.dest_dir,
                    video_stream_index=options.video_stream_index,
                    audio_stream_index=options.audio_stream_index,
                    video_quality_level=options.video_quality_level,
                    audio_quality_level=options.audio_quality_level,
                    chunks_dir=options.chunks_dir,
                    download=options.download)