3297102dd4c7b3a45de280edafb8841b35c6bcd8
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 # TODO:
22 #  - Handle HTTP errors:
23 #       "Connection reset by peer"
24 #       "Resource not  available"
25 #       "Gateway Time-out"
26 # - Support more Manifest formats:
#       WaveFormatEx attribute instead of CodecPrivateData
28 #       'd' and other attributes in chunk element ('i', 's', 'q')
29 #
30 # basically, write a proper implementation of manifest parsing and chunk
31 # downloading
32
33
# Program metadata; options_parser() passes these to OptionParser as the
# description, the --version string and the help epilog respectively.
__description = "Download videos served using Smooth Streaming technology"
__version = "0.x"
__author_info = "Written by Antonio Ospite http://ao2.it"
37
38 import os
39 import re
40 import sys
41 import xml.etree.ElementTree as etree
42 import urllib2
43 import struct
44 import tempfile
45 from optparse import OptionParser
46 from urlparse import urlparse, urlunparse
47
48
def get_chunk_data(data):
    """Extract the media payload from a downloaded chunk.

    The chunk is expected to consist of two consecutive boxes, each one
    starting with a 4-byte big-endian size (which includes the 8-byte box
    header) followed by a 4-byte type tag.  The first box is assumed to be
    the 'moof' box and the second one the 'mdat' box; the type tags
    themselves are not verified.

    Returns the bytes following the header of the second box.

    Raises ValueError if the chunk is truncated, or if the size declared
    by the second box does not match the amount of data that follows it.
    """
    box_header_len = 4 + len('mdat')

    if len(data) < 4:
        raise ValueError("chunk too short to contain a box size")
    moof_size = struct.unpack_from(">L", data, 0)[0]

    data_start = moof_size + box_header_len
    if len(data) < data_start:
        raise ValueError("chunk too short to contain the mdat box header")
    mdat_size = struct.unpack_from(">L", data, moof_size)[0]

    data_size = mdat_size - box_header_len
    available = len(data) - data_start

    # An explicit check instead of an assert: asserts are stripped when
    # running with -O and corrupted chunks would then go unnoticed.
    if available != data_size:
        raise ValueError("mdat box declares %d bytes of data, chunk has %d" %
                (data_size, available))

    return data[data_start:data_start + data_size]
63
64
def hexstring_to_bytes(hex_string):
    """Decode a string of hexadecimal digits into the characters it encodes.

    The input is consumed two digits at a time, each pair giving one
    character; a trailing unpaired digit is decoded on its own.
    """
    pairs = (hex_string[pos:pos + 2]
             for pos in range(0, len(hex_string), 2))

    return "".join(chr(int(pair, 16)) for pair in pairs)
71
72
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header to out_file.

    out_file            file object opened for binary writing
    fmt                 dict with the wave format fields: 'wFormatTag',
                        'nChannels', 'nSamplesPerSec', 'nAvgBytesPerSec',
                        'nBlockAlign' and 'wBitsPerSample'
    codec_private_data  hex string, decoded and appended to the format
                        fields as codec extra data
    data_len            size in bytes of the audio data that the caller
                        is going to write right after the header

    Note that fmt['cbSize'] is overwritten with the length of the decoded
    extra data.
    """
    extradata = hexstring_to_bytes(codec_private_data)

    fmt['cbSize'] = len(extradata)

    # 18 bytes of fixed format fields, followed by the extra data
    fmt_len = 18 + fmt['cbSize']

    # The RIFF size covers everything after the size field itself: the
    # "WAVE" tag, the whole "fmt " chunk and the whole "data" chunk,
    # payload included.
    riff_len = len("WAVE") + (8 + fmt_len) + (8 + data_len)

    out_file.write("RIFF")
    out_file.write(struct.pack('<L', riff_len))
    out_file.write("WAVEfmt ")
    out_file.write(struct.pack('<L', fmt_len))
    out_file.write(struct.pack('<HHLLHHH',
            fmt['wFormatTag'],
            fmt['nChannels'],
            fmt['nSamplesPerSec'],
            fmt['nAvgBytesPerSec'],
            fmt['nBlockAlign'],
            fmt['wBitsPerSample'],
            fmt['cbSize']))
    out_file.write(extradata)
    out_file.write("data")
    out_file.write(struct.pack('<L', data_len))
95
96
def get_manifest(url, dest_dir=tempfile.gettempdir()):
    """Returns the manifest and the new URL if this is changed

    url       HTTP(S) URL of the stream or of its Manifest, or the path of
              a Manifest file already available locally
    dest_dir  directory where a downloaded Manifest is saved (as a file
              named 'Manifest'); it is created if it does not exist

    Returns a (manifest, base_url) tuple: the parsed Manifest as an
    ElementTree, and the URL chunk requests have to be based on, that is
    the Manifest URL without the trailing '/Manifest' part.

    Raises Exception if the Manifest is not a version 2 one.
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    if manifest_url.startswith(('http://', 'https://')):

        response = urllib2.urlopen(manifest_url)
        try:
            data = response.read()
        finally:
            response.close()

        local_manifest_path = os.path.join(dest_dir, 'Manifest')

        # Binary mode: save the Manifest exactly as it was served
        with open(local_manifest_path, "wb") as f:
            f.write(data)
    else:
        # Not a remote resource, treat the argument as a local file
        local_manifest_path = url

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay,
    # the actual Manifest URL is the one given by the Clip element
    clip = manifest.find("Clip")
    if clip is not None:
        manifest_url = clip.attrib.get("Url", manifest_url)

    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
    base_url = manifest_pattern.sub("", manifest_url)

    return (manifest, base_url)
139
140
def print_manifest_info(manifest):
    """Print the streams listed in the Manifest and their quality levels.

    For each StreamIndex element its index and Type are printed, followed
    by one line per QualityLevel: codec, frame size and bitrate for
    "video" streams; codec, sampling rate, sample size, channels and
    bitrate for "audio" streams.  Quality levels of streams of any other
    type are not printed.
    """

    streams = manifest.findall('.//StreamIndex')

    for stream_idx, s in enumerate(streams):
        stream_type = s.attrib["Type"]

        print("Stream: %s Type: %s" % (stream_idx, stream_type))

        print("\tQuality Levels:")
        for quality_idx, q in enumerate(s.findall("QualityLevel")):
            bitrate = q.attrib["Bitrate"]
            fourcc = q.attrib["FourCC"]

            if stream_type == "video":
                size = "%sx%s" % (q.attrib["MaxWidth"], q.attrib["MaxHeight"])
                print("\t%2s: %4s %10s @ %7s bps" %
                        (quality_idx, fourcc, size, bitrate))
            elif stream_type == "audio":
                channels = q.attrib["Channels"]
                sampling_rate = q.attrib["SamplingRate"]
                bits_per_sample = q.attrib["BitsPerSample"]
                print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
                        (quality_idx, fourcc, sampling_rate,
                         bits_per_sample, channels, bitrate))

    # Blank line separating the summary from whatever is printed next
    print("")
168
169
def get_chunk_quality_string(stream, quality_level):
    """Return the quality part of the chunk URLs for the given quality level.

    The part of the stream "Url" template before the first '/' is taken,
    and its {bitrate} and {CustomAttributes} placeholders are filled in
    with the values of the QualityLevel element at index quality_level.
    """
    level = stream.findall("QualityLevel")[quality_level]
    bitrate = level.attrib["Bitrate"]

    # Custom attributes are rendered as "Name=Value" pairs separated by
    # commas
    pairs = ["%s=%s" % (attr.attrib["Name"], attr.attrib["Value"])
             for attr in level.findall("CustomAttributes/Attribute")]
    custom_attributes = ",".join(pairs).rstrip(',')

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    quality_template = stream.attrib["Url"].split('/')[0]

    with_bitrate = quality_template.replace("{bitrate}", bitrate)
    return with_bitrate.replace("{CustomAttributes}", custom_attributes)
190
191
def get_chunk_name_string(stream, chunk):
    """Return the name part of the URL of the given chunk.

    The part of the stream "Url" template between the first and the second
    '/' is taken, and its {start time} placeholder is replaced with the
    value of the 't' attribute of the chunk.
    """
    start_time = chunk.attrib["t"]
    name_template = stream.attrib["Url"].split('/')[1]

    return name_template.replace("{start time}", start_time)
198
199
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download all the chunks of a stream at the given quality level.

    base_url       URL the chunk URLs are built on
    manifest       parsed Manifest
    stream_index   index of the StreamIndex element to download
    quality_level  index of the QualityLevel element to use
    dest_dir       destination directory, created if missing; chunks are
                   saved in a subdirectory named after the quality string

    Chunks already present on disk are not downloaded again.  A chunk
    which fails to download is reported and skipped, and it does not
    contribute to the size printed at the end.
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")

    # The trailing "\r" makes each update overwrite the previous one
    progress = "\tChunks %10d/%-10d \r"

    data_size = 0
    print("\nDownloading Stream %d" % stream_index)
    sys.stdout.write(progress % (0, len(chunks)))
    sys.stdout.flush()
    for i, c in enumerate(chunks):

        chunk_name = get_chunk_name_string(stream, c)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        # Reset for every chunk, so that a failure can never account for
        # the data of the previous chunk once again
        data = None

        if os.path.exists(chunk_file):
            with open(chunk_file, "rb") as f:
                data = f.read()
        else:
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            try:
                response = urllib2.urlopen(chunk_url)
                try:
                    data = response.read()
                finally:
                    response.close()

                with open(chunk_file, "wb") as f:
                    f.write(data)
            except Exception as e:
                # Best effort: report the problem and go on with the
                # remaining chunks
                print(e)
                data = None

        if data is not None:
            data_size += len(data)

        sys.stdout.write(progress % (i + 1, len(chunks)))
        sys.stdout.flush()
    print("\tDownloaded size: %d" % data_size)
244
245
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
        dest_file_name, final_dest_file=None):
    """Rebuild a stream by concatenating the payload of its chunks.

    manifest         parsed Manifest
    stream_index     index of the StreamIndex element to rebuild
    quality_level    index of the QualityLevel element the chunks belong to
    src_dir          directory holding the per-quality chunk directories
    dest_file_name   file the raw concatenated data is written to
    final_dest_file  for "audio" streams, file which gets the raw data
                     preceded by a WAV header; defaults to dest_file_name,
                     in which case the raw file is replaced by the WAV one
    """

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    chunks = stream.findall("c")

    # The trailing "\r" makes each update overwrite the previous one
    progress = "\tChunks %10d/%-10d \r"

    data_size = 0
    with open(dest_file_name, "wb") as dest_file:
        print("\nRebuilding Stream %d" % stream_index)
        sys.stdout.write(progress % (0, len(chunks)))
        sys.stdout.flush()
        for i, c in enumerate(chunks):

            chunk_name = get_chunk_name_string(stream, c)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            sys.stdout.write(progress % (i + 1, len(chunks)))
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        quality = stream.findall("QualityLevel")[quality_level]
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        # When the WAV file has to take the place of the raw file, build
        # it aside first: opening it directly would truncate the raw data
        # before it has been copied.
        same_file = (os.path.abspath(final_dest_file) ==
                os.path.abspath(dest_file_name))
        if same_file:
            wav_file_name = final_dest_file + '.tmp'
        else:
            wav_file_name = final_dest_file

        with open(wav_file_name, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            with open(dest_file_name, "rb") as raw_file:
                # Copy block by block, the stream can be large
                for block in iter(lambda: raw_file.read(1024 * 1024), b""):
                    wav_file.write(block)

        if same_file:
            os.remove(dest_file_name)
            os.rename(wav_file_name, final_dest_file)

    print("")
    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
302
303
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return the delay, in seconds, of the second stream w.r.t. the first.

    The start of a stream is the 't' attribute of its first chunk divided
    by the 'TimeScale' attribute of the stream; the result is the start of
    the stream at stream2_index minus the start of the one at
    stream1_index.
    """
    streams = manifest.findall('.//StreamIndex')
    pair = (streams[stream1_index], streams[stream2_index])

    first_chunks = [s.find("c") for s in pair]
    start_times = [int(chunk.attrib['t']) for chunk in first_chunks]
    timescales = [float(s.attrib['TimeScale']) for s in pair]

    # start times converted to seconds
    start1, start2 = [t / scale for t, scale in zip(start_times, timescales)]

    return start2 - start1
324
325
def get_clip_duration(manifest):
    """Return the 'Duration' attribute of the Manifest root, in seconds."""
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    default_timescale = 10000000
    root = manifest.getroot()

    return float(root.attrib['Duration']) / default_timescale
331
332
def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
        video_stream_index=0, audio_stream_index=1,
        video_quality_level=0, audio_quality_level=0,
        chunks_dir=None, download=True,
        out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    The chunks of the two selected streams are downloaded into chunks_dir
    (dest_dir when not given) unless download is false, then the streams
    are rebuilt into dest_dir, the audio one also as a '.wav' file.
    Finally an ffmpeg command line for muxing the two rebuilt files is
    printed; the command is not executed.
    """
    if chunks_dir is None:
        chunks_dir = dest_dir

    # (stream index, quality level) selections, video first
    video_selection = (video_stream_index, video_quality_level)
    audio_selection = (audio_stream_index, audio_quality_level)

    if download:
        for stream_index, quality_level in (video_selection, audio_selection):
            download_chunks(url, manifest, stream_index, quality_level,
                    chunks_dir)

    dest_video = os.path.join(dest_dir, out_video_file)
    dest_audio = os.path.join(dest_dir, out_audio_file)
    dest_wav = dest_audio + '.wav'

    rebuild_stream(manifest, video_stream_index, video_quality_level,
            chunks_dir, dest_video)
    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
            chunks_dir, dest_audio, dest_wav)

    #duration = get_clip_duration(manifest)

    delay = calc_tracks_delay(manifest, video_stream_index,
            audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_template = ("ffmpeg -i %s \\\n"
                    "  -itsoffset %f -async 1 -i %s \\\n"
                    "  -vcodec copy -acodec copy ffout.mkv")

    print(mux_template % (dest_video, delay, dest_wav))
369
370
def options_parser():
    """Build and return the OptionParser for the command line interface."""
    parser = OptionParser(
            usage="usage: %prog [options] <manifest URL or file>",
            version="%%prog %s" % __version,
            description=__description,
            epilog=__author_info)

    # (short, long, action, dest, default, help)
    flag_options = (
        ("-i", "--info", "store_true", "info_only", False,
         "print Manifest info and exit"),
        ("-m", "--manifest-only", "store_true", "manifest_only", False,
         "download Manifest file and exit"),
        ("-n", "--no-download", "store_false", "download", True,
         "disable downloading chunks"),
        ("-s", "--sync-delay", "store_true", "sync_delay", False,
         "show the sync delay between the given streams and exit"),
    )

    # (short, long, dest, default, help)
    dir_options = (
        ("-d", "--dest-dir", "dest_dir", tempfile.gettempdir(),
         "destination directory"),
        ("-c", "--chunks-dir", "chunks_dir", None,
         "directory containing chunks, if different from destination dir"),
    )

    # (short, long, dest, default, help)
    index_options = (
        ("-v", "--video-stream", "video_stream_index", 0,
         "index of the video stream"),
        ("-a", "--audio-stream", "audio_stream_index", 1,
         "index of the audio stream"),
        ("-q", "--video-quality", "video_quality_level", 0,
         "index of the video quality level"),
        ("-Q", "--audio-quality", "audio_quality_level", 0,
         "index of the audio quality level"),
    )

    # Options are registered in the order they are listed in the help
    for short, long_name, action, dest, default, text in flag_options:
        parser.add_option(short, long_name, action=action, dest=dest,
                          default=default, help=text)

    for short, long_name, dest, default, text in dir_options:
        parser.add_option(short, long_name, metavar="<dir>", dest=dest,
                          default=default, help=text)

    for short, long_name, dest, default, text in index_options:
        parser.add_option(short, long_name, metavar="<n>", type="int",
                          dest=dest, default=default, help=text)

    return parser
408
409
if __name__ == "__main__":

    parser = options_parser()
    options, args = parser.parse_args()

    # Exactly one argument is accepted: the manifest URL or file
    if len(args) != 1:
        parser.print_help()
        parser.exit(1)

    manifest, url = get_manifest(args[0], options.dest_dir)

    # The Manifest has been fetched by now, nothing else to do
    if options.manifest_only:
        parser.exit(0)

    if options.sync_delay:
        delay = calc_tracks_delay(manifest,
                options.video_stream_index,
                options.audio_stream_index)
        print(delay)
        parser.exit(0)

    # The Manifest info is shown in any case, before downloading too
    print_manifest_info(manifest)

    if options.info_only:
        parser.exit(0)

    smooth_download(url, manifest,
            dest_dir=options.dest_dir,
            video_stream_index=options.video_stream_index,
            audio_stream_index=options.audio_stream_index,
            video_quality_level=options.video_quality_level,
            audio_quality_level=options.audio_quality_level,
            chunks_dir=options.chunks_dir,
            download=options.download)