ac6e3889dc7eaf097292cb94c4a78a0150f8be02
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 # TODO:
22 #  - Handle HTTP errors:
23 #       "Connection reset by peer"
24 #       "Resource not  available"
25 #       "Gateway Time-out"
26 # - Support more Manifest formats:
27 #       WaveFormatEx attribute instead of PrivateCodecdata
28 #       'd' and other attributes in chunk element ('i', 's', 'q')
29 #
30 # basically, write a proper implementation of manifest parsing and chunk
31 # downloading
32
33
# Program metadata used by options_parser(): the text below is handed to
# OptionParser as the help description, the --version string and the epilog.
__description = "Download videos served using Smooth Streaming technology"
__version = "0.x"
__author_info = "Written by Antonio Ospite http://ao2.it"
37
38 import os
39 import sys
40 import xml.etree.ElementTree as etree
41 import urllib2
42 import struct
43 import tempfile
44 from optparse import OptionParser
45
46
def get_chunk_data(data):
    """Return the payload of the second box of a downloaded chunk.

    The chunk is read as two consecutive boxes, each starting with a
    4-byte big-endian size followed by a 4-byte type.  The code treats
    the first box as 'moof' and the second as 'mdat', but the type
    fields themselves are not inspected.

    data -- the raw chunk contents.

    Returns the bytes following the header of the second box.

    Raises ValueError when the chunk is too short to hold both size
    fields, or when the payload length does not match the size declared
    by the second box (i.e. the second box must be the last one).
    """

    box_header_len = 4 + len('mdat')

    if len(data) < 4:
        raise ValueError("chunk too short to contain a box size")
    moof_size = struct.unpack(">L", data[0:4])[0]

    if len(data) < moof_size + 4:
        raise ValueError("chunk too short to contain the second box size")
    mdat_size = struct.unpack(">L", data[moof_size:moof_size + 4])[0]

    data_start = moof_size + box_header_len
    data_size = mdat_size - box_header_len

    payload = data[data_start:]

    # An explicit check instead of assert: asserts vanish under "python -O"
    # and a corrupted chunk would silently end up in the rebuilt stream.
    if len(payload) != data_size:
        raise ValueError("chunk payload is %d bytes, expected %d" %
                (len(payload), data_size))

    return payload
61
62
def hexstring_to_bytes(hex_string):
    """Decode a string of hexadecimal digits, two digits per character.

    Each pair of digits becomes the character with that ordinal; a
    trailing single digit is decoded on its own.
    """
    digit_pairs = (hex_string[pos:pos + 2]
                   for pos in range(0, len(hex_string), 2))
    return "".join(chr(int(pair, 16)) for pair in digit_pairs)
69
70
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header (WAVEFORMATEX layout) to out_file.

    out_file           -- file object opened for binary writing
    fmt                -- dict with the keys wFormatTag, nChannels,
                          nSamplesPerSec, nAvgBytesPerSec, nBlockAlign and
                          wBitsPerSample; its 'cbSize' entry is overwritten
                          with the length of the decoded extra data
    codec_private_data -- hex string, decoded with hexstring_to_bytes() and
                          appended to the 'fmt ' chunk as extra data
    data_len           -- number of audio data bytes that the caller will
                          write after the header

    Only the header is written; the audio data itself is left to the caller.
    """

    extradata = hexstring_to_bytes(codec_private_data)

    fmt['cbSize'] = len(extradata)
    fmt_len = 18 + fmt['cbSize']

    # The RIFF size covers everything after the size field itself:
    # "WAVE", the 'fmt ' chunk (8 byte header + body) and the 'data' chunk
    # (8 byte header + body).  Leaving data_len out would declare the RIFF
    # chunk as ending right before the audio data.
    wave_len = len("WAVEfmt ") + 4 + fmt_len + len('data') + 4 + data_len

    out_file.write("RIFF")
    out_file.write(struct.pack('<L', wave_len))
    out_file.write("WAVEfmt ")
    out_file.write(struct.pack('<L', fmt_len))
    # '<' means little-endian without padding, so one pack call yields the
    # same bytes as packing every field separately.
    out_file.write(struct.pack('<HHLLHHH',
            fmt['wFormatTag'],
            fmt['nChannels'],
            fmt['nSamplesPerSec'],
            fmt['nAvgBytesPerSec'],
            fmt['nBlockAlign'],
            fmt['wBitsPerSample'],
            fmt['cbSize']))
    out_file.write(extradata)
    out_file.write("data")
    out_file.write(struct.pack('<L', data_len))
93
94
def get_manifest(base_url, dest_dir=tempfile.gettempdir(),
        manifest_file='Manifest'):
    """Load a Smooth Streaming manifest from a URL or from a local file.

    base_url      -- an http:// or https:// URL, or the path of a manifest
                     file already on disk
    dest_dir      -- directory where a downloaded manifest is saved; it is
                     created when missing
    manifest_file -- file name used for the saved manifest

    When base_url is a URL that does not already end in '/manifest',
    '.ismc' or '.csm' (case-insensitive), '/Manifest' is appended before
    downloading.

    Returns a tuple (manifest, base_url): the parsed ElementTree and the
    base URL to use for the chunks.  If the manifest has a <Clip> child
    with a Url attribute, the returned base URL is that URL, lowercased
    and with '/manifest' removed; otherwise it is the base_url argument.

    Raises Exception if the manifest MajorVersion is not "2".
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    if base_url.startswith(('http://', 'https://')):

        manifest_url = base_url
        if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
            manifest_url += '/Manifest'

        response = urllib2.urlopen(manifest_url)
        try:
            data = response.read()
        finally:
            response.close()

        manifest_path = os.path.join(dest_dir, manifest_file)
        # binary mode: store the downloaded bytes untouched
        with open(manifest_path, "wb") as f:
            f.write(data)
    else:
        manifest_path = base_url

    manifest = etree.parse(manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay
    clip = manifest.find("Clip")
    if clip is not None and "Url" in clip.attrib:
        actual_manifest_url = clip.attrib["Url"]
        base_url = actual_manifest_url.lower().replace("/manifest", "")

    return (manifest, base_url)
133
134
def print_manifest_info(manifest):
    """Print the streams found in the manifest and their quality levels.

    For every StreamIndex element the stream number and its Type are
    printed, followed by one line per QualityLevel: codec FourCC, frame
    size and bitrate for "video" streams; FourCC, sampling rate, bits per
    sample, channels and bitrate for "audio" streams.  Quality levels of
    other stream types produce no line.  A blank line ends the listing.

    The numbers printed are the indexes accepted by download_chunks() and
    rebuild_stream().
    """

    streams = manifest.findall('.//StreamIndex')

    for stream_number, stream in enumerate(streams):
        stream_type = stream.attrib["Type"]

        print("Stream: %s Type: %s" % (stream_number, stream_type))

        print("\tQuality Levels:")
        qualities = stream.findall("QualityLevel")
        # a separate index name, so the stream number is not clobbered
        for level, q in enumerate(qualities):
            bitrate = q.attrib["Bitrate"]
            fourcc = q.attrib["FourCC"]

            if stream_type == "video":
                size = "%sx%s" % (q.attrib["MaxWidth"], q.attrib["MaxHeight"])
                print("\t%2s: %4s %10s @ %7s bps" %
                        (level, fourcc, size, bitrate))
            elif stream_type == "audio":
                channels = q.attrib["Channels"]
                sampling_rate = q.attrib["SamplingRate"]
                bits_per_sample = q.attrib["BitsPerSample"]
                print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
                        (level, fourcc, sampling_rate, bits_per_sample,
                         channels, bitrate))

    # print("") and not print(): the latter would show "()" on Python 2
    print("")
162
163
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download all the chunks of one stream at one quality level.

    base_url      -- URL prefix to which the chunk paths are appended
    manifest      -- parsed manifest (anything providing findall())
    stream_index  -- position of the StreamIndex element in the manifest
    quality_level -- position of the QualityLevel element in the stream
    dest_dir      -- directory receiving the chunks; created when missing

    Chunks are stored in a sub-directory of dest_dir named after the
    expanded first component of the stream Url template, one file per <c>
    element.  A chunk file that already exists is not downloaded again,
    it is only read back to be counted in the total size.  Progress and
    the total size in bytes are printed on standard output.
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    stream = manifest.findall('.//StreamIndex')[stream_index]

    quality = stream.findall("QualityLevel")[quality_level]
    bitrate = quality.attrib["Bitrate"]

    quality_attributes = quality.findall("CustomAttributes/Attribute")
    custom_attributes = ""
    for attribute in quality_attributes:
        custom_attributes += "%s=%s," % (attribute.attrib["Name"],
                attribute.attrib["Value"])
    custom_attributes = custom_attributes.rstrip(',')

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    url = stream.attrib["Url"]
    url_parts = url.split('/')

    chunks_quality = url_parts[0].replace("{bitrate}", bitrate)
    chunks_quality = chunks_quality.replace("{CustomAttributes}", custom_attributes)
    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")
    progress = "\tChunks %10d/%-10d \r"
    data_size = 0
    print("\nDownloading Stream %d" % stream_index)
    # the carriage return makes every update overwrite the previous one
    sys.stdout.write(progress % (0, len(chunks)))
    sys.stdout.flush()
    for i, c in enumerate(chunks):
        t = c.attrib["t"]

        chunk_name = url_parts[1].replace("{start time}", t)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        if not os.path.exists(chunk_file):
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            response = urllib2.urlopen(chunk_url)
            try:
                data = response.read()
            finally:
                # release the connection even if the transfer fails
                response.close()

            with open(chunk_file, "wb") as f:
                f.write(data)
        else:
            with open(chunk_file, "rb") as f:
                data = f.read()

        data_size += len(data)
        sys.stdout.write(progress % (i + 1, len(chunks)))
        sys.stdout.flush()
    print("\tDownloaded size: %d" % data_size)
220
221
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
        dest_file_name, final_dest_file=None):
    """Concatenate the payloads of the downloaded chunks of one stream.

    manifest        -- parsed manifest (anything providing findall())
    stream_index    -- position of the StreamIndex element in the manifest
    quality_level   -- position of the QualityLevel element in the stream
    src_dir         -- directory containing the per-quality chunk directory,
                       laid out as download_chunks() does
    dest_file_name  -- file receiving the raw concatenated payloads
    final_dest_file -- for "audio" streams, the file receiving a WAV header
                       followed by a copy of the raw data; defaults to
                       dest_file_name

    For audio streams pass a final_dest_file different from
    dest_file_name: the WAV file is opened for writing before the raw
    data is read back, so using one path for both destroys the raw data.

    Progress and the total payload size are printed on standard output.
    """

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    quality = stream.findall("QualityLevel")[quality_level]
    bitrate = quality.attrib["Bitrate"]

    quality_attributes = quality.findall("CustomAttributes/Attribute")
    custom_attributes = ""
    for attribute in quality_attributes:
        custom_attributes += "%s=%s," % (attribute.attrib["Name"],
                attribute.attrib["Value"])
    custom_attributes = custom_attributes.rstrip(',')

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    url = stream.attrib["Url"]
    url_parts = url.split('/')

    chunks_quality = url_parts[0].replace("{bitrate}", bitrate)
    chunks_quality = chunks_quality.replace("{CustomAttributes}", custom_attributes)
    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    progress = "\tChunks %10d/%-10d \r"
    data_size = 0

    # "with" closes the raw file for every stream type, also on errors
    with open(dest_file_name, "wb") as dest_file:
        chunks = stream.findall("c")
        print("\nRebuilding Stream %d" % stream_index)
        sys.stdout.write(progress % (0, len(chunks)))
        sys.stdout.flush()
        for i, c in enumerate(chunks):
            t = c.attrib["t"]

            chunk_name = url_parts[1].replace("{start time}", t)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            sys.stdout.write(progress % (i + 1, len(chunks)))
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        # floor division keeps the value an integer on every Python version
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        with open(final_dest_file, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            with open(dest_file_name, "rb") as raw_file:
                # copy in blocks instead of loading the whole stream in memory
                while True:
                    block = raw_file.read(1024 * 1024)
                    if not block:
                        break
                    wav_file.write(block)

    print("")
    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
293
294
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return how many seconds the second stream starts after the first.

    The start of a stream is the 't' attribute of its first <c> element,
    converted to seconds with the TimeScale attribute of the stream.  The
    result is negative when the second stream starts earlier.
    """
    all_streams = manifest.findall('.//StreamIndex')
    pair = [all_streams[stream1_index], all_streams[stream2_index]]

    first_chunks = [stream.find("c") for stream in pair]
    start_times = [int(chunk.attrib['t']) for chunk in first_chunks]
    timescales = [float(stream.attrib['TimeScale']) for stream in pair]

    # calc difference in seconds
    return start_times[1] / timescales[1] - start_times[0] / timescales[0]
315
316
def get_clip_duration(manifest):
    """Return the Duration attribute of the manifest root, in seconds."""
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    root_attributes = manifest.getroot().attrib
    ticks = float(root_attributes['Duration'])

    # 10000000 is the default timescale
    return ticks / 10000000
322
323
def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
        video_stream_index=0, audio_stream_index=1,
        video_quality_level=0, audio_quality_level=0,
        chunks_dir=None, download=True,
        out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    The chunks of both streams are fetched into chunks_dir (dest_dir when
    chunks_dir is not given) unless download is false, then each stream is
    rebuilt into dest_dir; the audio stream also gets a '.wav' copy.
    Finally an ffmpeg command line for muxing the two files is printed:
    it is only shown, not executed.
    """
    if chunks_dir is None:
        chunks_dir = dest_dir

    video = (video_stream_index, video_quality_level)
    audio = (audio_stream_index, audio_quality_level)

    if download:
        for stream_index, quality_level in (video, audio):
            download_chunks(url, manifest, stream_index, quality_level,
                    chunks_dir)

    dest_video = os.path.join(dest_dir, out_video_file)
    dest_audio = os.path.join(dest_dir, out_audio_file)
    dest_wav = dest_audio + '.wav'

    rebuild_stream(manifest, video[0], video[1], chunks_dir, dest_video)
    rebuild_stream(manifest, audio[0], audio[1], chunks_dir, dest_audio,
            dest_wav)

    #duration = get_clip_duration(manifest)

    delay = calc_tracks_delay(manifest, video_stream_index,
            audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_template = ("ffmpeg -i %s \\\n"
                    "  -itsoffset %f -async 1 -i %s \\\n"
                    "  -vcodec copy -acodec copy ffout.mkv")

    print(mux_template % (dest_video, delay, dest_wav))
360
361
def options_parser():
    """Build the OptionParser describing the command line of the program."""
    parser = OptionParser(
            usage="usage: %prog [options] <manifest URL or file>",
            version="%%prog %s" % __version,
            description=__description,
            epilog=__author_info)

    # (flags, settings) pairs, in the order they appear in the help text
    option_table = (
        (("-i", "--info"),
         dict(action="store_true", dest="info_only", default=False,
              help="print Manifest info and exit")),
        (("-m", "--manifest-only"),
         dict(action="store_true", dest="manifest_only", default=False,
              help="download Manifest file and exit")),
        (("-n", "--no-download"),
         dict(action="store_false", dest="download", default=True,
              help="disable downloading chunks")),
        (("-s", "--sync-delay"),
         dict(action="store_true", dest="sync_delay", default=False,
              help="show the sync delay between the given streams and exit")),
        (("-d", "--dest-dir"),
         dict(metavar="<dir>", dest="dest_dir",
              default=tempfile.gettempdir(),
              help="destination directory")),
        (("-c", "--chunks-dir"),
         dict(metavar="<dir>", dest="chunks_dir", default=None,
              help="directory containing chunks, if different from destination dir")),
        (("-v", "--video-stream"),
         dict(metavar="<n>", type="int", dest="video_stream_index",
              default=0, help="index of the video stream")),
        (("-a", "--audio-stream"),
         dict(metavar="<n>", type="int", dest="audio_stream_index",
              default=1, help="index of the audio stream")),
        (("-q", "--video-quality"),
         dict(metavar="<n>", type="int", dest="video_quality_level",
              default=0, help="index of the video quality level")),
        (("-Q", "--audio-quality"),
         dict(metavar="<n>", type="int", dest="audio_quality_level",
              default=0, help="index of the audio quality level")),
    )

    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    return parser
399
400
if __name__ == "__main__":

    parser = options_parser()
    options, args = parser.parse_args()

    # exactly one positional argument is expected: the manifest URL or file
    if len(args) != 1:
        parser.print_help()
        parser.exit(1)

    # the URL may be replaced by the one found inside the manifest
    manifest, url = get_manifest(args[0], options.dest_dir)

    if options.manifest_only:
        parser.exit(0)

    if options.sync_delay:
        delay = calc_tracks_delay(manifest,
                options.video_stream_index,
                options.audio_stream_index)
        print(delay)
        parser.exit(0)

    # the manifest info is shown both with --info and before a download
    print_manifest_info(manifest)
    if options.info_only:
        parser.exit(0)

    smooth_download(url, manifest, options.dest_dir,
            options.video_stream_index, options.audio_stream_index,
            options.video_quality_level, options.audio_quality_level,
            options.chunks_dir, options.download)