smooth-dl.py: fix some pep8 issue
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 # TODO:
22 #  - Handle HTTP errors:
23 #       "Connection reset by peer"
24 #       "Resource not  available"
25 #       "Gateway Time-out"
#  - Support more Manifest formats:
#       WaveFormatEx attribute instead of CodecPrivateData
#       'd' and other attributes in chunk element ('i', 's', 'q')
#
# Basically, write a proper implementation of manifest parsing and chunk
# downloading.
32
33 import os
34 import re
35 import sys
36 import xml.etree.ElementTree as etree
37 import urllib2
38 import struct
39 import tempfile
40 from optparse import OptionParser
41 from urlparse import urlparse, urlunparse
42
# Program metadata; options_parser() passes these to OptionParser as the
# description, the --version string and the help epilog respectively.
__description = "Download videos served using Smooth Streaming technology"
__version = "0.x"
__author_info = "Written by Antonio Ospite http://ao2.it"
46
47
def get_chunk_data(data):
    """Return the media payload carried by one downloaded chunk.

    The chunk is read as two consecutive size-prefixed boxes.  The first
    four bytes hold the big-endian size of the leading box (called "moof"
    here); the box that follows starts with its own big-endian size and a
    four-character type tag ('mdat').  Everything after that eight-byte
    header is returned.

    Raises ValueError when the chunk is too short to contain both headers
    or when the payload length differs from the size declared by the
    second box.  An explicit exception is used instead of an assert so
    the check is still performed when Python runs with -O.
    """
    size_len = 4
    header_len = size_len + len('mdat')

    if len(data) < size_len:
        raise ValueError("chunk too short: no room for the moof size field")
    moof_size = struct.unpack(">L", data[0:size_len])[0]

    if len(data) < moof_size + header_len:
        raise ValueError("chunk too short: no room for the mdat header")
    mdat_size = struct.unpack(">L", data[moof_size:moof_size + size_len])[0]

    data_start = moof_size + header_len
    data_size = mdat_size - header_len

    # The mdat box is expected to be the last thing in the chunk, so the
    # declared size must account for every remaining byte.
    payload = data[data_start:]
    if len(payload) != data_size:
        raise ValueError("mdat box declares %d payload bytes, found %d" %
                         (data_size, len(payload)))

    return payload
62
63
def hexstring_to_bytes(hex_string):
    """Decode a string of hexadecimal digits into a string of characters.

    The input is consumed two digits at a time and each pair becomes the
    character with that ordinal.  When the length is odd, the trailing
    single digit is decoded on its own.
    """
    digit_pairs = (hex_string[pos:pos + 2]
                   for pos in range(0, len(hex_string), 2))
    return "".join(chr(int(pair, 16)) for pair in digit_pairs)
70
71
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header announcing data_len bytes of audio.

    out_file            -- file object to write the header to
    fmt                 -- dict providing 'wFormatTag', 'nChannels',
                           'nSamplesPerSec', 'nAvgBytesPerSec',
                           'nBlockAlign' and 'wBitsPerSample'; its
                           'cbSize' entry is overwritten with the length
                           of the decoded codec private data
    codec_private_data  -- hexadecimal string; its decoded form is
                           appended to the format chunk
    data_len            -- size in bytes of the audio payload the caller
                           writes after the header
    """
    extradata = hexstring_to_bytes(codec_private_data)

    fmt['cbSize'] = len(extradata)
    # 18 bytes of fixed format fields, followed by the extra codec data
    fmt_len = 18 + fmt['cbSize']

    # The RIFF size counts everything that follows the size field itself:
    # the "WAVEfmt " tags, the fmt chunk, the "data" chunk header and the
    # audio payload.
    wave_len = (len("WAVEfmt ") + 4 + fmt_len +
                len('data') + 4 + data_len)

    out_file.write("RIFF")
    out_file.write(struct.pack('<L', wave_len))
    out_file.write("WAVEfmt ")
    out_file.write(struct.pack('<L', fmt_len))
    out_file.write(struct.pack('<HHLLHHH',
                               fmt['wFormatTag'],
                               fmt['nChannels'],
                               fmt['nSamplesPerSec'],
                               fmt['nAvgBytesPerSec'],
                               fmt['nBlockAlign'],
                               fmt['wBitsPerSample'],
                               fmt['cbSize']))
    out_file.write(extradata)
    out_file.write("data")
    out_file.write(struct.pack('<L', data_len))
94
95
def download_file(src_url, dest_file, mode):
    """Fetch src_url and return the body of the response.

    When dest_file is given (not None or empty) the body is also saved
    there, with the file opened in the given mode (for instance "w" or
    "wb").  An urllib2.HTTPError is reported on stderr and re-raised.
    """
    try:
        response = urllib2.urlopen(src_url)
        try:
            data = response.read()
        finally:
            # release the connection even when the read fails
            response.close()
    except urllib2.HTTPError:
        sys.stderr.write("Error while downloading URL: %s\n" % src_url)
        raise

    if dest_file:
        with open(dest_file, mode) as f:
            f.write(data)

    return data
110
111
def get_manifest(url, dest_dir=tempfile.gettempdir()):
    """Return the manifest and the new URL if this is changed.

    url is either an HTTP(S) location or the path of a local Manifest
    file.  Any query string is dropped and '/Manifest' is appended unless
    the URL already ends in '/manifest', '.ismc' or '.csm' (case
    insensitive).  Remote manifests are saved as 'Manifest' inside
    dest_dir, which is created when missing.

    When the manifest contains a <Clip> element, the manifest its Url
    attribute points to is downloaded and used instead.

    Returns a (manifest, base_url) tuple: manifest is a parsed
    ElementTree and base_url is the manifest URL without its trailing
    '/manifest' component.

    Raises Exception when the MajorVersion attribute is not "2".
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    if manifest_url.startswith(('http://', 'https://')):
        local_manifest_path = os.path.join(dest_dir, 'Manifest')
        download_file(manifest_url, local_manifest_path, "w")
    else:
        local_manifest_path = url

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay
    clip = manifest.find("Clip")
    if clip is not None:
        manifest_url = clip.attrib["Url"]
        manifest_data = download_file(manifest_url, None, None)
        # Parse the downloaded text so callers always get an ElementTree,
        # whichever way the manifest was obtained.
        manifest = etree.ElementTree(etree.fromstring(manifest_data))

    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
    base_url = manifest_pattern.sub("", manifest_url)

    return (manifest, base_url)
148
149
def print_manifest_info(manifest):
    """Print each stream of the manifest followed by its quality levels.

    Video levels are listed with FourCC, frame size and bitrate; audio
    levels with FourCC, sampling rate, bits per sample, channels and
    bitrate.  A blank line closes the listing.
    """
    for stream_no, stream in enumerate(manifest.findall('.//StreamIndex')):
        stream_type = stream.attrib["Type"]
        # The value is not used; the lookup is kept so that a stream
        # lacking the Url attribute is still rejected with KeyError.
        _unused_url = stream.attrib["Url"]

        print("Stream: %s Type: %s" % (stream_no, stream_type))
        print("\tQuality Levels:")

        for level_no, level in enumerate(stream.findall("QualityLevel")):
            bitrate = level.attrib["Bitrate"]
            fourcc = level.attrib["FourCC"]

            if stream_type == "video":
                size = "%sx%s" % (level.attrib["MaxWidth"],
                                  level.attrib["MaxHeight"])
                print("\t%2s: %4s %10s @ %7s bps" %
                      (level_no, fourcc, size, bitrate))
            if stream_type == "audio":
                channels = level.attrib["Channels"]
                sampling_rate = level.attrib["SamplingRate"]
                bits_per_sample = level.attrib["BitsPerSample"]
                print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
                      (level_no, fourcc, sampling_rate, bits_per_sample,
                       channels, bitrate))

    print("")
178
179
def get_chunk_quality_string(stream, quality_level):
    """Return the quality part of the chunk URL for the chosen level.

    The part of the stream Url template before the first '/' is taken,
    with {bitrate} replaced by the level's Bitrate and {CustomAttributes}
    by a comma separated list of Name=Value pairs.
    """
    level = stream.findall("QualityLevel")[quality_level]
    bitrate = level.attrib["Bitrate"]

    pairs = ["%s=%s" % (attr.attrib["Name"], attr.attrib["Value"])
             for attr in level.findall("CustomAttributes/Attribute")]
    # trailing commas are dropped, including any that end the last value
    custom_attributes = ",".join(pairs).rstrip(',')

    # The Url template is assumed to look like one of:
    #   QualityLevels({bitrate})/Fragments(video={start time})
    #   QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})
    quality_template = stream.attrib["Url"].split('/')[0]

    with_bitrate = quality_template.replace("{bitrate}", bitrate)
    return with_bitrate.replace("{CustomAttributes}", custom_attributes)
201
202
def get_chunk_name_string(stream, chunk):
    """Return the fragment part of the chunk URL for the given chunk.

    The second '/'-separated component of the stream Url template is
    taken, with {start time} replaced by the chunk's 't' attribute.
    """
    start_time = chunk.attrib["t"]
    name_template = stream.attrib["Url"].split('/')[1]
    return name_template.replace("{start time}", start_time)
209
210
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download every chunk of one stream at the given quality level.

    Chunks are stored in dest_dir/<quality string>/<chunk name>; both
    directories are created when missing.  A chunk whose file already
    exists is read back from disk instead of being downloaded again.
    Progress and the total number of bytes handled are printed on stdout.
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")
    total = len(chunks)
    # The trailing carriage return moves the cursor back to the start of
    # the line, so each update overwrites the previous counter.
    progress = "\tChunks %10d/%-10d \r"
    data_size = 0

    print("\nDownloading Stream %d" % stream_index)
    sys.stdout.write(progress % (0, total))
    sys.stdout.flush()
    for i, c in enumerate(chunks):

        chunk_name = get_chunk_name_string(stream, c)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        if not os.path.exists(chunk_file):
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            data = download_file(chunk_url, chunk_file, "wb")
        else:
            with open(chunk_file, "rb") as f:
                data = f.read()

        data_size += len(data)
        sys.stdout.write(progress % (i + 1, total))
        sys.stdout.flush()
    print("\tDownloaded size: %s" % data_size)
246
247
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
                   dest_file_name, final_dest_file=None):
    """Concatenate the payload of the downloaded chunks into one file.

    The chunks of the selected stream and quality level are read from
    src_dir/<quality string>/ and their payloads written, in manifest
    order, to dest_file_name.

    For a stream whose Type is "audio" a WAV header built from the
    quality level attributes is written to final_dest_file, followed by
    the rebuilt data.  final_dest_file defaults to dest_file_name, in
    which case the raw file is replaced by the WAV file.
    """

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    chunks = stream.findall("c")
    total = len(chunks)
    # The trailing carriage return moves the cursor back to the start of
    # the line, so each update overwrites the previous counter.
    progress = "\tChunks %10d/%-10d \r"
    data_size = 0

    with open(dest_file_name, "wb") as dest_file:
        print("\nRebuilding Stream %d" % stream_index)
        sys.stdout.write(progress % (0, total))
        sys.stdout.flush()
        for i, c in enumerate(chunks):

            chunk_name = get_chunk_name_string(stream, c)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            sys.stdout.write(progress % (i + 1, total))
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        quality = stream.findall("QualityLevel")[quality_level]
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        # floor division keeps the value an integer for struct.pack
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        # Read the raw data back before opening the final file for
        # writing: when both names refer to the same file, opening it
        # with "wb" first would truncate the data just written.
        with open(dest_file_name, "rb") as raw_file:
            raw_audio = raw_file.read()

        with open(final_dest_file, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            wav_file.write(raw_audio)

    print("")
    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
304
305
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return the start time of stream 2 minus that of stream 1, in seconds.

    The start time of a stream is the 't' attribute of its first chunk
    divided by the stream's TimeScale attribute.
    """
    all_streams = manifest.findall('.//StreamIndex')
    pair = (all_streams[stream1_index], all_streams[stream2_index])

    first_ticks, second_ticks = [int(stream.find("c").attrib['t'])
                                 for stream in pair]
    first_scale, second_scale = [float(stream.attrib['TimeScale'])
                                 for stream in pair]

    # calc difference in seconds
    return second_ticks / second_scale - first_ticks / first_scale
326
327
def get_clip_duration(manifest):
    """Return the Duration attribute of the manifest root, in seconds."""
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    default_timescale = 10000000  # here is the default timescale
    root = manifest.getroot()
    return float(root.attrib['Duration']) / default_timescale
333
334
def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
                    video_stream_index=0, audio_stream_index=1,
                    video_quality_level=0, audio_quality_level=0,
                    chunks_dir=None, download=True,
                    out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    Chunks are kept in chunks_dir (dest_dir when not given) and are only
    fetched when download is true.  The rebuilt streams are written into
    dest_dir, the audio one also as a '.wav' file.  Finally an ffmpeg
    command line that muxes the two files, offset by the delay between
    the tracks, is printed (not executed).
    """
    if chunks_dir is None:
        chunks_dir = dest_dir

    if download:
        tracks = ((video_stream_index, video_quality_level),
                  (audio_stream_index, audio_quality_level))
        for stream_index, quality_level in tracks:
            download_chunks(url, manifest, stream_index, quality_level,
                            chunks_dir)

    dest_video = os.path.join(dest_dir, out_video_file)
    dest_audio = os.path.join(dest_dir, out_audio_file)
    wav_audio = dest_audio + '.wav'

    rebuild_stream(manifest, video_stream_index, video_quality_level,
                   chunks_dir, dest_video)
    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
                   chunks_dir, dest_audio, wav_audio)

    delay = calc_tracks_delay(manifest, video_stream_index,
                              audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_template = ("ffmpeg -i %s \\\n" +
                    "  -itsoffset %f -async 1 -i %s \\\n" +
                    "  -vcodec copy -acodec copy ffout.mkv")

    print(mux_template % (dest_video, delay, wav_audio))
371
372
def options_parser():
    """Build the OptionParser describing the command line of the program."""
    parser = OptionParser(
        usage="usage: %prog [options] <manifest URL or file>",
        version="%%prog %s" % __version,
        description=__description,
        epilog=__author_info)

    # (short, long, action, dest, default, help) of the on/off switches
    switches = (
        ("-i", "--info", "store_true", "info_only", False,
         "print Manifest info and exit"),
        ("-m", "--manifest-only", "store_true", "manifest_only", False,
         "download Manifest file and exit"),
        ("-n", "--no-download", "store_false", "download", True,
         "disable downloading chunks"),
        ("-s", "--sync-delay", "store_true", "sync_delay", False,
         "show the sync delay between the given streams and exit"),
    )
    for short, long_name, action, dest, default, help_text in switches:
        parser.add_option(short, long_name, action=action, dest=dest,
                          default=default, help=help_text)

    # (short, long, dest, default, help) of the directory options
    directories = (
        ("-d", "--dest-dir", "dest_dir", tempfile.gettempdir(),
         "destination directory"),
        ("-c", "--chunks-dir", "chunks_dir", None,
         "directory containing chunks, if different from destination dir"),
    )
    for short, long_name, dest, default, help_text in directories:
        parser.add_option(short, long_name, metavar="<dir>", dest=dest,
                          default=default, help=help_text)

    # (short, long, dest, default, help) of the integer index options
    indexes = (
        ("-v", "--video-stream", "video_stream_index", 0,
         "index of the video stream"),
        ("-a", "--audio-stream", "audio_stream_index", 1,
         "index of the audio stream"),
        ("-q", "--video-quality", "video_quality_level", 0,
         "index of the video quality level"),
        ("-Q", "--audio-quality", "audio_quality_level", 0,
         "index of the audio quality level"),
    )
    for short, long_name, dest, default, help_text in indexes:
        parser.add_option(short, long_name, metavar="<n>", type="int",
                          dest=dest, default=default, help=help_text)

    return parser
410
411
if __name__ == "__main__":

    # Exactly one positional argument is expected: the manifest URL or file
    parser = options_parser()
    options, args = parser.parse_args()

    if len(args) != 1:
        parser.print_help()
        parser.exit(1)

    manifest, url = get_manifest(args[0], options.dest_dir)

    # -m: the manifest has been fetched by get_manifest(), nothing else to do
    if options.manifest_only:
        parser.exit(0)

    # -s: only report the delay between the selected streams
    if options.sync_delay:
        delay = calc_tracks_delay(manifest,
                                  options.video_stream_index,
                                  options.audio_stream_index)
        print(delay)
        parser.exit(0)

    # The manifest summary is shown in every remaining case; -i stops here
    print_manifest_info(manifest)
    if options.info_only:
        parser.exit(0)

    smooth_download(url, manifest,
                    dest_dir=options.dest_dir,
                    video_stream_index=options.video_stream_index,
                    audio_stream_index=options.audio_stream_index,
                    video_quality_level=options.video_quality_level,
                    audio_quality_level=options.audio_quality_level,
                    chunks_dir=options.chunks_dir,
                    download=options.download)