smooth-dl.py: factor out a download_file() function
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 # TODO:
22 #  - Handle HTTP errors:
23 #       "Connection reset by peer"
24 #       "Resource not  available"
25 #       "Gateway Time-out"
26 # - Support more Manifest formats:
27 #       WaveFormatEx attribute instead of PrivateCodecdata
28 #       'd' and other attributes in chunk element ('i', 's', 'q')
29 #
30 # basically, write a proper implementation of manifest parsing and chunk
31 # downloading
32
33
34 __description = "Download videos served using Smooth Streaming technology"
35 __version = "0.x"
36 __author_info = "Written by Antonio Ospite http://ao2.it"
37
38 import os
39 import re
40 import sys
41 import xml.etree.ElementTree as etree
42 import urllib2
43 import struct
44 import tempfile
45 from optparse import OptionParser
46 from urlparse import urlparse, urlunparse
47
48
def get_chunk_data(data):
    """Return the payload of the second box of a downloaded chunk.

    The chunk is expected to be made of exactly two consecutive boxes, each
    one starting with its own total size (32 bit, big-endian) followed by a
    4 bytes type tag.  The code calls the first box 'moof' and the second
    one 'mdat'; the type tags themselves are not checked.  The second box
    must extend exactly up to the end of the chunk.

    data -- the raw content of the chunk

    Raises ValueError if the chunk is too short to hold the two box headers,
    or if the size declared by the second box does not match the amount of
    data actually available.
    """
    size_field_len = 4
    box_header_len = size_field_len + len('mdat')

    if len(data) < size_field_len:
        raise ValueError("chunk too short: missing the first box size")
    moof_size = struct.unpack(">L", data[0:size_field_len])[0]

    if len(data) < moof_size + size_field_len:
        raise ValueError("chunk too short: missing the second box size")
    mdat_size = struct.unpack(">L",
            data[moof_size:moof_size + size_field_len])[0]

    data_start = moof_size + box_header_len
    data_size = mdat_size - box_header_len

    # An explicit check instead of an assert, so that corrupted chunks are
    # still detected when python runs with -O
    available = len(data[data_start:])
    if available != data_size:
        raise ValueError("chunk payload size mismatch: "
                "%d bytes declared, %d bytes available" %
                (data_size, available))

    return data[data_start:data_start + data_size]
63
64
def hexstring_to_bytes(hex_string):
    """Decode a string of hexadecimal digits into the characters it encodes.

    The digits are consumed two at a time, each pair giving one character;
    when the number of digits is odd the last digit is decoded on its own.
    An empty string gives an empty result.
    """
    digit_pairs = (hex_string[pos:pos + 2]
                   for pos in range(0, len(hex_string), 2))

    return "".join(chr(int(pair, 16)) for pair in digit_pairs)
71
72
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header for data_len bytes of audio to out_file.

    The header is made of the RIFF chunk header, a 'fmt ' chunk (18 bytes of
    fixed fields, laid out like a WAVEFORMATEX structure, followed by the
    codec extra data) and the header of the 'data' chunk.  The audio data
    itself has to be written by the caller right after the header.

    out_file           -- file object opened for binary writing
    fmt                -- dictionary providing 'wFormatTag', 'nChannels',
                          'nSamplesPerSec', 'nAvgBytesPerSec', 'nBlockAlign'
                          and 'wBitsPerSample' as integers; as a side effect
                          its 'cbSize' item is set to the extra data length
    codec_private_data -- codec extra data, as a string of hex digits
    data_len           -- size in bytes of the audio data that will follow
    """
    extradata = hexstring_to_bytes(codec_private_data)

    fmt['cbSize'] = len(extradata)
    fmt_len = 18 + fmt['cbSize']

    # The RIFF chunk size covers everything after the size field itself:
    # the "WAVE" form type, the whole 'fmt ' chunk and the whole 'data'
    # chunk, audio data included.
    riff_len = (len("WAVE") +
                len("fmt ") + 4 + fmt_len +
                len("data") + 4 + data_len)
    # The size field is only 32 bit wide, saturate it rather than failing
    riff_len = min(riff_len, 0xFFFFFFFF)

    out_file.write("RIFF")
    out_file.write(struct.pack('<L', riff_len))
    out_file.write("WAVEfmt ")
    out_file.write(struct.pack('<L', fmt_len))
    out_file.write(struct.pack('<H', fmt['wFormatTag']))
    out_file.write(struct.pack('<H', fmt['nChannels']))
    out_file.write(struct.pack('<L', fmt['nSamplesPerSec']))
    out_file.write(struct.pack('<L', fmt['nAvgBytesPerSec']))
    out_file.write(struct.pack('<H', fmt['nBlockAlign']))
    out_file.write(struct.pack('<H', fmt['wBitsPerSample']))
    out_file.write(struct.pack('<H', fmt['cbSize']))
    out_file.write(extradata)
    out_file.write("data")
    out_file.write(struct.pack('<L', data_len))
95
def download_file(src_url, dest_file, mode):
    """Download src_url and return its content, optionally saving it.

    src_url   -- the URL to fetch
    dest_file -- path of the file where to store the content; nothing is
                 written when this is empty or None
    mode      -- the mode passed to open() for dest_file (e.g. "wb")

    Errors raised by urllib2 (HTTPError included, since it derives from
    URLError) are reported on stderr and then re-raised to the caller.
    """
    try:
        response = urllib2.urlopen(src_url)
        try:
            data = response.read()
        finally:
            # Release the connection even when the read fails
            response.close()
    except urllib2.URLError:
        sys.stderr.write("Error while downloading URL: %s\n" % src_url)
        raise

    if dest_file:
        with open(dest_file, mode) as f:
            f.write(data)

    return data
110
def get_manifest(url, dest_dir=tempfile.gettempdir()):
    """Returns the manifest and the new URL if this is changed

    url      -- URL (http or https) of the Manifest, or path of a local
                Manifest file; any query string is discarded and '/Manifest'
                is appended when the URL does not already end with
                '/manifest', '.ismc' or '.csm' (case insensitive)
    dest_dir -- directory where a remote Manifest is saved, as a file named
                'Manifest'; it is created if it does not exist

    Returns a (manifest, base_url) tuple: the parsed ElementTree and the
    Manifest URL with the trailing '/manifest' removed.  When the Manifest
    has a Clip element with an Url attribute, that URL is used to build
    base_url instead of the one passed in.

    Raises Exception if the Manifest MajorVersion is not "2".
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    if manifest_url.startswith(('http://', 'https://')):
        local_manifest_path = os.path.join(dest_dir, 'Manifest')
        # Binary mode: store the bytes exactly as they have been received
        download_file(manifest_url, local_manifest_path, "wb")
    else:
        local_manifest_path = url

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay
    clip = manifest.find("Clip")
    if clip is not None:
        manifest_url = clip.attrib.get("Url", manifest_url)

    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
    base_url = manifest_pattern.sub("", manifest_url)

    return (manifest, base_url)
146
147
def print_manifest_info(manifest):
    """Print the streams of the manifest and their quality levels.

    For every StreamIndex element the stream index and Type are printed,
    followed by one line for each of its QualityLevel elements: FourCC, size
    and bitrate for "video" streams; FourCC, sampling rate, bits per sample,
    channels and bitrate for "audio" streams.  Quality levels of streams of
    any other type are not printed.  An empty line ends the output.

    manifest -- the parsed manifest (an ElementTree)
    """

    streams = manifest.findall('.//StreamIndex')

    for stream_index, s in enumerate(streams):
        stream_type = s.attrib["Type"]

        print("Stream: %s Type: %s" % (stream_index, stream_type))

        print("\tQuality Levels:")
        qualities = s.findall("QualityLevel")
        # A dedicated name for the inner index, so that it does not clobber
        # the index of the stream
        for quality_index, q in enumerate(qualities):
            bitrate = q.attrib["Bitrate"]
            fourcc = q.attrib["FourCC"]

            if stream_type == "video":
                size = "%sx%s" % (q.attrib["MaxWidth"], q.attrib["MaxHeight"])
                print("\t%2s: %4s %10s @ %7s bps" %
                        (quality_index, fourcc, size, bitrate))
            elif stream_type == "audio":
                channels = q.attrib["Channels"]
                sampling_rate = q.attrib["SamplingRate"]
                bits_per_sample = q.attrib["BitsPerSample"]
                print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
                        (quality_index, fourcc, sampling_rate,
                         bits_per_sample, channels, bitrate))

    print("")
175
176
def get_chunk_quality_string(stream, quality_level):
    """Build the quality part of the chunk URLs of a stream.

    The part of the stream Url template before the first '/' is taken and
    its "{bitrate}" and "{CustomAttributes}" placeholders are replaced with
    the values of the chosen QualityLevel element.

    stream        -- a StreamIndex element
    quality_level -- index of the QualityLevel element to use
    """
    level = stream.findall("QualityLevel")[quality_level]
    level_bitrate = level.attrib["Bitrate"]

    # Custom attributes are rendered as a comma separated list of Name=Value
    name_value_pairs = ["%s=%s" % (attr.attrib["Name"], attr.attrib["Value"])
                        for attr in level.findall("CustomAttributes/Attribute")]
    attributes_list = ",".join(name_value_pairs).rstrip(',')

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    quality_template = stream.attrib["Url"].split('/')[0]

    with_bitrate = quality_template.replace("{bitrate}", level_bitrate)
    return with_bitrate.replace("{CustomAttributes}", attributes_list)
197
198
def get_chunk_name_string(stream, chunk):
    """Build the name part of the URL of a chunk.

    The second '/' separated component of the stream Url template is taken
    and its "{start time}" placeholder is replaced with the 't' attribute of
    the chunk.

    stream -- a StreamIndex element
    chunk  -- a 'c' element of the stream
    """
    start_time = chunk.attrib["t"]
    name_template = stream.attrib["Url"].split('/')[1]

    return name_template.replace("{start time}", start_time)
205
206
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download all the chunks of a stream at the given quality level.

    Each chunk is saved as dest_dir/<quality string>/<chunk name>, the two
    directories being created when missing.  A chunk whose file already
    exists is not downloaded again, but its size is still accounted for in
    the total printed at the end.  Progress is printed on stdout.

    base_url      -- the URL the chunks URLs are relative to
    manifest      -- the parsed manifest (an ElementTree)
    stream_index  -- index of the StreamIndex element to download
    quality_level -- index of the QualityLevel element to download
    dest_dir      -- directory where to store the chunks
    """

    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")
    data_size = 0
    print("\nDownloading Stream %d" % stream_index)
    # The progress line ends with a carriage return and no newline, so that
    # every update overwrites the previous one
    sys.stdout.write("\tChunks %10d/%-10d \r" % (0, len(chunks)))
    sys.stdout.flush()
    for i, c in enumerate(chunks):

        chunk_name = get_chunk_name_string(stream, c)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        if not os.path.exists(chunk_file):
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            data_size += len(download_file(chunk_url, chunk_file, "wb"))
        else:
            # Only the size is needed, no point in reading the chunk back
            data_size += os.path.getsize(chunk_file)

        sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, len(chunks)))
        sys.stdout.flush()
    print("\tDownloaded size: %s" % data_size)
242
243
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
        dest_file_name, final_dest_file=None):
    """Rebuild a stream by joining the payloads of its downloaded chunks.

    The chunks are read from src_dir/<quality string>/ in manifest order and
    their payloads are written one after the other to dest_file_name.  For
    streams of Type "audio" a second file, final_dest_file, is then written
    with a WAV header followed by a copy of the rebuilt data.  Progress is
    printed on stdout.

    manifest        -- the parsed manifest (an ElementTree)
    stream_index    -- index of the StreamIndex element to rebuild
    quality_level   -- index of the QualityLevel element the chunks belong to
    src_dir         -- directory where the chunks have been downloaded
    dest_file_name  -- path of the file where to write the raw stream
    final_dest_file -- path of the file with the WAV header, only used for
                       audio streams; it defaults to dest_file_name but it
                       has to be a different file, because it is truncated
                       before the raw data is copied into it
    """

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    chunks = stream.findall("c")
    data_size = 0

    # The context manager closes the raw file for every stream type, and
    # also when a chunk turns out to be missing or corrupted
    with open(dest_file_name, "wb") as dest_file:
        print("\nRebuilding Stream %d" % stream_index)
        # The progress line ends with a carriage return and no newline, so
        # that every update overwrites the previous one
        sys.stdout.write("\tChunks %10d/%-10d \r" % (0, len(chunks)))
        sys.stdout.flush()
        for i, c in enumerate(chunks):

            chunk_name = get_chunk_name_string(stream, c)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, len(chunks)))
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        quality = stream.findall("QualityLevel")[quality_level]
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        # Bitrate is in bits per second; explicit integer division
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        with open(final_dest_file, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            with open(dest_file_name, "rb") as raw_file:
                # Copy block by block, the whole stream may be large
                while True:
                    block = raw_file.read(1024 * 1024)
                    if not block:
                        break
                    wav_file.write(block)

    print("")
    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
300
301
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return the delay in seconds of the second stream from the first one.

    The start time of a stream is the 't' attribute of its first 'c'
    element divided by the 'TimeScale' attribute of the stream; the result
    is the start time of stream 2 minus the start time of stream 1.

    manifest      -- the parsed manifest (an ElementTree)
    stream1_index -- index of the reference StreamIndex element
    stream2_index -- index of the StreamIndex element to compare
    """
    all_streams = manifest.findall('.//StreamIndex')
    pair = [all_streams[stream1_index], all_streams[stream2_index]]

    first_chunks = [stream.find("c") for stream in pair]
    start_times = [int(chunk.attrib['t']) for chunk in first_chunks]
    timescales = [float(stream.attrib['TimeScale']) for stream in pair]

    # calc difference in seconds
    return start_times[1] / timescales[1] - start_times[0] / timescales[0]
322
323
def get_clip_duration(manifest):
    """Return the 'Duration' attribute of the manifest root, in seconds.

    manifest -- the parsed manifest (an ElementTree)
    """
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    default_timescale = 10000000  # here is the default timescale
    duration_ticks = float(manifest.getroot().attrib['Duration'])

    return duration_ticks / default_timescale
329
330
def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
        video_stream_index=0, audio_stream_index=1,
        video_quality_level=0, audio_quality_level=0,
        chunks_dir=None, download=True,
        out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    The chunks of the two streams are downloaded (unless download is false)
    to chunks_dir, the streams are rebuilt in dest_dir, and finally an
    ffmpeg command line to mux them, with the audio offset by the delay
    between the two streams, is printed; the command is not executed.

    url                 -- the URL the chunks URLs are relative to
    manifest            -- the parsed manifest (an ElementTree)
    dest_dir            -- directory where the rebuilt streams are written
    video_stream_index  -- index of the video StreamIndex element
    audio_stream_index  -- index of the audio StreamIndex element
    video_quality_level -- index of the video QualityLevel element
    audio_quality_level -- index of the audio QualityLevel element
    chunks_dir          -- directory for the chunks, dest_dir when None
    download            -- whether to download the chunks first
    out_video_file      -- file name of the rebuilt video stream
    out_audio_file      -- file name of the rebuilt raw audio stream; the
                           one with the WAV header gets a '.wav' suffix
    """
    if chunks_dir is None:
        chunks_dir = dest_dir

    if download:
        selections = [(video_stream_index, video_quality_level),
                      (audio_stream_index, audio_quality_level)]
        for stream_index, quality_level in selections:
            download_chunks(url, manifest, stream_index, quality_level,
                    chunks_dir)

    video_path = os.path.join(dest_dir, out_video_file)
    audio_path = os.path.join(dest_dir, out_audio_file)
    wav_path = audio_path + '.wav'

    rebuild_stream(manifest, video_stream_index, video_quality_level,
            chunks_dir, video_path)
    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
            chunks_dir, audio_path, wav_path)

    #duration = get_clip_duration(manifest)

    audio_delay = calc_tracks_delay(manifest, video_stream_index,
            audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_template = "ffmpeg -i %s \\\n" \
                   "  -itsoffset %f -async 1 -i %s \\\n" \
                   "  -vcodec copy -acodec copy ffout.mkv"

    print(mux_template % (video_path, audio_delay, wav_path))
367
368
def options_parser():
    """Return the OptionParser describing the command line of the program.

    Version, description and epilog come from the module level __version,
    __description and __author_info variables.
    """
    # Flags and settings of each option, in the order they show up in --help
    option_specs = [
        (("-i", "--info"),
         dict(action="store_true", dest="info_only",
              default=False, help="print Manifest info and exit")),
        (("-m", "--manifest-only"),
         dict(action="store_true", dest="manifest_only",
              default=False, help="download Manifest file and exit")),
        (("-n", "--no-download"),
         dict(action="store_false", dest="download",
              default=True, help="disable downloading chunks")),
        (("-s", "--sync-delay"),
         dict(action="store_true", dest="sync_delay",
              default=False,
              help="show the sync delay between the given streams and exit")),
        (("-d", "--dest-dir"),
         dict(metavar="<dir>",
              dest="dest_dir", default=tempfile.gettempdir(),
              help="destination directory")),
        (("-c", "--chunks-dir"),
         dict(metavar="<dir>",
              dest="chunks_dir", default=None,
              help="directory containing chunks, if different from destination dir")),
        (("-v", "--video-stream"),
         dict(metavar="<n>",
              type="int", dest="video_stream_index", default=0,
              help="index of the video stream")),
        (("-a", "--audio-stream"),
         dict(metavar="<n>",
              type="int", dest="audio_stream_index", default=1,
              help="index of the audio stream")),
        (("-q", "--video-quality"),
         dict(metavar="<n>",
              type="int", dest="video_quality_level", default=0,
              help="index of the video quality level")),
        (("-Q", "--audio-quality"),
         dict(metavar="<n>",
              type="int", dest="audio_quality_level", default=0,
              help="index of the audio quality level")),
    ]

    cli = OptionParser(usage="usage: %prog [options] <manifest URL or file>",
            version="%%prog %s" % __version,
            description=__description, epilog=__author_info)
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)

    return cli
406
407
if __name__ == "__main__":

    # Command line entry point: exactly one argument is expected, the URL or
    # the path of the Manifest
    cli = options_parser()
    opts, positional = cli.parse_args()

    if len(positional) != 1:
        cli.print_help()
        cli.exit(1)

    # The manifest is always fetched and parsed; the returned URL is the
    # base the chunks URLs are relative to
    manifest, base_url = get_manifest(positional[0], opts.dest_dir)

    # Each of the following modes ends the program once it is done
    if opts.manifest_only:
        cli.exit(0)

    if opts.sync_delay:
        print(calc_tracks_delay(manifest,
                opts.video_stream_index,
                opts.audio_stream_index))
        cli.exit(0)

    # The stream info is printed both in info-only mode and before an
    # actual download
    print_manifest_info(manifest)
    if opts.info_only:
        cli.exit(0)

    smooth_download(base_url, manifest, opts.dest_dir,
            opts.video_stream_index, opts.audio_stream_index,
            opts.video_quality_level, opts.audio_quality_level,
            opts.chunks_dir, opts.download)