60c3e00dfac76c5ef801bfa42c89a05df09a94b1
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010-2016  Antonio Ospite <ao2@ao2.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 # TODO:
22 #  - Handle HTTP errors:
23 #       "Connection reset by peer"
24 #       "Resource not  available"
25 #       "Gateway Time-out"
26 # - Support more Manifest formats:
27 #       WaveFormatEx attribute instead of PrivateCodecdata
28 #       'd' and other attributes in chunk element ('i', 's', 'q')
29 #
30 # basically, write a proper implementation of manifest parsing and chunk
31 # downloading
32
33 import os
34 import re
35 import sys
36 import xml.etree.ElementTree as etree
37 import urllib2
38 import struct
39 import tempfile
40 from optparse import OptionParser
41 from urlparse import urlparse, urlunparse
42
# Program metadata; options_parser() passes these to OptionParser as the
# description, the version string and the help epilog respectively.
__description__ = "Download videos served using Smooth Streaming technology"
__version__ = "0.x"
__author_info__ = "Written by Antonio Ospite http://ao2.it"
46
47
def get_chunk_data(data):
    """Return the payload that follows the second box header of a chunk.

    The chunk is read as two consecutive boxes, each starting with a
    32-bit big-endian size followed by a four-character type (the code
    assumes 'moof' then 'mdat' but does not check the type tags).

    data -- raw content of one chunk
    """
    # A box header is a 4-byte size plus a 4-byte type tag.
    box_header_len = 4 + len('mdat')

    (moof_size,) = struct.unpack(">L", data[:4])
    (mdat_size,) = struct.unpack(">L", data[moof_size:moof_size + 4])

    payload_start = moof_size + box_header_len
    payload_len = mdat_size - box_header_len
    payload_end = payload_start + payload_len

    # The second box is expected to run exactly to the end of the chunk.
    assert len(data[payload_start:]) == payload_len

    return data[payload_start:payload_end]
62
63
def hexstring_to_bytes(hex_string):
    """Decode a string of hexadecimal digits into raw bytes.

    The string is consumed two digits at a time; a trailing single digit
    is decoded as a byte of its own value.

    hex_string -- hexadecimal text, e.g. "1000030000"

    Returns a byte string (str on Python 2, bytes on Python 3).
    Raises ValueError if a pair is not valid hexadecimal.
    """
    # Build the result in one pass through a bytearray instead of growing
    # a string with repeated "+=", and get real bytes on any interpreter.
    return bytes(bytearray(
        int(hex_string[i:i + 2], 16)
        for i in range(0, len(hex_string), 2)
    ))
70
71
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header for data_len bytes of audio to out_file.

    out_file           -- file object opened for binary writing
    fmt                -- dict with the keys wFormatTag, nChannels,
                          nSamplesPerSec, nAvgBytesPerSec, nBlockAlign and
                          wBitsPerSample; its 'cbSize' entry is overwritten
                          with the length of the decoded extra data
    codec_private_data -- codec extra data as a hexadecimal string
    data_len           -- size in bytes of the audio data that the caller
                          will append after the header
    """
    extradata = hexstring_to_bytes(codec_private_data)

    fmt['cbSize'] = len(extradata)
    # 18 bytes of fixed format fields, followed by the codec extra data.
    fmt_len = 18 + fmt['cbSize']

    # The RIFF size counts everything after the size field itself: the
    # "WAVE" tag, the "fmt " chunk (8-byte header + body) and the "data"
    # chunk (8-byte header + payload).  The payload used to be left out,
    # which declared a file containing no audio at all.
    riff_len = len(b"WAVE") + (8 + fmt_len) + (8 + data_len)

    out_file.write(b"RIFF")
    out_file.write(struct.pack('<L', riff_len))
    out_file.write(b"WAVEfmt ")
    out_file.write(struct.pack('<LHHLLHHH',
                               fmt_len,
                               fmt['wFormatTag'],
                               fmt['nChannels'],
                               fmt['nSamplesPerSec'],
                               fmt['nAvgBytesPerSec'],
                               fmt['nBlockAlign'],
                               fmt['wBitsPerSample'],
                               fmt['cbSize']))
    out_file.write(extradata)
    out_file.write(b"data")
    out_file.write(struct.pack('<L', data_len))
94
95
def download_file(src_url, dest_file, mode):
    """Fetch src_url and return its content, optionally saving a copy.

    src_url   -- path of an existing local file, or a URL to download
    dest_file -- path to save the content to; nothing is saved when this
                 is empty or None
    mode      -- open() mode used for dest_file

    Returns the content that was read.
    Re-raises urllib2.HTTPError after reporting the URL on stderr.
    """
    if os.path.exists(src_url):
        with open(src_url, "rb") as src:
            data = src.read()
    else:
        try:
            response = urllib2.urlopen(src_url)
            try:
                data = response.read()
            finally:
                # Release the connection even if reading fails half-way.
                response.close()
        except urllib2.HTTPError:
            sys.stderr.write("Error while dowloading URL: %s\n" % src_url)
            raise

    if dest_file:
        with open(dest_file, mode) as dest:
            dest.write(data)

    return data
116
117
def get_manifest(url, dest_dir):
    """Returns the parsed manifest and the base content URL

    url      -- manifest URL or local file; any query string is dropped
                and '/Manifest' is appended unless the URL already ends
                with '/manifest', '.ismc' or '.csm' (case-insensitive)
    dest_dir -- directory where a copy is saved as 'Manifest'

    Returns a (manifest, base_url) tuple, where manifest is an
    ElementTree and base_url is the manifest URL without a trailing
    '/manifest'.
    Raises Exception if the manifest MajorVersion is not "2".
    """

    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    local_manifest_path = os.path.join(dest_dir, 'Manifest')
    # Binary mode: the downloaded bytes are saved untouched.
    download_file(manifest_url, local_manifest_path, "wb")

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay
    # then get the final manifest
    clip = manifest.find("Clip")
    if clip is not None and "Url" in clip.attrib:
        final_manifest_url = clip.attrib["Url"]
        try:
            final_manifest_data = download_file(final_manifest_url,
                                                None, None)
        except urllib2.HTTPError:
            # Best effort: keep using the intermediate manifest.
            pass
        else:
            # set the new values only if the download succeeded; the raw
            # data must be parsed, because callers expect an ElementTree
            # and not a string
            manifest_url = final_manifest_url
            manifest = etree.ElementTree(
                etree.fromstring(final_manifest_data))

    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
    base_url = manifest_pattern.sub("", manifest_url)

    return (manifest, base_url)
153
154
def print_manifest_info(manifest):
    """Print every stream of the manifest together with its quality levels.

    manifest -- parsed manifest providing findall()

    For each StreamIndex element the index and Type are printed, followed
    by one line per QualityLevel ("video" and "audio" streams only), and
    finally an empty line.
    """
    streams = manifest.findall('.//StreamIndex')

    for stream_index, stream in enumerate(streams):
        stream_type = stream.attrib["Type"]

        print("Stream: %s Type: %s" % (stream_index, stream_type))
        print("\tQuality Levels:")

        # A separate index name: the stream index must not be clobbered
        # by the quality-level loop.
        for level_index, level in enumerate(stream.findall("QualityLevel")):
            bitrate = level.attrib["Bitrate"]
            fourcc = level.attrib["FourCC"]

            if stream_type == "video":
                size = "%sx%s" % (level.attrib["MaxWidth"],
                                  level.attrib["MaxHeight"])
                print("\t%2s: %4s %10s @ %7s bps" %
                      (level_index, fourcc, size, bitrate))
            if stream_type == "audio":
                channels = level.attrib["Channels"]
                sampling_rate = level.attrib["SamplingRate"]
                bits_per_sample = level.attrib["BitsPerSample"]
                print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
                      (level_index, fourcc, sampling_rate, bits_per_sample,
                       channels, bitrate))

    print("")
182
183
def get_chunk_quality_string(stream, quality_level):
    """Return the quality part of the chunk URL for one quality level.

    stream        -- StreamIndex element
    quality_level -- index into the stream's QualityLevel elements

    The first path component of the stream "Url" template is returned with
    {bitrate} and {CustomAttributes} filled in.
    """
    level = stream.findall("QualityLevel")[quality_level]
    bitrate = level.attrib["Bitrate"]

    # Custom attributes are rendered as "Name=Value" pairs joined by commas.
    pairs = ["%s=%s" % (attribute.attrib["Name"], attribute.attrib["Value"])
             for attribute in level.findall("CustomAttributes/Attribute")]
    custom_attributes = ",".join(pairs).rstrip(',')

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    quality_template = stream.attrib["Url"].split('/')[0]

    filled = quality_template.replace("{bitrate}", bitrate)
    return filled.replace("{CustomAttributes}", custom_attributes)
205
206
def get_chunk_name_string(stream, chunk_time):
    """Return the chunk part of the chunk URL for the given start time.

    stream     -- StreamIndex element
    chunk_time -- value substituted for {start time}

    The second path component of the stream "Url" template is used.
    """
    url_parts = stream.attrib["Url"].split('/')
    name_template = url_parts[1]

    return name_template.replace("{start time}", str(chunk_time))
212
213
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download all the chunks of one stream at one quality level.

    base_url      -- base content URL the chunk URLs are appended to
    manifest      -- parsed manifest providing findall()
    stream_index  -- index of the StreamIndex element to download
    quality_level -- index of the QualityLevel to download
    dest_dir      -- directory under which the chunks are stored, in a
                     sub-directory named after the quality string

    Chunks already present on disk are read back instead of being
    downloaded again.  Progress is printed on stdout.
    """
    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")
    data_size = 0
    print("\nDownloading Stream %d" % stream_index)
    sys.stdout.write("\tChunks %10d/%-10d \r" % (0, len(chunks)))
    sys.stdout.flush()

    # Start time for the next chunk that carries no "t" attribute.
    next_start_time = 0
    for i, chunk in enumerate(chunks):

        if "t" in chunk.attrib:
            # Keep the manifest spelling for the URL, and use the numeric
            # value to keep the running start time in step.
            chunk_time = chunk.attrib["t"]
            start_time = int(chunk_time)
        else:
            start_time = next_start_time
            chunk_time = start_time

        # Advance the running time whenever a duration is given, also for
        # chunks that have an explicit "t"; otherwise a following chunk
        # with only "d" would wrongly restart from 0.
        if "d" in chunk.attrib:
            next_start_time = start_time + int(chunk.attrib["d"])

        chunk_name = get_chunk_name_string(stream, chunk_time)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        if not os.path.exists(chunk_file):
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            data = download_file(chunk_url, chunk_file, "wb")
        else:
            with open(chunk_file, "rb") as f:
                data = f.read()

        data_size += len(data)
        sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, len(chunks)))
        sys.stdout.flush()
    print("\tDownloaded size: %s" % data_size)
253
254
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
                   dest_file_name, final_dest_file=None):
    """Concatenate the payload of the downloaded chunks of one stream.

    manifest        -- parsed manifest providing findall()
    stream_index    -- index of the StreamIndex element to rebuild
    quality_level   -- index of the QualityLevel that was downloaded
    src_dir         -- directory containing the per-quality chunk dirs
    dest_file_name  -- file receiving the raw concatenated payload
    final_dest_file -- for "audio" streams, file receiving a WAV header
                       followed by the raw payload; defaults to
                       dest_file_name

    Progress and the total payload size are printed on stdout.
    """

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    chunks = stream.findall("c")
    data_size = 0

    # The context manager closes the output for every stream type; it used
    # to be closed only on the audio path.
    with open(dest_file_name, "wb") as dest_file:
        print("\nRebuilding Stream %d" % stream_index)
        sys.stdout.write("\tChunks %10d/%-10d \r" % (0, len(chunks)))
        sys.stdout.flush()

        # Start time for the next chunk that carries no "t" attribute.
        next_start_time = 0
        for i, chunk in enumerate(chunks):

            if "t" in chunk.attrib:
                chunk_time = chunk.attrib["t"]
                start_time = int(chunk_time)
            else:
                start_time = next_start_time
                chunk_time = start_time

            # Advance the running time whenever a duration is given, also
            # for chunks with an explicit "t", so that the names match the
            # ones used when downloading.
            if "d" in chunk.attrib:
                next_start_time = start_time + int(chunk.attrib["d"])

            chunk_name = get_chunk_name_string(stream, chunk_time)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            sys.stdout.write("\tChunks %10d/%-10d \r" % (i + 1, len(chunks)))
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        quality = stream.findall("QualityLevel")[quality_level]
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        # Read the raw payload before opening the final file: when both
        # names are the same, opening it for writing truncates the data.
        with open(dest_file_name, "rb") as raw_file:
            raw_data = raw_file.read()

        with open(final_dest_file, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            wav_file.write(raw_data)

    print("")
    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
319
320
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return the start delay, in seconds, of stream 2 relative to stream 1.

    manifest      -- parsed manifest providing findall()
    stream1_index -- index of the reference StreamIndex element
    stream2_index -- index of the StreamIndex element to compare

    The delay is the difference between the "t" attributes of the first
    chunk of each stream, each divided by its stream's "TimeScale".
    Returns 0 when either stream lacks a TimeScale attribute, has no
    chunks, or has a first chunk without a "t" attribute.
    """
    streams = manifest.findall('.//StreamIndex')

    s1 = streams[stream1_index]
    s2 = streams[stream2_index]

    # Look in .attrib: "in" applied to an Element tests its children, so
    # the attribute was never found and the result was always 0.
    if "TimeScale" not in s1.attrib or "TimeScale" not in s2.attrib:
        return 0

    s1_start_chunk = s1.find("c")
    s2_start_chunk = s2.find("c")

    if s1_start_chunk is None or s2_start_chunk is None:
        return 0

    if "t" not in s1_start_chunk.attrib \
       or "t" not in s2_start_chunk.attrib:
        return 0

    s1_start_time = int(s1_start_chunk.attrib['t'])
    s2_start_time = int(s2_start_chunk.attrib['t'])

    s1_timescale = float(s1.attrib['TimeScale'])
    s2_timescale = float(s2.attrib['TimeScale'])

    # calc difference in seconds
    delay = s2_start_time / s2_timescale - \
        s1_start_time / s1_timescale

    return delay
348
349
def get_clip_duration(manifest):
    """Return the manifest root "Duration" divided by the default timescale.

    manifest -- parsed manifest providing getroot()
    """
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    default_timescale = 10000000
    root = manifest.getroot()

    return float(root.attrib['Duration']) / default_timescale
355
356
def smooth_download(url, manifest, dest_dir,
                    video_stream_index=0, audio_stream_index=1,
                    video_quality_level=0, audio_quality_level=0,
                    chunks_dir=None, download=True,
                    out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    url                 -- base content URL
    manifest            -- parsed manifest
    dest_dir            -- directory receiving the rebuilt streams
    video_stream_index, audio_stream_index
                        -- indexes of the streams to process
    video_quality_level, audio_quality_level
                        -- indexes of the quality levels to process
    chunks_dir          -- directory holding the chunks; dest_dir if None
    download            -- when false, only rebuild from existing chunks
    out_video_file, out_audio_file
                        -- names of the rebuilt files inside dest_dir

    Finally an ffmpeg command line for muxing the two files is printed.
    """
    chunks_dir = dest_dir if chunks_dir is None else chunks_dir

    if download:
        selected = ((video_stream_index, video_quality_level),
                    (audio_stream_index, audio_quality_level))
        for stream_index, quality_level in selected:
            download_chunks(url, manifest, stream_index,
                            quality_level, chunks_dir)

    dest_video = os.path.join(dest_dir, out_video_file)
    dest_audio = os.path.join(dest_dir, out_audio_file)
    # The audio stream additionally gets a copy with a WAV header.
    wav_audio = dest_audio + '.wav'

    rebuild_stream(manifest, video_stream_index, video_quality_level,
                   chunks_dir, dest_video)
    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
                   chunks_dir, dest_audio, wav_audio)

    delay = calc_tracks_delay(manifest, video_stream_index,
                              audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_template = ("ffmpeg -i %s \\\n"
                    "  -itsoffset %f -async 1 -i %s \\\n"
                    "  -vcodec copy -acodec copy ffout.mkv")

    print(mux_template % (dest_video, delay, wav_audio))
393
394
def options_parser():
    """Build and return the OptionParser for the command line interface."""
    parser = OptionParser(
        usage="usage: %prog [options] <manifest URL or file>",
        version="%%prog %s" % __version__,
        description=__description__,
        epilog=__author_info__)

    # One (flags, settings) entry per option, in the order shown by --help.
    option_specs = [
        (("-i", "--info"),
         dict(action="store_true", dest="info_only", default=False,
              help="print Manifest info and exit")),
        (("-m", "--manifest-only"),
         dict(action="store_true", dest="manifest_only", default=False,
              help="download Manifest file and exit")),
        (("-n", "--no-download"),
         dict(action="store_false", dest="download", default=True,
              help="disable downloading chunks")),
        (("-s", "--sync-delay"),
         dict(action="store_true", dest="sync_delay", default=False,
              help="show the sync delay between the given streams and exit")),
        (("-d", "--dest-dir"),
         dict(metavar="<dir>", dest="dest_dir",
              default=tempfile.gettempdir(),
              help="destination directory")),
        (("-c", "--chunks-dir"),
         dict(metavar="<dir>", dest="chunks_dir", default=None,
              help="directory containing chunks, if different from destination dir")),
        (("-v", "--video-stream"),
         dict(metavar="<n>", type="int", dest="video_stream_index",
              default=0, help="index of the video stream")),
        (("-a", "--audio-stream"),
         dict(metavar="<n>", type="int", dest="audio_stream_index",
              default=1, help="index of the audio stream")),
        (("-q", "--video-quality"),
         dict(metavar="<n>", type="int", dest="video_quality_level",
              default=0, help="index of the video quality level")),
        (("-Q", "--audio-quality"),
         dict(metavar="<n>", type="int", dest="audio_quality_level",
              default=0, help="index of the audio quality level")),
    ]

    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    return parser
432
433
def main():
    """Command line entry point: parse the options and run the request.

    Exactly one positional argument (manifest URL or file) is required;
    otherwise the help is printed and the program exits with status 1.
    """
    parser = options_parser()
    options, args = parser.parse_args()

    if len(args) != 1:
        parser.print_help()
        parser.exit(1)

    if not os.path.exists(options.dest_dir):
        os.mkdir(options.dest_dir, 0o755)

    manifest, base_url = get_manifest(args[0], options.dest_dir)

    # The manifest has been saved by get_manifest(), nothing else to do.
    if options.manifest_only:
        parser.exit(0)

    if options.sync_delay:
        delay = calc_tracks_delay(manifest,
                                  options.video_stream_index,
                                  options.audio_stream_index)
        print(delay)
        parser.exit(0)

    # The manifest info is shown both with --info and before a download.
    print_manifest_info(manifest)
    if options.info_only:
        parser.exit(0)

    smooth_download(base_url, manifest, options.dest_dir,
                    options.video_stream_index, options.audio_stream_index,
                    options.video_quality_level, options.audio_quality_level,
                    options.chunks_dir, options.download)
467
468
# Run the command line interface only when executed as a script, not when
# the file is imported as a module.
if __name__ == "__main__":
    main()