smooth-dl.py: update to python3
[smooth-dl.git] / smooth-dl.py
1 #!/usr/bin/env python3
2 #
3 # smooth-dl - download videos served using Smooth Streaming technology
4 #
5 # Copyright (C) 2010-2016  Antonio Ospite <ao2@ao2.it>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 #
20 #
21 # TODO:
22 #  - Handle HTTP errors:
23 #       "Connection reset by peer"
24 #       "Resource not  available"
25 #       "Gateway Time-out"
26 # - Support more Manifest formats:
27 #       WaveFormatEx attribute instead of PrivateCodecdata
28 #       'd' and other attributes in chunk element ('i', 's', 'q')
29 #
30 # basically, write a proper implementation of manifest parsing and chunk
31 # downloading
32
33 import os
34 import re
35 import sys
36 import xml.etree.ElementTree as etree
37 import urllib.request
38 import urllib.error
39 import urllib.parse
40 import struct
41 import tempfile
42 from optparse import OptionParser
43 from urllib.parse import urlparse, urlunparse
44
# One-line summary of the program; passed as the parser description in
# options_parser().
__description__ = "Download videos served using Smooth Streaming technology"
# Version string, interpolated into the parser's version text in
# options_parser().
__version__ = "0.x"
# Author credit; passed as the parser epilog in options_parser().
__author_info__ = "Written by Antonio Ospite http://ao2.it"
48
49
def get_chunk_data(data):
    """Return the payload of the second box ('mdat') of a downloaded chunk.

    The chunk is read as two consecutive boxes: the first one (named
    'moof' here) starts at offset 0, the second one ('mdat') starts right
    after it and must extend exactly to the end of the chunk.  Each box
    begins with a 4 byte big-endian size, which counts the whole box,
    followed by a 4 byte type tag.

    data -- the raw bytes of one chunk.

    Returns the bytes following the 'mdat' box header.

    Raises struct.error if the chunk is too short to contain the two size
    fields, and ValueError if the size declared by the 'mdat' box does not
    match the amount of data actually present.
    """

    box_header_size = 4 + len('mdat')  # size field + type tag

    (moof_size,) = struct.unpack_from(">L", data, 0)
    (mdat_size,) = struct.unpack_from(">L", data, moof_size)

    data_start = moof_size + box_header_size
    data_size = mdat_size - box_header_size

    # Validate explicitly rather than with assert, so that the check is not
    # stripped when running with python -O.
    available = len(data[data_start:])
    if available != data_size:
        raise ValueError(
            "Malformed chunk: mdat declares %d bytes of data but %d are "
            "available" % (data_size, available))

    return data[data_start:data_start + data_size]
64
65
def hexstring_to_bytes(hex_string):
    """Decode a string of hexadecimal digits into a bytearray."""
    decoded = bytearray.fromhex(hex_string)
    return decoded
68
69
def write_wav_header(out_file, fmt, codec_private_data, data_len):
    """Write a RIFF/WAVE header describing data_len bytes of audio data.

    out_file -- binary file object the header is written to.
    fmt -- dict with the keys 'wFormatTag', 'nChannels', 'nSamplesPerSec',
           'nAvgBytesPerSec', 'nBlockAlign' and 'wBitsPerSample'; its
           'cbSize' entry is overwritten with the size of the extra data.
    codec_private_data -- hexadecimal string, decoded and appended to the
                          'fmt ' chunk as codec extra data.
    data_len -- size in bytes of the audio payload that the caller writes
                after the header.

    Raises KeyError if a required fmt entry is missing, ValueError if
    codec_private_data is not valid hexadecimal, and struct.error if a
    value does not fit its field.
    """

    extradata = bytearray.fromhex(codec_private_data)

    fmt['cbSize'] = len(extradata)
    # 18 is the size of the fixed fields of the 'fmt ' chunk packed below.
    fmt_len = 18 + fmt['cbSize']

    # The RIFF size covers everything after the size field itself: the
    # "WAVE" tag, the whole 'fmt ' chunk and the whole 'data' chunk,
    # payload included.  Saturate at the field maximum instead of failing
    # when the total does not fit in 32 bits.
    riff_len = len("WAVE") + (8 + fmt_len) + (8 + data_len)
    riff_len = min(riff_len, 0xFFFFFFFF)

    # Build the complete header before writing anything, so that an error
    # does not leave a partial header in out_file.
    header = struct.pack('<4sL8sLHHLLHHH',
                         b"RIFF", riff_len,
                         b"WAVEfmt ", fmt_len,
                         fmt['wFormatTag'],
                         fmt['nChannels'],
                         fmt['nSamplesPerSec'],
                         fmt['nAvgBytesPerSec'],
                         fmt['nBlockAlign'],
                         fmt['wBitsPerSample'],
                         fmt['cbSize'])
    data_chunk_header = struct.pack('<4sL', b"data", data_len)

    out_file.write(header + bytes(extradata) + data_chunk_header)
92
93
def download_file(src_url, dest_file, mode):
    """Fetch src_url and optionally save a copy of its content.

    src_url -- path of an existing local file, or a URL to open with
               urllib.
    dest_file -- path where the content is saved; nothing is saved when
                 this is empty or None.
    mode -- mode used to open dest_file (ignored when dest_file is not
            set).

    Returns the content as bytes.

    An urllib.error.HTTPError is reported on stderr and then re-raised.
    """

    if os.path.exists(src_url):
        with open(src_url, "rb") as src:
            data = src.read()
    else:
        try:
            # The context manager closes the connection even if read()
            # fails.
            with urllib.request.urlopen(src_url) as response:
                data = response.read()
        except urllib.error.HTTPError:
            sys.stderr.write("Error while dowloading URL: %s\n" % src_url)
            raise

    if dest_file:
        with open(dest_file, mode) as dest:
            dest.write(data)

    return data
114
115
def get_manifest(url, dest_dir):
    """Returns the manifest element and the base content URL

    url -- URL or local path of the manifest; the query string is dropped
           and '/Manifest' is appended unless the URL already ends with
           '/manifest', '.ismc' or '.csm' (case insensitive).
    dest_dir -- directory where a copy is saved as 'Manifest'.

    Returns a (manifest, base_url) tuple, where manifest is a parsed
    ElementTree and base_url is the manifest URL without a trailing
    '/manifest'.

    Raises Exception if the manifest MajorVersion is not "2".
    """

    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    local_manifest_path = os.path.join(dest_dir, 'Manifest')
    download_file(manifest_url, local_manifest_path, "wb")

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay
    # then get the final manifest
    clip = manifest.find("Clip")
    if clip is not None and "Url" in clip.attrib:
        tmp_manifest_url = clip.attrib["Url"]
        try:
            tmp_manifest_data = download_file(tmp_manifest_url, None, None)
        except urllib.error.URLError:
            # Best effort: keep using the intermediate manifest.
            # HTTPError is a subclass of URLError, so it is covered too.
            pass
        else:
            # download_file() returns raw bytes: they have to be parsed
            # into a tree, because callers use findall() and getroot() on
            # the returned manifest.  Set the new values only once the
            # download and the parsing have succeeded.
            manifest = etree.ElementTree(etree.fromstring(tmp_manifest_data))
            manifest_url = tmp_manifest_url

    base_url = re.sub("/manifest$", "", manifest_url, flags=re.IGNORECASE)

    return (manifest, base_url)
151
152
def print_manifest_info(manifest):
    """Print the streams found in the manifest and their quality levels.

    For each StreamIndex element one line with its index and Type is
    printed, followed by one line per QualityLevel; the details shown
    depend on whether the stream Type is "video" or "audio".
    """

    for stream_no, stream in enumerate(manifest.findall('.//StreamIndex')):
        kind = stream.attrib["Type"]

        print("Stream: %s Type: %s" % (stream_no, kind))
        print("\tQuality Levels:")

        for level_no, level in enumerate(stream.findall("QualityLevel")):
            attrs = level.attrib
            bitrate = attrs["Bitrate"]
            fourcc = attrs["FourCC"]

            if kind == "video":
                frame_size = "%sx%s" % (attrs["MaxWidth"], attrs["MaxHeight"])
                line = "\t%2s: %4s %10s @ %7s bps" % (
                    level_no, fourcc, frame_size, bitrate)
                print(line)
            elif kind == "audio":
                channels = attrs["Channels"]
                sampling_rate = attrs["SamplingRate"]
                bits_per_sample = attrs["BitsPerSample"]
                line = "\t%2s: %4s %sHz %sbits %sch @ %7s bps" % (
                    level_no, fourcc, sampling_rate, bits_per_sample,
                    channels, bitrate)
                print(line)

    print()
180
181
def get_chunk_quality_string(stream, quality_level):
    """Return the quality part of the chunk URL for the given level.

    The first path component of the stream Url template is taken, and its
    {bitrate} and {CustomAttributes} placeholders are filled in from the
    QualityLevel element at index quality_level.
    """
    level = stream.findall("QualityLevel")[quality_level]
    bitrate = level.attrib["Bitrate"]

    # Render the custom attributes as a comma separated Name=Value list
    pairs = ["%s=%s" % (attribute.attrib["Name"], attribute.attrib["Value"])
             for attribute in level.findall("CustomAttributes/Attribute")]
    custom_attributes = ",".join(pairs).rstrip(',')

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    quality_template = stream.attrib["Url"].split('/')[0]

    return quality_template \
        .replace("{bitrate}", bitrate) \
        .replace("{CustomAttributes}", custom_attributes)
203
204
def get_chunk_name_string(stream, chunk_time):
    """Return the chunk part of the chunk URL for the given start time.

    The second path component of the stream Url template is taken, and
    its {start time} placeholder is replaced with chunk_time.
    """
    url_components = stream.attrib["Url"].split('/')
    name_template = url_components[1]

    return name_template.replace("{start time}", str(chunk_time))
210
211
def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
    """Download all the chunks of a stream at the given quality level.

    base_url -- URL the chunk paths are appended to.
    manifest -- parsed manifest tree.
    stream_index -- index of the StreamIndex element to download.
    quality_level -- index of the QualityLevel element to use.
    dest_dir -- directory under which a subdirectory named after the
                quality string is created to hold the chunks.

    Chunks which already exist on disk are read back instead of being
    downloaded again.  Progress and the total size are printed on stdout.
    """
    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
    if not os.path.exists(chunks_dest_dir):
        os.mkdir(chunks_dest_dir, 0o755)

    chunks = stream.findall("c")
    data_size = 0
    print("\nDownloading Stream %d" % stream_index)
    print("\tChunks %10d/%-10d" % (0, len(chunks)), "\r", end=' ')
    sys.stdout.flush()

    # Start time expected for the next chunk: the start time of the
    # previous chunk plus its duration, or 0 for the first chunk.
    next_chunk_time = 0
    for i, chunk in enumerate(chunks):

        # An explicit 't' is used as it appears in the manifest; a chunk
        # without 't' starts where the previous one ended.
        if "t" in chunk.attrib:
            chunk_time = chunk.attrib["t"]
        else:
            chunk_time = next_chunk_time

        # Keep the running time up to date even when 't' is given, so that
        # a following chunk carrying only 'd' gets the right start time.
        if "d" in chunk.attrib:
            next_chunk_time = int(chunk_time) + int(chunk.attrib["d"])

        chunk_name = get_chunk_name_string(stream, chunk_time)
        chunk_file = os.path.join(chunks_dest_dir, chunk_name)

        if not os.path.exists(chunk_file):
            chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
            data = download_file(chunk_url, chunk_file, "wb")
        else:
            with open(chunk_file, "rb") as f:
                data = f.read()

        data_size += len(data)
        print("\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r", end=' ')
        sys.stdout.flush()
    print("\tDownloaded size:", data_size)
251
252
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
                   dest_file_name, final_dest_file=None):
    """Concatenate the payload of the downloaded chunks of a stream.

    manifest -- parsed manifest tree.
    stream_index -- index of the StreamIndex element to rebuild.
    quality_level -- index of the QualityLevel element that was downloaded.
    src_dir -- directory containing the per-quality chunk subdirectory.
    dest_file_name -- file receiving the concatenated raw payload.
    final_dest_file -- for audio streams, file receiving a WAV header
                       followed by the raw payload; defaults to
                       dest_file_name.

    Progress and the total payload size are printed on stdout.
    """

    if final_dest_file is None:
        final_dest_file = dest_file_name

    stream = manifest.findall('.//StreamIndex')[stream_index]

    chunks_quality = get_chunk_quality_string(stream, quality_level)

    chunks_src_dir = os.path.join(src_dir, chunks_quality)

    # The context manager closes (and flushes) the raw file for every
    # stream type, also when a chunk is missing or malformed.
    with open(dest_file_name, "wb") as dest_file:

        chunks = stream.findall("c")
        data_size = 0
        print("\nRebuilding Stream %d" % stream_index)
        print("\tChunks %10d/%-10d" % (0, len(chunks)), "\r", end=' ')
        sys.stdout.flush()

        # Start time expected for the next chunk: the start time of the
        # previous chunk plus its duration, or 0 for the first chunk.
        next_chunk_time = 0
        for i, chunk in enumerate(chunks):

            # An explicit 't' is used as it appears in the manifest; a
            # chunk without 't' starts where the previous one ended.
            if "t" in chunk.attrib:
                chunk_time = chunk.attrib["t"]
            else:
                chunk_time = next_chunk_time

            # Keep the running time up to date even when 't' is given, so
            # that a following chunk carrying only 'd' gets the right
            # start time.
            if "d" in chunk.attrib:
                next_chunk_time = int(chunk_time) + int(chunk.attrib["d"])

            chunk_name = get_chunk_name_string(stream, chunk_time)
            chunk_file = os.path.join(chunks_src_dir, chunk_name)

            with open(chunk_file, "rb") as f:
                data = get_chunk_data(f.read())
            dest_file.write(data)
            data_size += len(data)
            print("\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r", end=' ')
            sys.stdout.flush()

    # Add a nice WAV header
    if stream.attrib['Type'] == "audio":
        quality = stream.findall("QualityLevel")[quality_level]
        codec_private_data = quality.attrib['CodecPrivateData']

        fmt = {}
        fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
        fmt['nChannels'] = int(quality.attrib['Channels'])
        fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
        fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
        fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
        fmt['cbSize'] = 0

        # Read the raw data back before opening the final file for
        # writing: the two paths are the same when final_dest_file is
        # not given, and opening it first would truncate the data.
        with open(dest_file_name, "rb") as raw_file:
            raw_data = raw_file.read()

        with open(final_dest_file, "wb") as wav_file:
            write_wav_header(wav_file, fmt, codec_private_data, data_size)
            wav_file.write(raw_data)

    print()
    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
317
318
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return the start delay of stream 2 relative to stream 1, in seconds.

    manifest -- parsed manifest tree.
    stream1_index, stream2_index -- indexes of the StreamIndex elements to
                                    compare.

    The delay is the difference between the 't' attributes of the first
    chunk of each stream, each divided by the TimeScale of its stream.

    Returns 0 when either stream has no TimeScale attribute, no chunks, or
    no 't' attribute on its first chunk.
    """
    streams = manifest.findall('.//StreamIndex')

    s1 = streams[stream1_index]
    s2 = streams[stream2_index]

    # Look in .attrib: the "in" operator applied to the element itself
    # tests its child elements, not its attributes.
    if "TimeScale" not in s1.attrib or "TimeScale" not in s2.attrib:
        return 0

    s1_start_chunk = s1.find("c")
    s2_start_chunk = s2.find("c")

    # find() returns None for a stream without chunks
    if s1_start_chunk is None or s2_start_chunk is None:
        return 0

    if "t" not in s1_start_chunk.attrib \
       or "t" not in s2_start_chunk.attrib:
        return 0

    s1_start_time = int(s1_start_chunk.attrib['t'])
    s2_start_time = int(s2_start_chunk.attrib['t'])

    s1_timescale = float(s1.attrib['TimeScale'])
    s2_timescale = float(s2.attrib['TimeScale'])

    # calc difference in seconds
    delay = s2_start_time / s2_timescale - \
        s1_start_time / s1_timescale

    return delay
346
347
def get_clip_duration(manifest):
    """Return the Duration attribute of the manifest root, divided by the
    default timescale."""
    # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
    root = manifest.getroot()
    raw_duration = float(root.attrib['Duration'])

    # here is the default timescale
    default_timescale = 10000000

    return raw_duration / default_timescale
353
354
def smooth_download(url, manifest, dest_dir,
                    video_stream_index=0, audio_stream_index=1,
                    video_quality_level=0, audio_quality_level=0,
                    chunks_dir=None, download=True,
                    out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download and rebuild one video and one audio stream.

    The chunks are stored in chunks_dir (dest_dir when not given) and are
    only downloaded when download is true.  The rebuilt streams are saved
    in dest_dir as out_video_file and out_audio_file, the latter also with
    a '.wav' suffix appended.  Finally an ffmpeg command line for muxing
    the two files is printed.
    """

    if chunks_dir is None:
        chunks_dir = dest_dir

    if download:
        selected_streams = (
            (video_stream_index, video_quality_level),
            (audio_stream_index, audio_quality_level),
        )
        for stream_index, quality_level in selected_streams:
            download_chunks(url, manifest, stream_index, quality_level,
                            chunks_dir)

    video_path = os.path.join(dest_dir, out_video_file)
    raw_audio_path = os.path.join(dest_dir, out_audio_file)
    wav_audio_path = raw_audio_path + '.wav'

    rebuild_stream(manifest, video_stream_index, video_quality_level,
                   chunks_dir, video_path)
    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
                   chunks_dir, raw_audio_path, wav_audio_path)

    # duration = get_clip_duration(manifest)

    delay = calc_tracks_delay(manifest, video_stream_index,
                              audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_command = ("ffmpeg -i %s \\\n"
                   "  -itsoffset %f -async 1 -i %s \\\n"
                   "  -vcodec copy -acodec copy ffout.mkv"
                   % (video_path, delay, wav_audio_path))

    print(mux_command)
391
392
def options_parser():
    """Build the OptionParser describing the command line of the program."""
    parser = OptionParser(
        usage="usage: %prog [options] <manifest URL or file>",
        version="%%prog %s" % __version__,
        description=__description__,
        epilog=__author_info__)

    # Boolean switches: (short, long, action, dest, default, help)
    switches = (
        ("-i", "--info", "store_true", "info_only", False,
         "print Manifest info and exit"),
        ("-m", "--manifest-only", "store_true", "manifest_only", False,
         "download Manifest file and exit"),
        ("-n", "--no-download", "store_false", "download", True,
         "disable downloading chunks"),
        ("-s", "--sync-delay", "store_true", "sync_delay", False,
         "show the sync delay between the given streams and exit"),
    )
    for short, long_name, action, dest, default, help_text in switches:
        parser.add_option(short, long_name, action=action, dest=dest,
                          default=default, help=help_text)

    # Directory options: (short, long, dest, default, help)
    directories = (
        ("-d", "--dest-dir", "dest_dir", tempfile.gettempdir(),
         "destination directory"),
        ("-c", "--chunks-dir", "chunks_dir", None,
         "directory containing chunks, if different from destination dir"),
    )
    for short, long_name, dest, default, help_text in directories:
        parser.add_option(short, long_name, metavar="<dir>", dest=dest,
                          default=default, help=help_text)

    # Integer index options: (short, long, dest, default, help)
    indexes = (
        ("-v", "--video-stream", "video_stream_index", 0,
         "index of the video stream"),
        ("-a", "--audio-stream", "audio_stream_index", 1,
         "index of the audio stream"),
        ("-q", "--video-quality", "video_quality_level", 0,
         "index of the video quality level"),
        ("-Q", "--audio-quality", "audio_quality_level", 0,
         "index of the audio quality level"),
    )
    for short, long_name, dest, default, help_text in indexes:
        parser.add_option(short, long_name, metavar="<n>", type="int",
                          dest=dest, default=default, help=help_text)

    return parser
430
431
def main():
    """Parse the command line and run the requested action."""
    parser = options_parser()
    options, args = parser.parse_args()

    # Exactly one manifest URL or file is expected
    if len(args) != 1:
        parser.print_help()
        parser.exit(1)

    dest_dir = options.dest_dir
    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir, 0o755)

    manifest, base_url = get_manifest(args[0], dest_dir)

    # The manifest has been saved by get_manifest() already
    if options.manifest_only:
        parser.exit(0)

    if options.sync_delay:
        delay = calc_tracks_delay(manifest,
                                  options.video_stream_index,
                                  options.audio_stream_index)
        print(delay)
        parser.exit(0)

    # The info is shown both with --info and before a download
    print_manifest_info(manifest)
    if options.info_only:
        parser.exit(0)

    smooth_download(base_url, manifest, dest_dir,
                    options.video_stream_index, options.audio_stream_index,
                    options.video_quality_level, options.audio_quality_level,
                    options.chunks_dir, options.download)


if __name__ == "__main__":
    main()