3 # smooth-dl - download videos served using Smooth Streaming technology
5 # Copyright (C) 2010-2016 Antonio Ospite <ao2@ao2.it>
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # - Handle HTTP errors:
23 # "Connection reset by peer"
24 # "Resource not available"
26 # - Support more Manifest formats:
27 # WaveFormatEx attribute instead of PrivateCodecdata
28 # 'd' and other attributes in chunk element ('i', 's', 'q')
30 # basically, write a proper implementation of manifest parsing and chunk
36 import xml.etree.ElementTree as etree
42 from optparse import OptionParser
43 from urllib.parse import urlparse, urlunparse
45 __description__ = "Download videos served using Smooth Streaming technology"
47 __author_info__ = "Written by Antonio Ospite http://ao2.it"
50 def get_chunk_data(data):
52 moof_size = struct.unpack(">L", data[0:4])[0]
53 mdat_size = struct.unpack(">L", data[moof_size:moof_size + 4])[0]
55 data_start = moof_size + 4 + len('mdat')
56 data_size = mdat_size - 4 - len('mdat')
58 # print len(data[data_start:]), \
59 # len(data[data_start:data_start + data_size]), data_size
61 assert len(data[data_start:]) == data_size
63 return data[data_start:data_start + data_size]
66 def hexstring_to_bytes(hex_string):
67 return bytearray.fromhex(hex_string)
70 def write_wav_header(out_file, fmt, codec_private_data, data_len):
72 extradata = hexstring_to_bytes(codec_private_data)
74 fmt['cbSize'] = len(extradata)
75 fmt_len = 18 + fmt['cbSize']
76 wave_len = len("WAVEfmt ") + 4 + fmt_len + len('data') + 4
78 out_file.write(bytes("RIFF", "ascii"))
79 out_file.write(struct.pack('<L', wave_len))
80 out_file.write(bytes("WAVEfmt ", "ascii"))
81 out_file.write(struct.pack('<L', fmt_len))
82 out_file.write(struct.pack('<H', fmt['wFormatTag']))
83 out_file.write(struct.pack('<H', fmt['nChannels']))
84 out_file.write(struct.pack('<L', fmt['nSamplesPerSec']))
85 out_file.write(struct.pack('<L', fmt['nAvgBytesPerSec']))
86 out_file.write(struct.pack('<H', fmt['nBlockAlign']))
87 out_file.write(struct.pack('<H', fmt['wBitsPerSample']))
88 out_file.write(struct.pack('<H', fmt['cbSize']))
89 out_file.write(extradata)
90 out_file.write(bytes("data", "ascii"))
91 out_file.write(struct.pack('<L', data_len))
94 def download_file(src_url, dest_file, mode):
96 if os.path.exists(src_url):
97 f = open(src_url, "rb")
102 response = urllib.request.urlopen(src_url)
103 data = response.read()
104 except urllib.error.HTTPError:
105 sys.stderr.write("Error while dowloading URL: %s\n" % src_url)
109 f = open(dest_file, mode)
116 def get_manifest(url, dest_dir):
117 """Returns the manifest element and the base content URL"""
119 # Remove the querystring if present
120 manifest_url = urlunparse(urlparse(url)._replace(query=''))
122 if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
123 manifest_url += '/Manifest'
125 local_manifest_path = os.path.join(dest_dir, 'Manifest')
126 download_file(manifest_url, local_manifest_path, "wb")
128 manifest = etree.parse(local_manifest_path)
130 version = manifest.getroot().attrib['MajorVersion']
132 raise Exception('Only Smooth Streaming version 2 supported')
134 # if some intermediate client Manifest is used, like in Rai Replay
135 # then get the final manifest
136 clip = manifest.find("Clip")
137 if clip is not None and "Url" in clip.attrib:
138 tmp_manifest_url = clip.attrib["Url"]
140 tmp_manifest = download_file(tmp_manifest_url, None, None)
141 # set the new values only if the dowload succeded
142 manifest_url = tmp_manifest_url
143 manifest = tmp_manifest
144 except urllib.error.HTTPError:
147 manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
148 base_url = manifest_pattern.sub("", manifest_url)
150 return (manifest, base_url)
153 def print_manifest_info(manifest):
155 streams = manifest.findall('.//StreamIndex')
157 for i, s in enumerate(streams):
158 stream_type = s.attrib["Type"]
160 print("Stream: %s Type: %s" % (i, stream_type))
162 print("\tQuality Levels:")
163 qualities = s.findall("QualityLevel")
164 for i, q in enumerate(qualities):
165 bitrate = q.attrib["Bitrate"]
166 fourcc = q.attrib["FourCC"]
168 if stream_type == "video":
169 size = "%sx%s" % (q.attrib["MaxWidth"], q.attrib["MaxHeight"])
170 print("\t%2s: %4s %10s @ %7s bps" % (i, fourcc, size, bitrate))
171 if stream_type == "audio":
172 channels = q.attrib["Channels"]
173 sampling_rate = q.attrib["SamplingRate"]
174 bits_per_sample = q.attrib["BitsPerSample"]
175 print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
176 (i, fourcc, sampling_rate, bits_per_sample, channels,
182 def get_chunk_quality_string(stream, quality_level):
183 quality = stream.findall("QualityLevel")[quality_level]
184 bitrate = quality.attrib["Bitrate"]
186 quality_attributes = quality.findall("CustomAttributes/Attribute")
187 custom_attributes = ""
188 for i in quality_attributes:
189 custom_attributes += "%s=%s," % (i.attrib["Name"], i.attrib["Value"])
190 custom_attributes = custom_attributes.rstrip(',')
192 # Assume URLs are in this form:
193 # Url="QualityLevels({bitrate})/Fragments(video={start time})"
195 # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
196 url = stream.attrib["Url"]
198 chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
199 chunks_quality = chunks_quality.replace("{CustomAttributes}",
202 return chunks_quality
205 def get_chunk_name_string(stream, chunk_time):
206 url = stream.attrib["Url"]
207 chunk_name = url.split('/')[1].replace("{start time}", str(chunk_time))
212 def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
213 stream = manifest.findall('.//StreamIndex')[stream_index]
215 chunks_quality = get_chunk_quality_string(stream, quality_level)
217 chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
218 if not os.path.exists(chunks_dest_dir):
219 os.mkdir(chunks_dest_dir, 0o755)
221 chunks = stream.findall("c")
223 print("\nDownloading Stream %d" % stream_index)
224 print("\tChunks %10d/%-10d" % (0, len(chunks)), "\r", end=' ')
228 for i, chunk in enumerate(chunks):
230 if "t" in chunk.attrib:
231 chunk_time = chunk.attrib["t"]
232 elif "d" in chunk.attrib:
233 chunk_time = stream_duration
234 stream_duration = chunk_time + int(chunk.attrib["d"])
236 chunk_name = get_chunk_name_string(stream, chunk_time)
237 chunk_file = os.path.join(dest_dir, chunks_quality, chunk_name)
239 if not os.path.exists(chunk_file):
240 chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
241 data = download_file(chunk_url, chunk_file, "wb")
243 f = open(chunk_file, "rb")
247 data_size += len(data)
248 print("\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r", end=' ')
250 print("\tDownloaded size:", data_size)
253 def rebuild_stream(manifest, stream_index, quality_level, src_dir,
254 dest_file_name, final_dest_file=None):
256 if final_dest_file is None:
257 final_dest_file = dest_file_name
259 stream = manifest.findall('.//StreamIndex')[stream_index]
261 chunks_quality = get_chunk_quality_string(stream, quality_level)
263 chunks_src_dir = os.path.join(src_dir, chunks_quality)
265 dest_file = open(dest_file_name, "wb")
267 chunks = stream.findall("c")
269 print("\nRebuilding Stream %d" % stream_index)
270 print("\tChunks %10d/%-10d" % (0, len(chunks)), "\r", end=' ')
274 for i, chunk in enumerate(chunks):
276 if "t" in chunk.attrib:
277 chunk_time = chunk.attrib["t"]
278 elif "d" in chunk.attrib:
279 chunk_time = stream_duration
280 stream_duration = chunk_time + int(chunk.attrib["d"])
282 chunk_name = get_chunk_name_string(stream, chunk_time)
283 chunk_file = os.path.join(chunks_src_dir, chunk_name)
285 f = open(chunk_file, "rb")
286 data = get_chunk_data(f.read())
288 dest_file.write(data)
289 data_size += len(data)
290 print("\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r", end=' ')
293 # Add a nice WAV header
294 if stream.attrib['Type'] == "audio":
295 quality = stream.findall("QualityLevel")[quality_level]
296 codec_private_data = quality.attrib['CodecPrivateData']
299 fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
300 fmt['nChannels'] = int(quality.attrib['Channels'])
301 fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
302 fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
303 fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
304 fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
307 f = open(final_dest_file, "wb")
308 write_wav_header(f, fmt, codec_private_data, data_size)
310 dest_file = open(dest_file_name, "rb")
311 f.write(dest_file.read())
316 print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
319 def calc_tracks_delay(manifest, stream1_index, stream2_index):
320 streams = manifest.findall('.//StreamIndex')
322 s1 = streams[stream1_index]
323 s2 = streams[stream2_index]
325 if "TimeScale" not in s1 or "TimeScale" not in s2:
328 s1_start_chunk = s1.find("c")
329 s2_start_chunk = s2.find("c")
331 if "t" not in s1_start_chunk.attrib \
332 or "t" not in s2_start_chunk.attrib:
335 s1_start_time = int(s1_start_chunk.attrib['t'])
336 s2_start_time = int(s2_start_chunk.attrib['t'])
338 s1_timescale = float(s1.attrib['TimeScale'])
339 s2_timescale = float(s2.attrib['TimeScale'])
341 # calc difference in seconds
342 delay = s2_start_time / s2_timescale - \
343 s1_start_time / s1_timescale
348 def get_clip_duration(manifest):
349 # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
350 duration = manifest.getroot().attrib['Duration']
352 return float(duration) / 10000000 # here is the default timescale
355 def smooth_download(url, manifest, dest_dir,
356 video_stream_index=0, audio_stream_index=1,
357 video_quality_level=0, audio_quality_level=0,
358 chunks_dir=None, download=True,
359 out_video_file='_video.vc1', out_audio_file='_audio.raw'):
361 if chunks_dir is None:
362 chunks_dir = dest_dir
365 download_chunks(url, manifest, video_stream_index,
366 video_quality_level, chunks_dir)
367 download_chunks(url, manifest, audio_stream_index,
368 audio_quality_level, chunks_dir)
370 dest_video = os.path.join(dest_dir, out_video_file)
371 dest_audio = os.path.join(dest_dir, out_audio_file)
373 rebuild_stream(manifest, video_stream_index, video_quality_level,
374 chunks_dir, dest_video)
375 rebuild_stream(manifest, audio_stream_index, audio_quality_level,
376 chunks_dir, dest_audio, dest_audio + '.wav')
378 # duration = get_clip_duration(manifest)
380 delay = calc_tracks_delay(manifest, video_stream_index,
383 # optionally encode audio to vorbis:
384 # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
385 mux_command = ("ffmpeg -i %s \\\n" +
386 " -itsoffset %f -async 1 -i %s \\\n" +
387 " -vcodec copy -acodec copy ffout.mkv") % \
388 (dest_video, delay, dest_audio + '.wav')
393 def options_parser():
394 version = "%%prog %s" % __version__
395 usage = "usage: %prog [options] <manifest URL or file>"
396 parser = OptionParser(usage=usage, version=version,
397 description=__description__, epilog=__author_info__)
398 parser.add_option("-i", "--info",
399 action="store_true", dest="info_only",
400 default=False, help="print Manifest info and exit")
401 parser.add_option("-m", "--manifest-only",
402 action="store_true", dest="manifest_only",
403 default=False, help="download Manifest file and exit")
404 parser.add_option("-n", "--no-download",
405 action="store_false", dest="download",
406 default=True, help="disable downloading chunks")
407 parser.add_option("-s", "--sync-delay",
408 action="store_true", dest="sync_delay",
409 default=False, help="show the sync delay between the given streams and exit")
410 parser.add_option("-d", "--dest-dir", metavar="<dir>",
411 dest="dest_dir", default=tempfile.gettempdir(),
412 help="destination directory")
413 parser.add_option("-c", "--chunks-dir", metavar="<dir>",
414 dest="chunks_dir", default=None,
415 help="directory containing chunks, if different from destination dir")
416 parser.add_option("-v", "--video-stream", metavar="<n>",
417 type="int", dest="video_stream_index", default=0,
418 help="index of the video stream")
419 parser.add_option("-a", "--audio-stream", metavar="<n>",
420 type="int", dest="audio_stream_index", default=1,
421 help="index of the audio stream")
422 parser.add_option("-q", "--video-quality", metavar="<n>",
423 type="int", dest="video_quality_level", default=0,
424 help="index of the video quality level")
425 parser.add_option("-Q", "--audio-quality", metavar="<n>",
426 type="int", dest="audio_quality_level", default=0,
427 help="index of the audio quality level")
433 parser = options_parser()
434 (options, args) = parser.parse_args()
440 if not os.path.exists(options.dest_dir):
441 os.mkdir(options.dest_dir, 0o755)
444 manifest, url = get_manifest(url, options.dest_dir)
446 if options.manifest_only:
449 if options.sync_delay:
450 print(calc_tracks_delay(manifest,
451 options.video_stream_index,
452 options.audio_stream_index))
455 if options.info_only:
456 print_manifest_info(manifest)
459 print_manifest_info(manifest)
461 smooth_download(url, manifest, options.dest_dir,
462 options.video_stream_index, options.audio_stream_index,
463 options.video_quality_level, options.audio_quality_level,
464 options.chunks_dir, options.download)
467 if __name__ == "__main__":