# basically, write a proper implementation of manifest parsing and chunk
# downloading
-
-__description = "Download videos served using Smooth Streaming technology"
-__version = "0.x"
-__author_info = "Written by Antonio Ospite http://ao2.it"
-
import os
+import re
import sys
import xml.etree.ElementTree as etree
import urllib2
import struct
import tempfile
from optparse import OptionParser
+from urlparse import urlparse, urlunparse
+
+__description__ = "Download videos served using Smooth Streaming technology"
+__version__ = "0.x"
+__author_info__ = "Written by Antonio Ospite http://ao2.it"
def get_chunk_data(data):
data_start = moof_size + 4 + len('mdat')
data_size = mdat_size - 4 - len('mdat')
- #print len(data[data_start:]), \
+ # print len(data[data_start:]), \
# len(data[data_start:data_start + data_size]), data_size
- assert(len(data[data_start:]) == data_size)
+ assert len(data[data_start:]) == data_size
return data[data_start:data_start + data_size]
def hexstring_to_bytes(hex_string):
    """Decode a string of hexadecimal digits into a string of raw bytes.

    Each pair of hex digits becomes one character whose ordinal is the
    value of the pair.  A trailing unpaired digit is decoded on its own,
    as a single low-valued character.

    Raises ValueError if a pair is not valid hexadecimal.
    """
    # Build the result in one join instead of repeated "+=" so the cost
    # stays linear in the length of the input.
    return "".join(chr(int(hex_string[i:i + 2], 16))
                   for i in range(0, len(hex_string), 2))
out_file.write(struct.pack('<L', data_len))
-def get_manifest(base_url, dest_dir=tempfile.gettempdir(),
- manifest_file='Manifest'):
- """Returns the manifest and the new URL if this is changed"""
def download_file(src_url, dest_file, mode):
    """Fetch src_url and return the body of the response.

    src_url   -- URL to open with urllib2.urlopen
    dest_file -- path the data is also written to; a false value (None
                 or "") skips writing and only returns the data
    mode      -- mode passed to open() for dest_file, e.g. "w" or "wb"

    Any urllib2.URLError (HTTPError included) is reported on stderr and
    then re-raised unchanged, so callers can still catch HTTPError.
    """
    try:
        response = urllib2.urlopen(src_url)
        try:
            data = response.read()
        finally:
            # Release the connection even when the read fails half way.
            response.close()
    except urllib2.URLError:
        # URLError is the base class of HTTPError, so connection failures
        # are reported as well.  The newline keeps the message separate
        # from whatever is printed next.
        sys.stderr.write("Error while downloading URL: %s\n" % src_url)
        raise

    if dest_file:
        # "with" closes the file even if the write raises.
        with open(dest_file, mode) as f:
            f.write(data)

    return data
- manifest_url = base_url
- if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
- manifest_url += '/Manifest'
- response = urllib2.urlopen(manifest_url)
- data = response.read()
def get_manifest(url, dest_dir):
    """Returns the manifest tree and the base content URL.

    url      -- manifest URL, or the path of a manifest file on disk
    dest_dir -- directory where a downloaded manifest is saved (as
                'Manifest'); unused when url is an existing local path

    The result is a tuple (manifest, base_url): manifest is an
    ElementTree and base_url is the manifest URL with any trailing
    "/manifest" (case-insensitive) removed.

    Raises Exception if the manifest MajorVersion is not "2".  Errors
    from downloading the first manifest, and XML parse errors, propagate.
    """
    # Remove the querystring if present
    manifest_url = urlunparse(urlparse(url)._replace(query=''))

    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
        manifest_url += '/Manifest'

    if os.path.exists(url):
        local_manifest_path = url
    else:
        local_manifest_path = os.path.join(dest_dir, 'Manifest')
        download_file(manifest_url, local_manifest_path, "w")

    manifest = etree.parse(local_manifest_path)

    version = manifest.getroot().attrib['MajorVersion']
    if version != "2":
        raise Exception('Only Smooth Streaming version 2 supported')

    # if some intermediate client Manifest is used, like in Rai Replay
    # then get the final manifest
    clip = manifest.find("Clip")
    if clip is not None and "Url" in clip.attrib:
        tmp_manifest_url = clip.attrib["Url"]
        try:
            tmp_manifest_data = download_file(tmp_manifest_url, None, None)
        except urllib2.HTTPError:
            # Best effort: keep using the intermediate manifest.
            pass
        else:
            # set the new values only if the download succeeded.
            # download_file returns the raw document, so it has to be
            # parsed before callers can use findall()/getroot() on it.
            manifest = etree.ElementTree(etree.fromstring(tmp_manifest_data))
            manifest_url = tmp_manifest_url

    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
    base_url = manifest_pattern.sub("", manifest_url)

    return (manifest, base_url)
def print_manifest_info(manifest):
- streams = manifest.findall('//StreamIndex')
+ streams = manifest.findall('.//StreamIndex')
for i, s in enumerate(streams):
stream_type = s.attrib["Type"]
- url = s.attrib["Url"]
print "Stream: %s Type: %s" % (i, stream_type)
channels = q.attrib["Channels"]
sampling_rate = q.attrib["SamplingRate"]
bits_per_sample = q.attrib["BitsPerSample"]
- print "\t%2s: %4s %sHz %sbits %sch @ %7s bps" % (i, fourcc,
- sampling_rate, bits_per_sample, channels, bitrate)
+ print "\t%2s: %4s %sHz %sbits %sch @ %7s bps" % \
+ (i, fourcc, sampling_rate, bits_per_sample, channels,
+ bitrate)
print
-def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
-
- if os.path.exists(dest_dir) == False:
- os.mkdir(dest_dir, 0755)
-
- stream = manifest.findall('//StreamIndex')[stream_index]
-
def get_chunk_quality_string(stream, quality_level):
    """Return the quality part of the chunk URL for one quality level.

    stream        -- a StreamIndex element
    quality_level -- index into the stream's QualityLevel children

    The first path segment of the stream's Url pattern is taken and its
    placeholders are filled in from the selected QualityLevel.

    Raises IndexError if quality_level is out of range, and KeyError if
    a required attribute is missing.
    """
    quality = stream.findall("QualityLevel")[quality_level]
    bitrate = quality.attrib["Bitrate"]

    # "Name=Value" pairs separated by commas.  join() adds no trailing
    # separator, so nothing has to be stripped afterwards (stripping
    # could also eat commas that belong to the last value).
    custom_attributes = ",".join(
        "%s=%s" % (attr.attrib["Name"], attr.attrib["Value"])
        for attr in quality.findall("CustomAttributes/Attribute"))

    # Assume URLs are in this form:
    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
    # or
    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
    url = stream.attrib["Url"]
    chunks_quality = url.split('/')[0]

    # The protocol allows the bitrate placeholder in two spellings.
    for placeholder in ("{bitrate}", "{Bitrate}"):
        chunks_quality = chunks_quality.replace(placeholder, bitrate)

    chunks_quality = chunks_quality.replace("{CustomAttributes}",
                                            custom_attributes)

    return chunks_quality
+
+
def get_chunk_name_string(stream, chunk_time):
    """Return the fragment part of the chunk URL for one chunk.

    stream     -- a StreamIndex element
    chunk_time -- start time of the chunk; converted with str()

    The second path segment of the stream's Url pattern is taken and
    the start time placeholder in it is replaced.

    Raises KeyError if the stream has no Url attribute and IndexError
    if the pattern has no second segment.
    """
    url = stream.attrib["Url"]
    chunk_name = url.split('/')[1]

    # The protocol allows the start time placeholder in two spellings.
    start_time = str(chunk_time)
    for placeholder in ("{start time}", "{start_time}"):
        chunk_name = chunk_name.replace(placeholder, start_time)

    return chunk_name
+
+
+def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
+ stream = manifest.findall('.//StreamIndex')[stream_index]
+
+ chunks_quality = get_chunk_quality_string(stream, quality_level)
+
chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
- if os.path.exists(chunks_dest_dir) == False:
+ if not os.path.exists(chunks_dest_dir):
os.mkdir(chunks_dest_dir, 0755)
chunks = stream.findall("c")
print "\nDownloading Stream %d" % stream_index
print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
sys.stdout.flush()
- for i, c in enumerate(chunks):
- t = c.attrib["t"]
- chunk_name = url.split('/')[1].replace("{start time}", t)
- chunk_file = os.path.join(dest_dir, chunks_quality, chunk_name)
+ stream_duration = 0
+ for i, chunk in enumerate(chunks):
- if os.path.exists(chunk_file) == False:
- chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
- response = urllib2.urlopen(chunk_url)
- data = response.read()
+ if "t" in chunk.attrib:
+ chunk_time = chunk.attrib["t"]
+ elif "d" in chunk.attrib:
+ chunk_time = stream_duration
+ stream_duration = chunk_time + int(chunk.attrib["d"])
- f = open(chunk_file, "wb")
- f.write(data)
- f.close()
+ chunk_name = get_chunk_name_string(stream, chunk_time)
+ chunk_file = os.path.join(dest_dir, chunks_quality, chunk_name)
+
+ if not os.path.exists(chunk_file):
+ chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
+ data = download_file(chunk_url, chunk_file, "wb")
else:
f = open(chunk_file, "rb")
data = f.read()
def rebuild_stream(manifest, stream_index, quality_level, src_dir,
- dest_file_name, final_dest_file=None):
+ dest_file_name, final_dest_file=None):
- if final_dest_file == None:
+ if final_dest_file is None:
final_dest_file = dest_file_name
- stream = manifest.findall('//StreamIndex')[stream_index]
-
- quality = stream.findall("QualityLevel")[quality_level]
- bitrate = quality.attrib["Bitrate"]
+ stream = manifest.findall('.//StreamIndex')[stream_index]
- # Assume URLs are in this form:
- # Url="QualityLevels({bitrate})/Fragments(video={start time})"
- url = stream.attrib["Url"]
+ chunks_quality = get_chunk_quality_string(stream, quality_level)
- chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
chunks_src_dir = os.path.join(src_dir, chunks_quality)
dest_file = open(dest_file_name, "wb")
print "\nRebuilding Stream %d" % stream_index
print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
sys.stdout.flush()
- for i, c in enumerate(chunks):
- t = c.attrib["t"]
- chunk_name = url.split('/')[1].replace("{start time}", t)
+ stream_duration = 0
+ for i, chunk in enumerate(chunks):
+
+ if "t" in chunk.attrib:
+ chunk_time = chunk.attrib["t"]
+ elif "d" in chunk.attrib:
+ chunk_time = stream_duration
+ stream_duration = chunk_time + int(chunk.attrib["d"])
+
+ chunk_name = get_chunk_name_string(stream, chunk_time)
chunk_file = os.path.join(chunks_src_dir, chunk_name)
f = open(chunk_file, "rb")
# Add a nice WAV header
if stream.attrib['Type'] == "audio":
+ quality = stream.findall("QualityLevel")[quality_level]
codec_private_data = quality.attrib['CodecPrivateData']
fmt = {}
def calc_tracks_delay(manifest, stream1_index, stream2_index):
    """Return the start offset, in seconds, of stream 2 relative to stream 1.

    manifest      -- parsed manifest; searched with './/StreamIndex'
    stream1_index -- index of the reference stream
    stream2_index -- index of the stream whose delay is wanted

    Returns 0 when the delay cannot be computed: a stream has no
    TimeScale attribute, has no <c> element, or its first <c> element
    has no "t" attribute.

    Raises IndexError if a stream index is out of range.
    """
    streams = manifest.findall('.//StreamIndex')

    s1 = streams[stream1_index]
    s2 = streams[stream2_index]

    # Test the attribute dictionaries: "in" on an Element looks at its
    # children, so it never finds an attribute name.
    if "TimeScale" not in s1.attrib or "TimeScale" not in s2.attrib:
        return 0

    s1_start_chunk = s1.find("c")
    s2_start_chunk = s2.find("c")

    # find() returns None for a stream without chunks.
    if s1_start_chunk is None or s2_start_chunk is None:
        return 0

    if "t" not in s1_start_chunk.attrib \
            or "t" not in s2_start_chunk.attrib:
        return 0

    s1_start_time = int(s1_start_chunk.attrib['t'])
    s2_start_time = int(s2_start_chunk.attrib['t'])

    # NOTE(review): the statements defining the timescales are not in the
    # visible text; reading TimeScale as a float is an assumption. Confirm
    # against the full file.
    s1_timescale = float(s1.attrib['TimeScale'])
    s2_timescale = float(s2.attrib['TimeScale'])

    # calc difference in seconds
    delay = s2_start_time / s2_timescale - \
        s1_start_time / s1_timescale

    return delay
return float(duration) / 10000000 # here is the default timescale
def smooth_download(url, manifest, dest_dir,
                    video_stream_index=0, audio_stream_index=1,
                    video_quality_level=0, audio_quality_level=0,
                    chunks_dir=None, download=True,
                    out_video_file='_video.vc1', out_audio_file='_audio.raw'):
    """Download one video and one audio stream and rebuild them on disk.

    url                 -- base content URL passed to download_chunks
    manifest            -- parsed manifest
    dest_dir            -- directory for the rebuilt stream files
    video_stream_index  -- index of the video stream
    audio_stream_index  -- index of the audio stream
    video_quality_level -- quality level index for the video stream
    audio_quality_level -- quality level index for the audio stream
    chunks_dir          -- directory holding the chunks; dest_dir if None
    download            -- when false, only rebuild from existing chunks
    out_video_file      -- file name of the rebuilt video, under dest_dir
    out_audio_file      -- file name of the rebuilt audio, under dest_dir

    Nothing is returned.  An ffmpeg command line for muxing the two
    files is printed at the end; it is not executed.
    """
    if chunks_dir is None:
        chunks_dir = dest_dir

    # Video first, then audio, for both the download and the rebuild.
    selected = ((video_stream_index, video_quality_level),
                (audio_stream_index, audio_quality_level))

    if download:
        for stream_index, quality_level in selected:
            download_chunks(url, manifest, stream_index, quality_level,
                            chunks_dir)

    dest_video = os.path.join(dest_dir, out_video_file)
    dest_audio = os.path.join(dest_dir, out_audio_file)
    dest_wav = dest_audio + '.wav'

    rebuild_stream(manifest, video_stream_index, video_quality_level,
                   chunks_dir, dest_video)
    # The audio goes through the raw file and ends up in the .wav file.
    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
                   chunks_dir, dest_audio, dest_wav)

    delay = calc_tracks_delay(manifest, video_stream_index,
                              audio_stream_index)

    # optionally encode audio to vorbis:
    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
    mux_template = "".join(["ffmpeg -i %s \\\n",
                            " -itsoffset %f -async 1 -i %s \\\n",
                            " -vcodec copy -acodec copy ffout.mkv"])
    mux_command = mux_template % (dest_video, delay, dest_wav)

    # A single parenthesised value prints the same with the print
    # statement and with the print function.
    print(mux_command)
def options_parser():
- version = "%%prog %s" % __version
+ version = "%%prog %s" % __version__
usage = "usage: %prog [options] <manifest URL or file>"
parser = OptionParser(usage=usage, version=version,
- description=__description, epilog=__author_info)
+ description=__description__, epilog=__author_info__)
parser.add_option("-i", "--info",
action="store_true", dest="info_only",
default=False, help="print Manifest info and exit")
parser.add_option("-c", "--chunks-dir", metavar="<dir>",
dest="chunks_dir", default=None,
help="directory containing chunks, if different from destination dir")
- parser.add_option("-v", "--video-stream", metavar="<n>",
+ parser.add_option("-v", "--video-stream", metavar="<n>",
type="int", dest="video_stream_index", default=0,
help="index of the video stream")
parser.add_option("-a", "--audio-stream", metavar="<n>",
return parser
-if __name__ == "__main__":
-
+def main():
parser = options_parser()
(options, args) = parser.parse_args()
parser.print_help()
parser.exit(1)
+ if not os.path.exists(options.dest_dir):
+ os.mkdir(options.dest_dir, 0755)
+
url = args[0]
manifest, url = get_manifest(url, options.dest_dir)
if options.sync_delay:
print calc_tracks_delay(manifest,
- options.video_stream_index,
- options.audio_stream_index)
+ options.video_stream_index,
+ options.audio_stream_index)
parser.exit(0)
if options.info_only:
print_manifest_info(manifest)
smooth_download(url, manifest, options.dest_dir,
- options.video_stream_index, options.audio_stream_index,
- options.video_quality_level, options.audio_quality_level,
- options.chunks_dir, options.download)
+ options.video_stream_index, options.audio_stream_index,
+ options.video_quality_level, options.audio_quality_level,
+ options.chunks_dir, options.download)
+
+
+if __name__ == "__main__":
+ main()