smooth-dl.py: support downloading from a local file in download_file()
[smooth-dl.git] / smooth-dl.py
index 9530392..1488e2c 100755 (executable)
 # basically, write a proper implementation of manifest parsing and chunk
 # downloading
 
 # basically, write a proper implementation of manifest parsing and chunk
 # downloading
 
-
-__description = "Download videos served using Smooth Streaming technology"
-__version = "0.x"
-__author_info = "Written by Antonio Ospite http://ao2.it"
-
 import os
 import re
 import sys
 import os
 import re
 import sys
@@ -45,6 +40,10 @@ import tempfile
 from optparse import OptionParser
 from urlparse import urlparse, urlunparse
 
 from optparse import OptionParser
 from urlparse import urlparse, urlunparse
 
+__description__ = "Download videos served using Smooth Streaming technology"
+__version__ = "0.x"
+__author_info__ = "Written by Antonio Ospite http://ao2.it"
+
 
 def get_chunk_data(data):
 
 
 def get_chunk_data(data):
 
@@ -54,10 +53,10 @@ def get_chunk_data(data):
     data_start = moof_size + 4 + len('mdat')
     data_size = mdat_size - 4 - len('mdat')
 
     data_start = moof_size + 4 + len('mdat')
     data_size = mdat_size - 4 - len('mdat')
 
-    #print len(data[data_start:]), \
+    # print len(data[data_start:]), \
     #        len(data[data_start:data_start + data_size]), data_size
 
     #        len(data[data_start:data_start + data_size]), data_size
 
-    assert(len(data[data_start:]) == data_size)
+    assert len(data[data_start:]) == data_size
 
     return data[data_start:data_start + data_size]
 
 
     return data[data_start:data_start + data_size]
 
@@ -65,7 +64,7 @@ def get_chunk_data(data):
 def hexstring_to_bytes(hex_string):
     res = ""
     for i in range(0, len(hex_string), 2):
 def hexstring_to_bytes(hex_string):
     res = ""
     for i in range(0, len(hex_string), 2):
-            res += chr(int(hex_string[i:i + 2], 16))
+        res += chr(int(hex_string[i:i + 2], 16))
 
     return res
 
 
     return res
 
@@ -93,13 +92,20 @@ def write_wav_header(out_file, fmt, codec_private_data, data_len):
     out_file.write("data")
     out_file.write(struct.pack('<L', data_len))
 
     out_file.write("data")
     out_file.write(struct.pack('<L', data_len))
 
+
 def download_file(src_url, dest_file, mode):
 def download_file(src_url, dest_file, mode):
-    try:
-        response = urllib2.urlopen(src_url)
-        data = response.read()
-    except urllib2.HTTPError as e:
-        sys.stderr.write("Error while dowloading URL: %s" % src_url)
-        raise
+
+    if os.path.exists(src_url):
+        f = open(src_url, "rb")
+        data = f.read()
+        f.close()
+    else:
+        try:
+            response = urllib2.urlopen(src_url)
+            data = response.read()
+        except urllib2.HTTPError:
+            sys.stderr.write("Error while dowloading URL: %s" % src_url)
+            raise
 
     if dest_file:
         f = open(dest_file, mode)
 
     if dest_file:
         f = open(dest_file, mode)
@@ -108,11 +114,9 @@ def download_file(src_url, dest_file, mode):
 
     return data
 
 
     return data
 
-def get_manifest(url, dest_dir=tempfile.gettempdir()):
-    """Returns the manifest and the new URL if this is changed"""
 
 
-    if os.path.exists(dest_dir) == False:
-        os.mkdir(dest_dir, 0755)
+def get_manifest(url, dest_dir):
+    """Returns the manifest element and the base content URL"""
 
     # Remove the querystring if present
     manifest_url = urlunparse(urlparse(url)._replace(query=''))
 
     # Remove the querystring if present
     manifest_url = urlunparse(urlparse(url)._replace(query=''))
@@ -120,11 +124,8 @@ def get_manifest(url, dest_dir=tempfile.gettempdir()):
     if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
         manifest_url += '/Manifest'
 
     if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
         manifest_url += '/Manifest'
 
-    if manifest_url.startswith('http://'):
-        local_manifest_path = os.path.join(dest_dir, 'Manifest')
-        download_file(manifest_url, local_manifest_path, "w")
-    else:
-        local_manifest_path = url
+    local_manifest_path = os.path.join(dest_dir, 'Manifest')
+    download_file(manifest_url, local_manifest_path, "w")
 
     manifest = etree.parse(local_manifest_path)
 
 
     manifest = etree.parse(local_manifest_path)
 
@@ -132,12 +133,18 @@ def get_manifest(url, dest_dir=tempfile.gettempdir()):
     if version != "2":
         raise Exception('Only Smooth Streaming version 2 supported')
 
     if version != "2":
         raise Exception('Only Smooth Streaming version 2 supported')
 
-    try:
-        # if some intermediate client Manifest is used, like in Rai Replay
-        clip = manifest.find("Clip")
-        manifest_url = clip.attrib["Url"]
-    except:
-        pass
+    # if some intermediate client Manifest is used, like in Rai Replay
+    # then get the final manifest
+    clip = manifest.find("Clip")
+    if clip is not None and "Url" in clip.attrib:
+        tmp_manifest_url = clip.attrib["Url"]
+        try:
+            tmp_manifest = download_file(tmp_manifest_url, None, None)
+            # set the new values only if the download succeeded
+            manifest_url = tmp_manifest_url
+            manifest = tmp_manifest
+        except urllib2.HTTPError:
+            pass
 
     manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
     base_url = manifest_pattern.sub("", manifest_url)
 
     manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
     base_url = manifest_pattern.sub("", manifest_url)
@@ -151,7 +158,6 @@ def print_manifest_info(manifest):
 
     for i, s in enumerate(streams):
         stream_type = s.attrib["Type"]
 
     for i, s in enumerate(streams):
         stream_type = s.attrib["Type"]
-        url = s.attrib["Url"]
 
         print "Stream: %s Type: %s" % (i, stream_type)
 
 
         print "Stream: %s Type: %s" % (i, stream_type)
 
@@ -168,8 +174,9 @@ def print_manifest_info(manifest):
                 channels = q.attrib["Channels"]
                 sampling_rate = q.attrib["SamplingRate"]
                 bits_per_sample = q.attrib["BitsPerSample"]
                 channels = q.attrib["Channels"]
                 sampling_rate = q.attrib["SamplingRate"]
                 bits_per_sample = q.attrib["BitsPerSample"]
-                print "\t%2s: %4s %sHz %sbits %sch @ %7s bps" % (i, fourcc,
-                        sampling_rate, bits_per_sample, channels, bitrate)
+                print "\t%2s: %4s %sHz %sbits %sch @ %7s bps" % \
+                    (i, fourcc, sampling_rate, bits_per_sample, channels,
+                     bitrate)
 
     print
 
 
     print
 
@@ -191,30 +198,26 @@ def get_chunk_quality_string(stream, quality_level):
     url = stream.attrib["Url"]
 
     chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
     url = stream.attrib["Url"]
 
     chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
-    chunks_quality = chunks_quality.replace("{CustomAttributes}", custom_attributes)
+    chunks_quality = chunks_quality.replace("{CustomAttributes}",
+                                            custom_attributes)
 
     return chunks_quality
 
 
 
     return chunks_quality
 
 
-def get_chunk_name_string(stream, chunk):
-    t = chunk.attrib["t"]
+def get_chunk_name_string(stream, chunk_time):
     url = stream.attrib["Url"]
     url = stream.attrib["Url"]
-    chunk_name = url.split('/')[1].replace("{start time}", t)
+    chunk_name = url.split('/')[1].replace("{start time}", str(chunk_time))
 
     return chunk_name
 
 
 def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
 
     return chunk_name
 
 
 def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
-
-    if os.path.exists(dest_dir) == False:
-        os.mkdir(dest_dir, 0755)
-
     stream = manifest.findall('.//StreamIndex')[stream_index]
 
     chunks_quality = get_chunk_quality_string(stream, quality_level)
 
     chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
     stream = manifest.findall('.//StreamIndex')[stream_index]
 
     chunks_quality = get_chunk_quality_string(stream, quality_level)
 
     chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
-    if os.path.exists(chunks_dest_dir) == False:
+    if not os.path.exists(chunks_dest_dir):
         os.mkdir(chunks_dest_dir, 0755)
 
     chunks = stream.findall("c")
         os.mkdir(chunks_dest_dir, 0755)
 
     chunks = stream.findall("c")
@@ -222,12 +225,20 @@ def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
     print "\nDownloading Stream %d" % stream_index
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
     print "\nDownloading Stream %d" % stream_index
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
-    for i, c in enumerate(chunks):
 
 
-        chunk_name = get_chunk_name_string(stream, c)
-        chunk_file = os.path.join(dest_dir,  chunks_quality, chunk_name)
+    stream_duration = 0
+    for i, chunk in enumerate(chunks):
+
+        if "t" in chunk.attrib:
+            chunk_time = chunk.attrib["t"]
+        elif "d" in chunk.attrib:
+            chunk_time = stream_duration
+            stream_duration = chunk_time + int(chunk.attrib["d"])
 
 
-        if os.path.exists(chunk_file) == False:
+        chunk_name = get_chunk_name_string(stream, chunk_time)
+        chunk_file = os.path.join(dest_dir, chunks_quality, chunk_name)
+
+        if not os.path.exists(chunk_file):
             chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
             data = download_file(chunk_url, chunk_file, "wb")
         else:
             chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
             data = download_file(chunk_url, chunk_file, "wb")
         else:
@@ -242,9 +253,9 @@ def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
 
 
 def rebuild_stream(manifest, stream_index, quality_level, src_dir,
 
 
 def rebuild_stream(manifest, stream_index, quality_level, src_dir,
-        dest_file_name, final_dest_file=None):
+                   dest_file_name, final_dest_file=None):
 
 
-    if final_dest_file == None:
+    if final_dest_file is None:
         final_dest_file = dest_file_name
 
     stream = manifest.findall('.//StreamIndex')[stream_index]
         final_dest_file = dest_file_name
 
     stream = manifest.findall('.//StreamIndex')[stream_index]
@@ -260,9 +271,17 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
     print "\nRebuilding Stream %d" % stream_index
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
     print "\nRebuilding Stream %d" % stream_index
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
-    for i, c in enumerate(chunks):
 
 
-        chunk_name = get_chunk_name_string(stream, c)
+    stream_duration = 0
+    for i, chunk in enumerate(chunks):
+
+        if "t" in chunk.attrib:
+            chunk_time = chunk.attrib["t"]
+        elif "d" in chunk.attrib:
+            chunk_time = stream_duration
+            stream_duration = chunk_time + int(chunk.attrib["d"])
+
+        chunk_name = get_chunk_name_string(stream, chunk_time)
         chunk_file = os.path.join(chunks_src_dir, chunk_name)
 
         f = open(chunk_file, "rb")
         chunk_file = os.path.join(chunks_src_dir, chunk_name)
 
         f = open(chunk_file, "rb")
@@ -305,9 +324,16 @@ def calc_tracks_delay(manifest, stream1_index, stream2_index):
     s1 = streams[stream1_index]
     s2 = streams[stream2_index]
 
     s1 = streams[stream1_index]
     s2 = streams[stream2_index]
 
+    if "TimeScale" not in s1 or "TimeScale" not in s2:
+        return 0
+
     s1_start_chunk = s1.find("c")
     s2_start_chunk = s2.find("c")
 
     s1_start_chunk = s1.find("c")
     s2_start_chunk = s2.find("c")
 
+    if "t" not in s1_start_chunk.attrib \
+       or "t" not in s2_start_chunk.attrib:
+        return 0
+
     s1_start_time = int(s1_start_chunk.attrib['t'])
     s2_start_time = int(s2_start_chunk.attrib['t'])
 
     s1_start_time = int(s1_start_chunk.attrib['t'])
     s2_start_time = int(s2_start_chunk.attrib['t'])
 
@@ -316,7 +342,7 @@ def calc_tracks_delay(manifest, stream1_index, stream2_index):
 
     # calc difference in seconds
     delay = s2_start_time / s2_timescale - \
 
     # calc difference in seconds
     delay = s2_start_time / s2_timescale - \
-            s1_start_time / s1_timescale
+        s1_start_time / s1_timescale
 
     return delay
 
 
     return delay
 
@@ -328,49 +354,49 @@ def get_clip_duration(manifest):
     return float(duration) / 10000000  # here is the default timescale
 
 
     return float(duration) / 10000000  # here is the default timescale
 
 
-def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
-        video_stream_index=0, audio_stream_index=1,
-        video_quality_level=0, audio_quality_level=0,
-        chunks_dir=None, download=True,
-        out_video_file='_video.vc1', out_audio_file='_audio.raw'):
+def smooth_download(url, manifest, dest_dir,
+                    video_stream_index=0, audio_stream_index=1,
+                    video_quality_level=0, audio_quality_level=0,
+                    chunks_dir=None, download=True,
+                    out_video_file='_video.vc1', out_audio_file='_audio.raw'):
 
 
-        if chunks_dir == None:
-            chunks_dir = dest_dir
+    if chunks_dir is None:
+        chunks_dir = dest_dir
 
 
-        if download:
-            download_chunks(url, manifest, video_stream_index,
-                    video_quality_level, chunks_dir)
-            download_chunks(url, manifest, audio_stream_index,
-                    audio_quality_level, chunks_dir)
+    if download:
+        download_chunks(url, manifest, video_stream_index,
+                        video_quality_level, chunks_dir)
+        download_chunks(url, manifest, audio_stream_index,
+                        audio_quality_level, chunks_dir)
 
 
-        dest_video = os.path.join(dest_dir, out_video_file)
-        dest_audio = os.path.join(dest_dir, out_audio_file)
+    dest_video = os.path.join(dest_dir, out_video_file)
+    dest_audio = os.path.join(dest_dir, out_audio_file)
 
 
-        rebuild_stream(manifest, video_stream_index, video_quality_level,
-                chunks_dir, dest_video)
-        rebuild_stream(manifest, audio_stream_index, audio_quality_level,
-                chunks_dir, dest_audio, dest_audio + '.wav')
+    rebuild_stream(manifest, video_stream_index, video_quality_level,
+                   chunks_dir, dest_video)
+    rebuild_stream(manifest, audio_stream_index, audio_quality_level,
+                   chunks_dir, dest_audio, dest_audio + '.wav')
 
 
-        #duration = get_clip_duration(manifest)
+    duration = get_clip_duration(manifest)
 
 
-        delay = calc_tracks_delay(manifest, video_stream_index,
-                audio_stream_index)
+    delay = calc_tracks_delay(manifest, video_stream_index,
+                              audio_stream_index)
 
 
-        # optionally encode audio to vorbis:
-        # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
-        mux_command = ("ffmpeg -i %s \\\n" +
-                      "  -itsoffset %f -async 1 -i %s \\\n" +
-                      "  -vcodec copy -acodec copy ffout.mkv") % \
-                      (dest_video, delay, dest_audio + '.wav')
+    # optionally encode audio to vorbis:
+    # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
+    mux_command = ("ffmpeg -i %s \\\n" +
+                   "  -itsoffset %f -async 1 -i %s \\\n" +
+                   "  -vcodec copy -acodec copy ffout.mkv") % \
+        (dest_video, delay, dest_audio + '.wav')
 
 
-        print mux_command
+    print mux_command
 
 
 def options_parser():
 
 
 def options_parser():
-    version = "%%prog %s" % __version
+    version = "%%prog %s" % __version__
     usage = "usage: %prog [options] <manifest URL or file>"
     parser = OptionParser(usage=usage, version=version,
     usage = "usage: %prog [options] <manifest URL or file>"
     parser = OptionParser(usage=usage, version=version,
-            description=__description, epilog=__author_info)
+                          description=__description__, epilog=__author_info__)
     parser.add_option("-i", "--info",
                       action="store_true", dest="info_only",
                       default=False, help="print Manifest info and exit")
     parser.add_option("-i", "--info",
                       action="store_true", dest="info_only",
                       default=False, help="print Manifest info and exit")
@@ -389,7 +415,7 @@ def options_parser():
     parser.add_option("-c", "--chunks-dir", metavar="<dir>",
                       dest="chunks_dir", default=None,
                       help="directory containing chunks, if different from destination dir")
     parser.add_option("-c", "--chunks-dir", metavar="<dir>",
                       dest="chunks_dir", default=None,
                       help="directory containing chunks, if different from destination dir")
-    parser.add_option("-v", "--video-stream",  metavar="<n>",
+    parser.add_option("-v", "--video-stream", metavar="<n>",
                       type="int", dest="video_stream_index", default=0,
                       help="index of the video stream")
     parser.add_option("-a", "--audio-stream", metavar="<n>",
                       type="int", dest="video_stream_index", default=0,
                       help="index of the video stream")
     parser.add_option("-a", "--audio-stream", metavar="<n>",
@@ -405,8 +431,7 @@ def options_parser():
     return parser
 
 
     return parser
 
 
-if __name__ == "__main__":
-
+def main():
     parser = options_parser()
     (options, args) = parser.parse_args()
 
     parser = options_parser()
     (options, args) = parser.parse_args()
 
@@ -414,6 +439,9 @@ if __name__ == "__main__":
         parser.print_help()
         parser.exit(1)
 
         parser.print_help()
         parser.exit(1)
 
+    if not os.path.exists(options.dest_dir):
+        os.mkdir(options.dest_dir, 0755)
+
     url = args[0]
     manifest, url = get_manifest(url, options.dest_dir)
 
     url = args[0]
     manifest, url = get_manifest(url, options.dest_dir)
 
@@ -422,8 +450,8 @@ if __name__ == "__main__":
 
     if options.sync_delay:
         print calc_tracks_delay(manifest,
 
     if options.sync_delay:
         print calc_tracks_delay(manifest,
-                options.video_stream_index,
-                options.audio_stream_index)
+                                options.video_stream_index,
+                                options.audio_stream_index)
         parser.exit(0)
 
     if options.info_only:
         parser.exit(0)
 
     if options.info_only:
@@ -433,6 +461,10 @@ if __name__ == "__main__":
     print_manifest_info(manifest)
 
     smooth_download(url, manifest, options.dest_dir,
     print_manifest_info(manifest)
 
     smooth_download(url, manifest, options.dest_dir,
-            options.video_stream_index, options.audio_stream_index,
-            options.video_quality_level, options.audio_quality_level,
-            options.chunks_dir, options.download)
+                    options.video_stream_index, options.audio_stream_index,
+                    options.video_quality_level, options.audio_quality_level,
+                    options.chunks_dir, options.download)
+
+
+if __name__ == "__main__":
+    main()