smooth-dl.py: fix some PEP 8 issues
[smooth-dl.git] / smooth-dl.py
index 2baa009..aa57c6e 100755 (executable)
 # basically, write a proper implementation of manifest parsing and chunk
 # downloading
 
 # basically, write a proper implementation of manifest parsing and chunk
 # downloading
 
-
-__description = "Download videos served using Smooth Streaming technology"
-__version = "0.x"
-__author_info = "Written by Antonio Ospite http://ao2.it"
-
 import os
 import os
+import re
 import sys
 import xml.etree.ElementTree as etree
 import urllib2
 import struct
 import tempfile
 from optparse import OptionParser
 import sys
 import xml.etree.ElementTree as etree
 import urllib2
 import struct
 import tempfile
 from optparse import OptionParser
+from urlparse import urlparse, urlunparse
+
+__description = "Download videos served using Smooth Streaming technology"
+__version = "0.x"
+__author_info = "Written by Antonio Ospite http://ao2.it"
 
 
 def get_chunk_data(data):
 
 
 def get_chunk_data(data):
@@ -52,7 +53,7 @@ def get_chunk_data(data):
     data_start = moof_size + 4 + len('mdat')
     data_size = mdat_size - 4 - len('mdat')
 
     data_start = moof_size + 4 + len('mdat')
     data_size = mdat_size - 4 - len('mdat')
 
-    #print len(data[data_start:]), \
+    # print len(data[data_start:]), \
     #        len(data[data_start:data_start + data_size]), data_size
 
     assert(len(data[data_start:]) == data_size)
     #        len(data[data_start:data_start + data_size]), data_size
 
     assert(len(data[data_start:]) == data_size)
@@ -92,30 +93,41 @@ def write_wav_header(out_file, fmt, codec_private_data, data_len):
     out_file.write(struct.pack('<L', data_len))
 
 
     out_file.write(struct.pack('<L', data_len))
 
 
-def get_manifest(base_url, dest_dir=tempfile.gettempdir(),
-        manifest_file='Manifest'):
+def download_file(src_url, dest_file, mode):
+    try:
+        response = urllib2.urlopen(src_url)
+        data = response.read()
+    except urllib2.HTTPError as e:
+        sys.stderr.write("Error while dowloading URL: %s" % src_url)
+        raise
+
+    if dest_file:
+        f = open(dest_file, mode)
+        f.write(data)
+        f.close()
+
+    return data
+
+
+def get_manifest(url, dest_dir=tempfile.gettempdir()):
     """Returns the manifest and the new URL if this is changed"""
 
     if os.path.exists(dest_dir) == False:
         os.mkdir(dest_dir, 0755)
 
     """Returns the manifest and the new URL if this is changed"""
 
     if os.path.exists(dest_dir) == False:
         os.mkdir(dest_dir, 0755)
 
-    if base_url.startswith('http://'):
+    # Remove the querystring if present
+    manifest_url = urlunparse(urlparse(url)._replace(query=''))
 
 
-        manifest_url = base_url
-        if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
-            manifest_url += '/Manifest'
+    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
+        manifest_url += '/Manifest'
 
 
-        response = urllib2.urlopen(manifest_url)
-        data = response.read()
-
-        manifest_path = os.path.join(dest_dir, manifest_file)
-        f = open(manifest_path, "w")
-        f.write(data)
-        f.close()
+    if manifest_url.startswith('http://'):
+        local_manifest_path = os.path.join(dest_dir, 'Manifest')
+        download_file(manifest_url, local_manifest_path, "w")
     else:
     else:
-        manifest_path = base_url
+        local_manifest_path = url
 
 
-    manifest = etree.parse(manifest_path)
+    manifest = etree.parse(local_manifest_path)
 
     version = manifest.getroot().attrib['MajorVersion']
     if version != "2":
 
     version = manifest.getroot().attrib['MajorVersion']
     if version != "2":
@@ -124,11 +136,14 @@ def get_manifest(base_url, dest_dir=tempfile.gettempdir(),
     try:
         # if some intermediate client Manifest is used, like in Rai Replay
         clip = manifest.find("Clip")
     try:
         # if some intermediate client Manifest is used, like in Rai Replay
         clip = manifest.find("Clip")
-        actual_manifest_url = clip.attrib["Url"]
-        base_url = actual_manifest_url.lower().replace("/manifest", "")
-    except:
+        manifest_url = clip.attrib["Url"]
+        manifest = download_file(manifest_url, None, None)
+    except AttributeError:
         pass
 
         pass
 
+    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
+    base_url = manifest_pattern.sub("", manifest_url)
+
     return (manifest, base_url)
 
 
     return (manifest, base_url)
 
 
@@ -155,27 +170,53 @@ def print_manifest_info(manifest):
                 channels = q.attrib["Channels"]
                 sampling_rate = q.attrib["SamplingRate"]
                 bits_per_sample = q.attrib["BitsPerSample"]
                 channels = q.attrib["Channels"]
                 sampling_rate = q.attrib["SamplingRate"]
                 bits_per_sample = q.attrib["BitsPerSample"]
-                print "\t%2s: %4s %sHz %sbits %sch @ %7s bps" % (i, fourcc,
-                        sampling_rate, bits_per_sample, channels, bitrate)
+                print "\t%2s: %4s %sHz %sbits %sch @ %7s bps" % \
+                    (i, fourcc, sampling_rate, bits_per_sample, channels,
+                     bitrate)
 
     print
 
 
 
     print
 
 
-def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
-
-    if os.path.exists(dest_dir) == False:
-        os.mkdir(dest_dir, 0755)
-
-    stream = manifest.findall('.//StreamIndex')[stream_index]
-
+def get_chunk_quality_string(stream, quality_level):
     quality = stream.findall("QualityLevel")[quality_level]
     bitrate = quality.attrib["Bitrate"]
 
     quality = stream.findall("QualityLevel")[quality_level]
     bitrate = quality.attrib["Bitrate"]
 
+    quality_attributes = quality.findall("CustomAttributes/Attribute")
+    custom_attributes = ""
+    for i in quality_attributes:
+        custom_attributes += "%s=%s," % (i.attrib["Name"], i.attrib["Value"])
+    custom_attributes = custom_attributes.rstrip(',')
+
     # Assume URLs are in this form:
     # Url="QualityLevels({bitrate})/Fragments(video={start time})"
     # Assume URLs are in this form:
     # Url="QualityLevels({bitrate})/Fragments(video={start time})"
+    # or
+    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
     url = stream.attrib["Url"]
 
     chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
     url = stream.attrib["Url"]
 
     chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
+    chunks_quality = chunks_quality.replace("{CustomAttributes}",
+                                            custom_attributes)
+
+    return chunks_quality
+
+
+def get_chunk_name_string(stream, chunk):
+    """Return the chunk name built from the stream "Url" attribute.
+
+    The second '/'-separated component of stream.attrib["Url"] is taken and
+    its "{start time}" placeholder is replaced with chunk.attrib["t"].
+    """
+    t = chunk.attrib["t"]
+    url = stream.attrib["Url"]
+    # Assumes Url has at least two '/'-separated parts, e.g.
+    # "QualityLevels({bitrate})/Fragments(video={start time})"
+    chunk_name = url.split('/')[1].replace("{start time}", t)
+
+    return chunk_name
+
+
+def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
+
+    if os.path.exists(dest_dir) == False:
+        os.mkdir(dest_dir, 0755)
+
+    stream = manifest.findall('.//StreamIndex')[stream_index]
+
+    chunks_quality = get_chunk_quality_string(stream, quality_level)
+
     chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
     if os.path.exists(chunks_dest_dir) == False:
         os.mkdir(chunks_dest_dir, 0755)
     chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
     if os.path.exists(chunks_dest_dir) == False:
         os.mkdir(chunks_dest_dir, 0755)
@@ -186,19 +227,13 @@ def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
     for i, c in enumerate(chunks):
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
     for i, c in enumerate(chunks):
-        t = c.attrib["t"]
 
 
-        chunk_name = url.split('/')[1].replace("{start time}", t)
+        chunk_name = get_chunk_name_string(stream, c)
         chunk_file = os.path.join(dest_dir,  chunks_quality, chunk_name)
 
         if os.path.exists(chunk_file) == False:
             chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
         chunk_file = os.path.join(dest_dir,  chunks_quality, chunk_name)
 
         if os.path.exists(chunk_file) == False:
             chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
-            response = urllib2.urlopen(chunk_url)
-            data = response.read()
-
-            f = open(chunk_file, "wb")
-            f.write(data)
-            f.close()
+            data = download_file(chunk_url, chunk_file, "wb")
         else:
             f = open(chunk_file, "rb")
             data = f.read()
         else:
             f = open(chunk_file, "rb")
             data = f.read()
@@ -211,21 +246,15 @@ def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
 
 
 def rebuild_stream(manifest, stream_index, quality_level, src_dir,
 
 
 def rebuild_stream(manifest, stream_index, quality_level, src_dir,
-        dest_file_name, final_dest_file=None):
+                   dest_file_name, final_dest_file=None):
 
 
-    if final_dest_file == None:
+    if final_dest_file is None:
         final_dest_file = dest_file_name
 
     stream = manifest.findall('.//StreamIndex')[stream_index]
 
         final_dest_file = dest_file_name
 
     stream = manifest.findall('.//StreamIndex')[stream_index]
 
-    quality = stream.findall("QualityLevel")[quality_level]
-    bitrate = quality.attrib["Bitrate"]
-
-    # Assume URLs are in this form:
-    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
-    url = stream.attrib["Url"]
+    chunks_quality = get_chunk_quality_string(stream, quality_level)
 
 
-    chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
     chunks_src_dir = os.path.join(src_dir, chunks_quality)
 
     dest_file = open(dest_file_name, "wb")
     chunks_src_dir = os.path.join(src_dir, chunks_quality)
 
     dest_file = open(dest_file_name, "wb")
@@ -236,9 +265,8 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
     for i, c in enumerate(chunks):
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
     for i, c in enumerate(chunks):
-        t = c.attrib["t"]
 
 
-        chunk_name = url.split('/')[1].replace("{start time}", t)
+        chunk_name = get_chunk_name_string(stream, c)
         chunk_file = os.path.join(chunks_src_dir, chunk_name)
 
         f = open(chunk_file, "rb")
         chunk_file = os.path.join(chunks_src_dir, chunk_name)
 
         f = open(chunk_file, "rb")
@@ -251,6 +279,7 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
 
     # Add a nice WAV header
     if stream.attrib['Type'] == "audio":
 
     # Add a nice WAV header
     if stream.attrib['Type'] == "audio":
+        quality = stream.findall("QualityLevel")[quality_level]
         codec_private_data = quality.attrib['CodecPrivateData']
 
         fmt = {}
         codec_private_data = quality.attrib['CodecPrivateData']
 
         fmt = {}
@@ -291,7 +320,7 @@ def calc_tracks_delay(manifest, stream1_index, stream2_index):
 
     # calc difference in seconds
     delay = s2_start_time / s2_timescale - \
 
     # calc difference in seconds
     delay = s2_start_time / s2_timescale - \
-            s1_start_time / s1_timescale
+        s1_start_time / s1_timescale
 
     return delay
 
 
     return delay
 
@@ -304,39 +333,39 @@ def get_clip_duration(manifest):
 
 
 def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
 
 
 def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
-        video_stream_index=0, audio_stream_index=1,
-        video_quality_level=0, audio_quality_level=0,
-        chunks_dir=None, download=True,
-        out_video_file='_video.vc1', out_audio_file='_audio.raw'):
+                    video_stream_index=0, audio_stream_index=1,
+                    video_quality_level=0, audio_quality_level=0,
+                    chunks_dir=None, download=True,
+                    out_video_file='_video.vc1', out_audio_file='_audio.raw'):
 
 
-        if chunks_dir == None:
+        if chunks_dir is None:
             chunks_dir = dest_dir
 
         if download:
             download_chunks(url, manifest, video_stream_index,
             chunks_dir = dest_dir
 
         if download:
             download_chunks(url, manifest, video_stream_index,
-                    video_quality_level, chunks_dir)
+                            video_quality_level, chunks_dir)
             download_chunks(url, manifest, audio_stream_index,
             download_chunks(url, manifest, audio_stream_index,
-                    audio_quality_level, chunks_dir)
+                            audio_quality_level, chunks_dir)
 
         dest_video = os.path.join(dest_dir, out_video_file)
         dest_audio = os.path.join(dest_dir, out_audio_file)
 
         rebuild_stream(manifest, video_stream_index, video_quality_level,
 
         dest_video = os.path.join(dest_dir, out_video_file)
         dest_audio = os.path.join(dest_dir, out_audio_file)
 
         rebuild_stream(manifest, video_stream_index, video_quality_level,
-                chunks_dir, dest_video)
+                       chunks_dir, dest_video)
         rebuild_stream(manifest, audio_stream_index, audio_quality_level,
         rebuild_stream(manifest, audio_stream_index, audio_quality_level,
-                chunks_dir, dest_audio, dest_audio + '.wav')
+                       chunks_dir, dest_audio, dest_audio + '.wav')
 
 
-        #duration = get_clip_duration(manifest)
+        # duration = get_clip_duration(manifest)
 
         delay = calc_tracks_delay(manifest, video_stream_index,
 
         delay = calc_tracks_delay(manifest, video_stream_index,
-                audio_stream_index)
+                                  audio_stream_index)
 
         # optionally encode audio to vorbis:
         # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
         mux_command = ("ffmpeg -i %s \\\n" +
 
         # optionally encode audio to vorbis:
         # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
         mux_command = ("ffmpeg -i %s \\\n" +
-                      "  -itsoffset %f -async 1 -i %s \\\n" +
-                      "  -vcodec copy -acodec copy ffout.mkv") % \
-                      (dest_video, delay, dest_audio + '.wav')
+                       "  -itsoffset %f -async 1 -i %s \\\n" +
+                       "  -vcodec copy -acodec copy ffout.mkv") % \
+            (dest_video, delay, dest_audio + '.wav')
 
         print mux_command
 
 
         print mux_command
 
@@ -345,7 +374,7 @@ def options_parser():
     version = "%%prog %s" % __version
     usage = "usage: %prog [options] <manifest URL or file>"
     parser = OptionParser(usage=usage, version=version,
     version = "%%prog %s" % __version
     usage = "usage: %prog [options] <manifest URL or file>"
     parser = OptionParser(usage=usage, version=version,
-            description=__description, epilog=__author_info)
+                          description=__description, epilog=__author_info)
     parser.add_option("-i", "--info",
                       action="store_true", dest="info_only",
                       default=False, help="print Manifest info and exit")
     parser.add_option("-i", "--info",
                       action="store_true", dest="info_only",
                       default=False, help="print Manifest info and exit")
@@ -397,8 +426,8 @@ if __name__ == "__main__":
 
     if options.sync_delay:
         print calc_tracks_delay(manifest,
 
     if options.sync_delay:
         print calc_tracks_delay(manifest,
-                options.video_stream_index,
-                options.audio_stream_index)
+                                options.video_stream_index,
+                                options.audio_stream_index)
         parser.exit(0)
 
     if options.info_only:
         parser.exit(0)
 
     if options.info_only:
@@ -408,6 +437,6 @@ if __name__ == "__main__":
     print_manifest_info(manifest)
 
     smooth_download(url, manifest, options.dest_dir,
     print_manifest_info(manifest)
 
     smooth_download(url, manifest, options.dest_dir,
-            options.video_stream_index, options.audio_stream_index,
-            options.video_quality_level, options.audio_quality_level,
-            options.chunks_dir, options.download)
+                    options.video_stream_index, options.audio_stream_index,
+                    options.video_quality_level, options.audio_quality_level,
+                    options.chunks_dir, options.download)