smooth-dl.py: use more specific exception when looking for the Clip element
[smooth-dl.git] / smooth-dl.py
index 2baa009..85a0d2e 100755 (executable)
@@ -36,12 +36,14 @@ __version = "0.x"
 __author_info = "Written by Antonio Ospite http://ao2.it"
 
 import os
+import re
 import sys
 import xml.etree.ElementTree as etree
 import urllib2
 import struct
 import tempfile
 from optparse import OptionParser
+from urlparse import urlparse, urlunparse
 
 
 def get_chunk_data(data):
@@ -91,31 +93,40 @@ def write_wav_header(out_file, fmt, codec_private_data, data_len):
     out_file.write("data")
     out_file.write(struct.pack('<L', data_len))
 
+def download_file(src_url, dest_file, mode):
+    """Fetch src_url, save it to dest_file (if given) and return the data."""
+    try:
+        response = urllib2.urlopen(src_url)
+        data = response.read()
+    except urllib2.URLError:  # base class of HTTPError: also reports DNS/connection failures
+        sys.stderr.write("Error while downloading URL: %s\n" % src_url)
+        raise
+
+    if dest_file:
+        with open(dest_file, mode) as f:  # closed even if the write fails
+            f.write(data)
+
+    return data
 
-def get_manifest(base_url, dest_dir=tempfile.gettempdir(),
-        manifest_file='Manifest'):
+def get_manifest(url, dest_dir=tempfile.gettempdir()):
     """Returns the manifest and the new URL if this is changed"""
 
     if os.path.exists(dest_dir) == False:
         os.mkdir(dest_dir, 0755)
 
-    if base_url.startswith('http://'):
+    # Remove the querystring if present
+    manifest_url = urlunparse(urlparse(url)._replace(query=''))
 
-        manifest_url = base_url
-        if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
-            manifest_url += '/Manifest'
+    if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
+        manifest_url += '/Manifest'
 
-        response = urllib2.urlopen(manifest_url)
-        data = response.read()
-
-        manifest_path = os.path.join(dest_dir, manifest_file)
-        f = open(manifest_path, "w")
-        f.write(data)
-        f.close()
+    if manifest_url.startswith('http://'):
+        local_manifest_path = os.path.join(dest_dir, 'Manifest')
+        download_file(manifest_url, local_manifest_path, "w")
     else:
-        manifest_path = base_url
+        local_manifest_path = url
 
-    manifest = etree.parse(manifest_path)
+    manifest = etree.parse(local_manifest_path)
 
     version = manifest.getroot().attrib['MajorVersion']
     if version != "2":
@@ -124,11 +135,13 @@ def get_manifest(base_url, dest_dir=tempfile.gettempdir(),
     try:
         # if some intermediate client Manifest is used, like in Rai Replay
         clip = manifest.find("Clip")
-        actual_manifest_url = clip.attrib["Url"]
-        base_url = actual_manifest_url.lower().replace("/manifest", "")
-    except:
+        manifest_url = clip.attrib["Url"]
+    except AttributeError:
         pass
 
+    manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
+    base_url = manifest_pattern.sub("", manifest_url)
+
     return (manifest, base_url)
 
 
@@ -161,21 +174,45 @@ def print_manifest_info(manifest):
     print
 
 
-def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
-
-    if os.path.exists(dest_dir) == False:
-        os.mkdir(dest_dir, 0755)
-
-    stream = manifest.findall('.//StreamIndex')[stream_index]
-
+def get_chunk_quality_string(stream, quality_level):  # builds the first path segment of a chunk URL
     quality = stream.findall("QualityLevel")[quality_level]
     bitrate = quality.attrib["Bitrate"]
 
+    quality_attributes = quality.findall("CustomAttributes/Attribute")  # empty list when there are none
+    custom_attributes = ""
+    for i in quality_attributes:  # accumulate "Name=Value," pairs
+        custom_attributes += "%s=%s," % (i.attrib["Name"], i.attrib["Value"])
+    custom_attributes = custom_attributes.rstrip(',')  # strip the trailing ',' separator
+
     # Assume URLs are in this form:
     # Url="QualityLevels({bitrate})/Fragments(video={start time})"
+    # or
+    # Url="QualityLevels({bitrate},{CustomAttributes})/Fragments(video={start time})"
     url = stream.attrib["Url"]
 
     chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
+    chunks_quality = chunks_quality.replace("{CustomAttributes}", custom_attributes)  # no-op if placeholder absent
+
+    return chunks_quality
+
+
+def get_chunk_name_string(stream, chunk):  # builds the second path segment of a chunk URL
+    t = chunk.attrib["t"]  # value substituted for the "{start time}" placeholder
+    url = stream.attrib["Url"]
+    chunk_name = url.split('/')[1].replace("{start time}", t)  # part after the first '/' of the Url template
+
+    return chunk_name
+
+
+def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
+
+    if os.path.exists(dest_dir) == False:
+        os.mkdir(dest_dir, 0755)
+
+    stream = manifest.findall('.//StreamIndex')[stream_index]
+
+    chunks_quality = get_chunk_quality_string(stream, quality_level)
+
     chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
     if os.path.exists(chunks_dest_dir) == False:
         os.mkdir(chunks_dest_dir, 0755)
@@ -186,19 +223,13 @@ def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
     for i, c in enumerate(chunks):
-        t = c.attrib["t"]
 
-        chunk_name = url.split('/')[1].replace("{start time}", t)
+        chunk_name = get_chunk_name_string(stream, c)
         chunk_file = os.path.join(dest_dir,  chunks_quality, chunk_name)
 
         if os.path.exists(chunk_file) == False:
             chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
-            response = urllib2.urlopen(chunk_url)
-            data = response.read()
-
-            f = open(chunk_file, "wb")
-            f.write(data)
-            f.close()
+            data = download_file(chunk_url, chunk_file, "wb")
         else:
             f = open(chunk_file, "rb")
             data = f.read()
@@ -218,14 +249,8 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
 
     stream = manifest.findall('.//StreamIndex')[stream_index]
 
-    quality = stream.findall("QualityLevel")[quality_level]
-    bitrate = quality.attrib["Bitrate"]
-
-    # Assume URLs are in this form:
-    # Url="QualityLevels({bitrate})/Fragments(video={start time})"
-    url = stream.attrib["Url"]
+    chunks_quality = get_chunk_quality_string(stream, quality_level)
 
-    chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
     chunks_src_dir = os.path.join(src_dir, chunks_quality)
 
     dest_file = open(dest_file_name, "wb")
@@ -236,9 +261,8 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
     sys.stdout.flush()
     for i, c in enumerate(chunks):
-        t = c.attrib["t"]
 
-        chunk_name = url.split('/')[1].replace("{start time}", t)
+        chunk_name = get_chunk_name_string(stream, c)
         chunk_file = os.path.join(chunks_src_dir, chunk_name)
 
         f = open(chunk_file, "rb")
@@ -251,6 +275,7 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
 
     # Add a nice WAV header
     if stream.attrib['Type'] == "audio":
+        quality = stream.findall("QualityLevel")[quality_level]
         codec_private_data = quality.attrib['CodecPrivateData']
 
         fmt = {}