Add a playready_experiment.py to play with ProtectionHeader
[smooth-dl.git] / smooth-dl.py
index 41dfc4c..d983a28 100755 (executable)
@@ -1,8 +1,8 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # smooth-dl - download videos served using Smooth Streaming technology
 #
-# Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
+# Copyright (C) 2010-2016  Antonio Ospite <ao2@ao2.it>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -34,11 +34,13 @@ import os
 import re
 import sys
 import xml.etree.ElementTree as etree
-import urllib2
+import urllib.request
+import urllib.error
+import urllib.parse
 import struct
 import tempfile
 from optparse import OptionParser
-from urlparse import urlparse, urlunparse
+from urllib.parse import urlparse, urlunparse
 
 __description__ = "Download videos served using Smooth Streaming technology"
 __version__ = "0.x"
@@ -62,11 +64,7 @@ def get_chunk_data(data):
 
 
 def hexstring_to_bytes(hex_string):
-    res = ""
-    for i in range(0, len(hex_string), 2):
-        res += chr(int(hex_string[i:i + 2], 16))
-
-    return res
+    return bytearray.fromhex(hex_string)
 
 
 def write_wav_header(out_file, fmt, codec_private_data, data_len):
@@ -77,9 +75,9 @@ def write_wav_header(out_file, fmt, codec_private_data, data_len):
     fmt_len = 18 + fmt['cbSize']
     wave_len = len("WAVEfmt ") + 4 + fmt_len + len('data') + 4
 
-    out_file.write("RIFF")
+    out_file.write(bytes("RIFF", "ascii"))
     out_file.write(struct.pack('<L', wave_len))
-    out_file.write("WAVEfmt ")
+    out_file.write(bytes("WAVEfmt ", "ascii"))
     out_file.write(struct.pack('<L', fmt_len))
     out_file.write(struct.pack('<H', fmt['wFormatTag']))
     out_file.write(struct.pack('<H', fmt['nChannels']))
@@ -89,17 +87,23 @@ def write_wav_header(out_file, fmt, codec_private_data, data_len):
     out_file.write(struct.pack('<H', fmt['wBitsPerSample']))
     out_file.write(struct.pack('<H', fmt['cbSize']))
     out_file.write(extradata)
-    out_file.write("data")
+    out_file.write(bytes("data", "ascii"))
     out_file.write(struct.pack('<L', data_len))
 
 
 def download_file(src_url, dest_file, mode):
-    try:
-        response = urllib2.urlopen(src_url)
-        data = response.read()
-    except urllib2.HTTPError:
-        sys.stderr.write("Error while dowloading URL: %s" % src_url)
-        raise
+
+    if os.path.exists(src_url):
+        f = open(src_url, "rb")
+        data = f.read()
+        f.close()
+    else:
+        try:
+            response = urllib.request.urlopen(src_url)
+            data = response.read()
+        except urllib.error.HTTPError:
+            sys.stderr.write("Error while dowloading URL: %s\n" % src_url)
+            raise
 
     if dest_file:
         f = open(dest_file, mode)
@@ -109,11 +113,8 @@ def download_file(src_url, dest_file, mode):
     return data
 
 
-def get_manifest(url, dest_dir=tempfile.gettempdir()):
-    """Returns the manifest and the new URL if this is changed"""
-
-    if not os.path.exists(dest_dir):
-        os.mkdir(dest_dir, 0755)
+def get_manifest(url, dest_dir):
+    """Returns the manifest element and the base content URL"""
 
     # Remove the querystring if present
     manifest_url = urlunparse(urlparse(url)._replace(query=''))
@@ -121,11 +122,8 @@ def get_manifest(url, dest_dir=tempfile.gettempdir()):
     if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
         manifest_url += '/Manifest'
 
-    if manifest_url.startswith('http://'):
-        local_manifest_path = os.path.join(dest_dir, 'Manifest')
-        download_file(manifest_url, local_manifest_path, "w")
-    else:
-        local_manifest_path = url
+    local_manifest_path = os.path.join(dest_dir, 'Manifest')
+    download_file(manifest_url, local_manifest_path, "wb")
 
     manifest = etree.parse(local_manifest_path)
 
@@ -133,13 +131,18 @@ def get_manifest(url, dest_dir=tempfile.gettempdir()):
     if version != "2":
         raise Exception('Only Smooth Streaming version 2 supported')
 
-    try:
-        # if some intermediate client Manifest is used, like in Rai Replay
-        clip = manifest.find("Clip")
-        manifest_url = clip.attrib["Url"]
-        manifest = download_file(manifest_url, None, None)
-    except AttributeError:
-        pass
+    # if some intermediate client Manifest is used, like in Rai Replay
+    # then get the final manifest
+    clip = manifest.find("Clip")
+    if clip is not None and "Url" in clip.attrib:
+        tmp_manifest_url = clip.attrib["Url"]
+        try:
+            tmp_manifest = download_file(tmp_manifest_url, None, None)
+            # set the new values only if the download succeeded
+            manifest_url = tmp_manifest_url
+            manifest = etree.ElementTree(etree.fromstring(tmp_manifest))
+        except urllib.error.HTTPError:
+            pass
 
     manifest_pattern = re.compile("/manifest$", re.IGNORECASE)
     base_url = manifest_pattern.sub("", manifest_url)
@@ -154,9 +157,9 @@ def print_manifest_info(manifest):
     for i, s in enumerate(streams):
         stream_type = s.attrib["Type"]
 
-        print "Stream: %s Type: %s" % (i, stream_type)
+        print("Stream: %s Type: %s" % (i, stream_type))
 
-        print "\tQuality Levels:"
+        print("\tQuality Levels:")
         qualities = s.findall("QualityLevel")
         for i, q in enumerate(qualities):
             bitrate = q.attrib["Bitrate"]
@@ -164,16 +167,16 @@ def print_manifest_info(manifest):
 
             if stream_type == "video":
                 size = "%sx%s" % (q.attrib["MaxWidth"], q.attrib["MaxHeight"])
-                print "\t%2s: %4s %10s @ %7s bps" % (i, fourcc, size, bitrate)
+                print("\t%2s: %4s %10s @ %7s bps" % (i, fourcc, size, bitrate))
             if stream_type == "audio":
                 channels = q.attrib["Channels"]
                 sampling_rate = q.attrib["SamplingRate"]
                 bits_per_sample = q.attrib["BitsPerSample"]
-                print "\t%2s: %4s %sHz %sbits %sch @ %7s bps" % \
-                    (i, fourcc, sampling_rate, bits_per_sample, channels,
-                     bitrate)
+                print("\t%2s: %4s %sHz %sbits %sch @ %7s bps" %
+                      (i, fourcc, sampling_rate, bits_per_sample, channels,
+                       bitrate))
 
-    print
+    print()
 
 
 def get_chunk_quality_string(stream, quality_level):
@@ -199,35 +202,38 @@ def get_chunk_quality_string(stream, quality_level):
     return chunks_quality
 
 
-def get_chunk_name_string(stream, chunk):
-    t = chunk.attrib["t"]
+def get_chunk_name_string(stream, chunk_time):
     url = stream.attrib["Url"]
-    chunk_name = url.split('/')[1].replace("{start time}", t)
+    chunk_name = url.split('/')[1].replace("{start time}", str(chunk_time))
 
     return chunk_name
 
 
 def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
-
-    if not os.path.exists(dest_dir):
-        os.mkdir(dest_dir, 0755)
-
     stream = manifest.findall('.//StreamIndex')[stream_index]
 
     chunks_quality = get_chunk_quality_string(stream, quality_level)
 
     chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
     if not os.path.exists(chunks_dest_dir):
-        os.mkdir(chunks_dest_dir, 0755)
+        os.mkdir(chunks_dest_dir, 0o755)
 
     chunks = stream.findall("c")
     data_size = 0
-    print "\nDownloading Stream %d" % stream_index
-    print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
+    print("\nDownloading Stream %d" % stream_index)
+    print("\tChunks %10d/%-10d" % (0, len(chunks)), "\r", end=' ')
     sys.stdout.flush()
-    for i, c in enumerate(chunks):
 
-        chunk_name = get_chunk_name_string(stream, c)
+    stream_duration = 0
+    for i, chunk in enumerate(chunks):
+
+        # An explicit "t" restarts the running clock; otherwise the chunk
+        # begins where the previous one ended.
+        chunk_time = int(chunk.attrib.get("t", stream_duration))
+        if "d" in chunk.attrib:
+            stream_duration = chunk_time + int(chunk.attrib["d"])
+
+        chunk_name = get_chunk_name_string(stream, chunk_time)
         chunk_file = os.path.join(dest_dir, chunks_quality, chunk_name)
 
         if not os.path.exists(chunk_file):
@@ -239,9 +245,9 @@ def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
             f.close()
 
         data_size += len(data)
-        print "\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r",
+        print("\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r", end=' ')
         sys.stdout.flush()
-    print "\tDownloaded size:", data_size
+    print("\tDownloaded size:", data_size)
 
 
 def rebuild_stream(manifest, stream_index, quality_level, src_dir,
@@ -260,12 +266,20 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
 
     chunks = stream.findall("c")
     data_size = 0
-    print "\nRebuilding Stream %d" % stream_index
-    print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
+    print("\nRebuilding Stream %d" % stream_index)
+    print("\tChunks %10d/%-10d" % (0, len(chunks)), "\r", end=' ')
     sys.stdout.flush()
-    for i, c in enumerate(chunks):
 
-        chunk_name = get_chunk_name_string(stream, c)
+    stream_duration = 0
+    for i, chunk in enumerate(chunks):
+
+        # An explicit "t" restarts the running clock; otherwise the chunk
+        # begins where the previous one ended.
+        chunk_time = int(chunk.attrib.get("t", stream_duration))
+        if "d" in chunk.attrib:
+            stream_duration = chunk_time + int(chunk.attrib["d"])
+
+        chunk_name = get_chunk_name_string(stream, chunk_time)
         chunk_file = os.path.join(chunks_src_dir, chunk_name)
 
         f = open(chunk_file, "rb")
@@ -273,7 +287,7 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
         f.close()
         dest_file.write(data)
         data_size += len(data)
-        print "\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r",
+        print("\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r", end=' ')
         sys.stdout.flush()
 
     # Add a nice WAV header
@@ -285,7 +299,7 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
         fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
         fmt['nChannels'] = int(quality.attrib['Channels'])
         fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
-        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) / 8
+        fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) // 8
         fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
         fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
         fmt['cbSize'] = 0
@@ -298,8 +312,8 @@ def rebuild_stream(manifest, stream_index, quality_level, src_dir,
         f.close()
         dest_file.close()
 
-    print
-    print "Stream %d, actual data size: %d\n" % (stream_index, data_size)
+    print()
+    print("Stream %d, actual data size: %d\n" % (stream_index, data_size))
 
 
 def calc_tracks_delay(manifest, stream1_index, stream2_index):
@@ -308,9 +322,16 @@ def calc_tracks_delay(manifest, stream1_index, stream2_index):
     s1 = streams[stream1_index]
     s2 = streams[stream2_index]
 
+    # Element "in" scans child nodes; attributes live in .attrib
+    if "TimeScale" not in s1.attrib or "TimeScale" not in s2.attrib:
+        return 0
     s1_start_chunk = s1.find("c")
     s2_start_chunk = s2.find("c")
 
+    if "t" not in s1_start_chunk.attrib \
+       or "t" not in s2_start_chunk.attrib:
+        return 0
+
     s1_start_time = int(s1_start_chunk.attrib['t'])
     s2_start_time = int(s2_start_chunk.attrib['t'])
 
@@ -331,7 +352,7 @@ def get_clip_duration(manifest):
     return float(duration) / 10000000  # here is the default timescale
 
 
-def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
+def smooth_download(url, manifest, dest_dir,
                     video_stream_index=0, audio_stream_index=1,
                     video_quality_level=0, audio_quality_level=0,
                     chunks_dir=None, download=True,
@@ -366,7 +387,7 @@ def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
                    "  -vcodec copy -acodec copy ffout.mkv") % \
         (dest_video, delay, dest_audio + '.wav')
 
-    print mux_command
+    print(mux_command)
 
 
 def options_parser():
@@ -416,6 +437,9 @@ def main():
         parser.print_help()
         parser.exit(1)
 
+    if not os.path.exists(options.dest_dir):
+        os.mkdir(options.dest_dir, 0o755)
+
     url = args[0]
     manifest, url = get_manifest(url, options.dest_dir)
 
@@ -423,9 +447,9 @@ def main():
         parser.exit(0)
 
     if options.sync_delay:
-        print calc_tracks_delay(manifest,
+        print(calc_tracks_delay(manifest,
                                 options.video_stream_index,
-                                options.audio_stream_index)
+                                options.audio_stream_index))
         parser.exit(0)
 
     if options.info_only: