3 # smooth-dl - download videos served using Smooth Streaming technology
 
   5 # Copyright (C) 2010  Antonio Ospite <ospite@studenti.unina.it>
 
   7 # This program is free software: you can redistribute it and/or modify
 
   8 # it under the terms of the GNU General Public License as published by
 
   9 # the Free Software Foundation, either version 3 of the License, or
 
  10 # (at your option) any later version.
 
  12 # This program is distributed in the hope that it will be useful,
 
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
  15 # GNU General Public License for more details.
 
  17 # You should have received a copy of the GNU General Public License
 
  18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
# TODO:
#  - Handle HTTP errors:
#       "Connection reset by peer"
#       "Resource not available"
#  - Support more Manifest formats:
#       WaveFormatEx attribute instead of CodecPrivateData
#       'd' and other attributes in chunk element ('i', 's', 'q')
#
# Basically, write a proper implementation of manifest parsing and chunk
# downloading.
 
  34 __description = "Download videos served using Smooth Streaming technology"
 
  36 __author_info = "Written by Antonio Ospite http://ao2.it"
 
import os
import struct
import sys
import tempfile
import urllib2
import xml.etree.ElementTree as etree
from optparse import OptionParser
 
  47 def get_chunk_data(data):
 
  49     moof_size = struct.unpack(">L", data[0:4])[0]
 
  50     mdat_size = struct.unpack(">L", data[moof_size:moof_size + 4])[0]
 
  52     data_start = moof_size + 4 + len('mdat')
 
  53     data_size = mdat_size - 4 - len('mdat')
 
  55     #print len(data[data_start:]), \
 
  56     #        len(data[data_start:data_start + data_size]), data_size
 
  58     assert(len(data[data_start:]) == data_size)
 
  60     return data[data_start:data_start + data_size]
 
  63 def hexstring_to_bytes(hex_string):
 
  65     for i in range(0, len(hex_string), 2):
 
  66             res += chr(int(hex_string[i:i + 2], 16))
 
  71 def write_wav_header(out_file, fmt, codec_private_data, data_len):
 
  73     extradata = hexstring_to_bytes(codec_private_data)
 
  75     fmt['cbSize'] = len(extradata)
 
  76     fmt_len = 18 + fmt['cbSize']
 
  77     wave_len = len("WAVEfmt ") + 4 + fmt_len + len('data') + 4
 
  79     out_file.write("RIFF")
 
  80     out_file.write(struct.pack('<L', wave_len))
 
  81     out_file.write("WAVEfmt ")
 
  82     out_file.write(struct.pack('<L', fmt_len))
 
  83     out_file.write(struct.pack('<H', fmt['wFormatTag']))
 
  84     out_file.write(struct.pack('<H', fmt['nChannels']))
 
  85     out_file.write(struct.pack('<L', fmt['nSamplesPerSec']))
 
  86     out_file.write(struct.pack('<L', fmt['nAvgBytesPerSec']))
 
  87     out_file.write(struct.pack('<H', fmt['nBlockAlign']))
 
  88     out_file.write(struct.pack('<H', fmt['wBitsPerSample']))
 
  89     out_file.write(struct.pack('<H', fmt['cbSize']))
 
  90     out_file.write(extradata)
 
  91     out_file.write("data")
 
  92     out_file.write(struct.pack('<L', data_len))
 
  95 def get_manifest(base_url, dest_dir=tempfile.gettempdir(),
 
  96         manifest_file='Manifest'):
 
  97     """Returns the manifest and the new URL if this is changed"""
 
  99     if os.path.exists(dest_dir) == False:
 
 100         os.mkdir(dest_dir, 0755)
 
 102     if base_url.startswith('http://'):
 
 104         manifest_url = base_url
 
 105         if not manifest_url.lower().endswith(('/manifest', '.ismc', '.csm')):
 
 106             manifest_url += '/Manifest'
 
 108         response = urllib2.urlopen(manifest_url)
 
 109         data = response.read()
 
 111         manifest_path = os.path.join(dest_dir, manifest_file)
 
 112         f = open(manifest_path, "w")
 
 116         manifest_path = base_url
 
 118     manifest = etree.parse(manifest_path)
 
 120     version = manifest.getroot().attrib['MajorVersion']
 
 122         raise Exception('Only Smooth Streaming version 2 supported')
 
 125         # if some intermediate client Manifest is used, like in Rai Replay
 
 126         clip = manifest.find("Clip")
 
 127         actual_manifest_url = clip.attrib["Url"]
 
 128         base_url = actual_manifest_url.lower().replace("/manifest", "")
 
 132     return (manifest, base_url)
 
 135 def print_manifest_info(manifest):
 
 137     streams = manifest.findall('.//StreamIndex')
 
 139     for i, s in enumerate(streams):
 
 140         stream_type = s.attrib["Type"]
 
 141         url = s.attrib["Url"]
 
 143         print "Stream: %s Type: %s" % (i, stream_type)
 
 145         print "\tQuality Levels:"
 
 146         qualities = s.findall("QualityLevel")
 
 147         for i, q in enumerate(qualities):
 
 148             bitrate = q.attrib["Bitrate"]
 
 149             fourcc = q.attrib["FourCC"]
 
 151             if stream_type == "video":
 
 152                 size = "%sx%s" % (q.attrib["MaxWidth"], q.attrib["MaxHeight"])
 
 153                 print "\t%2s: %4s %10s @ %7s bps" % (i, fourcc, size, bitrate)
 
 154             if stream_type == "audio":
 
 155                 channels = q.attrib["Channels"]
 
 156                 sampling_rate = q.attrib["SamplingRate"]
 
 157                 bits_per_sample = q.attrib["BitsPerSample"]
 
 158                 print "\t%2s: %4s %sHz %sbits %sch @ %7s bps" % (i, fourcc,
 
 159                         sampling_rate, bits_per_sample, channels, bitrate)
 
 164 def download_chunks(base_url, manifest, stream_index, quality_level, dest_dir):
 
 166     if os.path.exists(dest_dir) == False:
 
 167         os.mkdir(dest_dir, 0755)
 
 169     stream = manifest.findall('.//StreamIndex')[stream_index]
 
 171     quality = stream.findall("QualityLevel")[quality_level]
 
 172     bitrate = quality.attrib["Bitrate"]
 
 174     # Assume URLs are in this form:
 
 175     # Url="QualityLevels({bitrate})/Fragments(video={start time})"
 
 176     url = stream.attrib["Url"]
 
 178     chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
 
 179     chunks_dest_dir = os.path.join(dest_dir, chunks_quality)
 
 180     if os.path.exists(chunks_dest_dir) == False:
 
 181         os.mkdir(chunks_dest_dir, 0755)
 
 183     chunks = stream.findall("c")
 
 185     print "\nDownloading Stream %d" % stream_index
 
 186     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
 
 188     for i, c in enumerate(chunks):
 
 191         chunk_name = url.split('/')[1].replace("{start time}", t)
 
 192         chunk_file = os.path.join(dest_dir,  chunks_quality, chunk_name)
 
 194         if os.path.exists(chunk_file) == False:
 
 195             chunk_url = base_url + '/' + chunks_quality + '/' + chunk_name
 
 196             response = urllib2.urlopen(chunk_url)
 
 197             data = response.read()
 
 199             f = open(chunk_file, "wb")
 
 203             f = open(chunk_file, "rb")
 
 207         data_size += len(data)
 
 208         print "\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r",
 
 210     print "\tDownloaded size:", data_size
 
 213 def rebuild_stream(manifest, stream_index, quality_level, src_dir,
 
 214         dest_file_name, final_dest_file=None):
 
 216     if final_dest_file == None:
 
 217         final_dest_file = dest_file_name
 
 219     stream = manifest.findall('.//StreamIndex')[stream_index]
 
 221     quality = stream.findall("QualityLevel")[quality_level]
 
 222     bitrate = quality.attrib["Bitrate"]
 
 224     # Assume URLs are in this form:
 
 225     # Url="QualityLevels({bitrate})/Fragments(video={start time})"
 
 226     url = stream.attrib["Url"]
 
 228     chunks_quality = url.split('/')[0].replace("{bitrate}", bitrate)
 
 229     chunks_src_dir = os.path.join(src_dir, chunks_quality)
 
 231     dest_file = open(dest_file_name, "wb")
 
 233     chunks = stream.findall("c")
 
 235     print "\nRebuilding Stream %d" % stream_index
 
 236     print "\tChunks %10d/%-10d" % (0, len(chunks)), "\r",
 
 238     for i, c in enumerate(chunks):
 
 241         chunk_name = url.split('/')[1].replace("{start time}", t)
 
 242         chunk_file = os.path.join(chunks_src_dir, chunk_name)
 
 244         f = open(chunk_file, "rb")
 
 245         data = get_chunk_data(f.read())
 
 247         dest_file.write(data)
 
 248         data_size += len(data)
 
 249         print "\tChunks %10d/%-10d" % (i + 1, len(chunks)), "\r",
 
 252     # Add a nice WAV header
 
 253     if stream.attrib['Type'] == "audio":
 
 254         codec_private_data = quality.attrib['CodecPrivateData']
 
 257         fmt['wFormatTag'] = int(quality.attrib['AudioTag'])
 
 258         fmt['nChannels'] = int(quality.attrib['Channels'])
 
 259         fmt['nSamplesPerSec'] = int(quality.attrib['SamplingRate'])
 
 260         fmt['nAvgBytesPerSec'] = int(quality.attrib['Bitrate']) / 8
 
 261         fmt['wBitsPerSample'] = int(quality.attrib['BitsPerSample'])
 
 262         fmt['nBlockAlign'] = int(quality.attrib['PacketSize'])
 
 265         f = open(final_dest_file, "wb")
 
 266         write_wav_header(f, fmt, codec_private_data, data_size)
 
 268         dest_file = open(dest_file_name, "rb")
 
 269         f.write(dest_file.read())
 
 274     print "Stream %d, actual data size: %d\n" % (stream_index, data_size)
 
 277 def calc_tracks_delay(manifest, stream1_index, stream2_index):
 
 278     streams = manifest.findall('.//StreamIndex')
 
 280     s1 = streams[stream1_index]
 
 281     s2 = streams[stream2_index]
 
 283     s1_start_chunk = s1.find("c")
 
 284     s2_start_chunk = s2.find("c")
 
 286     s1_start_time = int(s1_start_chunk.attrib['t'])
 
 287     s2_start_time = int(s2_start_chunk.attrib['t'])
 
 289     s1_timescale = float(s1.attrib['TimeScale'])
 
 290     s2_timescale = float(s2.attrib['TimeScale'])
 
 292     # calc difference in seconds
 
 293     delay = s2_start_time / s2_timescale - \
 
 294             s1_start_time / s1_timescale
 
 299 def get_clip_duration(manifest):
 
 300     # TODO: use <Clip ClipBegin="" ClipEnd=""> if Duration is not available
 
 301     duration = manifest.getroot().attrib['Duration']
 
 303     return float(duration) / 10000000  # here is the default timescale
 
 306 def smooth_download(url, manifest, dest_dir=tempfile.gettempdir(),
 
 307         video_stream_index=0, audio_stream_index=1,
 
 308         video_quality_level=0, audio_quality_level=0,
 
 309         chunks_dir=None, download=True,
 
 310         out_video_file='_video.vc1', out_audio_file='_audio.raw'):
 
 312         if chunks_dir == None:
 
 313             chunks_dir = dest_dir
 
 316             download_chunks(url, manifest, video_stream_index,
 
 317                     video_quality_level, chunks_dir)
 
 318             download_chunks(url, manifest, audio_stream_index,
 
 319                     audio_quality_level, chunks_dir)
 
 321         dest_video = os.path.join(dest_dir, out_video_file)
 
 322         dest_audio = os.path.join(dest_dir, out_audio_file)
 
 324         rebuild_stream(manifest, video_stream_index, video_quality_level,
 
 325                 chunks_dir, dest_video)
 
 326         rebuild_stream(manifest, audio_stream_index, audio_quality_level,
 
 327                 chunks_dir, dest_audio, dest_audio + '.wav')
 
 329         #duration = get_clip_duration(manifest)
 
 331         delay = calc_tracks_delay(manifest, video_stream_index,
 
 334         # optionally encode audio to vorbis:
 
 335         # ffmpeg -i _audio.raw.wav -acodec libvorbis -aq 60 audio.ogg
 
 336         mux_command = ("ffmpeg -i %s \\\n" +
 
 337                       "  -itsoffset %f -async 1 -i %s \\\n" +
 
 338                       "  -vcodec copy -acodec copy ffout.mkv") % \
 
 339                       (dest_video, delay, dest_audio + '.wav')
 
 344 def options_parser():
 
 345     version = "%%prog %s" % __version
 
 346     usage = "usage: %prog [options] <manifest URL or file>"
 
 347     parser = OptionParser(usage=usage, version=version,
 
 348             description=__description, epilog=__author_info)
 
 349     parser.add_option("-i", "--info",
 
 350                       action="store_true", dest="info_only",
 
 351                       default=False, help="print Manifest info and exit")
 
 352     parser.add_option("-m", "--manifest-only",
 
 353                       action="store_true", dest="manifest_only",
 
 354                       default=False, help="download Manifest file and exit")
 
 355     parser.add_option("-n", "--no-download",
 
 356                       action="store_false", dest="download",
 
 357                       default=True, help="disable downloading chunks")
 
 358     parser.add_option("-s", "--sync-delay",
 
 359                       action="store_true", dest="sync_delay",
 
 360                       default=False, help="show the sync delay between the given streams and exit")
 
 361     parser.add_option("-d", "--dest-dir", metavar="<dir>",
 
 362                       dest="dest_dir", default=tempfile.gettempdir(),
 
 363                       help="destination directory")
 
 364     parser.add_option("-c", "--chunks-dir", metavar="<dir>",
 
 365                       dest="chunks_dir", default=None,
 
 366                       help="directory containing chunks, if different from destination dir")
 
 367     parser.add_option("-v", "--video-stream",  metavar="<n>",
 
 368                       type="int", dest="video_stream_index", default=0,
 
 369                       help="index of the video stream")
 
 370     parser.add_option("-a", "--audio-stream", metavar="<n>",
 
 371                       type="int", dest="audio_stream_index", default=1,
 
 372                       help="index of the audio stream")
 
 373     parser.add_option("-q", "--video-quality", metavar="<n>",
 
 374                       type="int", dest="video_quality_level", default=0,
 
 375                       help="index of the video quality level")
 
 376     parser.add_option("-Q", "--audio-quality", metavar="<n>",
 
 377                       type="int", dest="audio_quality_level", default=0,
 
 378                       help="index of the audio quality level")
 
 383 if __name__ == "__main__":
 
 385     parser = options_parser()
 
 386     (options, args) = parser.parse_args()
 
 393     manifest, url = get_manifest(url, options.dest_dir)
 
 395     if options.manifest_only:
 
 398     if options.sync_delay:
 
 399         print calc_tracks_delay(manifest,
 
 400                 options.video_stream_index,
 
 401                 options.audio_stream_index)
 
 404     if options.info_only:
 
 405         print_manifest_info(manifest)
 
 408     print_manifest_info(manifest)
 
 410     smooth_download(url, manifest, options.dest_dir,
 
 411             options.video_stream_index, options.audio_stream_index,
 
 412             options.video_quality_level, options.audio_quality_level,
 
 413             options.chunks_dir, options.download)