From: Antonio Ospite Date: Wed, 30 Nov 2011 11:04:34 +0000 (+0100) Subject: winfreed.py: hopefully more solid parsing of Content-Disposition header X-Git-Url: https://git.ao2.it/winfreed.git/commitdiff_plain/97e4c14a8fef9931a848e330284ada86e7607637?hp=7b480f7a232dfa08c40ec67991734fb45b2657ce winfreed.py: hopefully more solid parsing of Content-Disposition header --- diff --git a/winfreed.py b/winfreed.py index 94524e1..16fb1c7 100755 --- a/winfreed.py +++ b/winfreed.py @@ -75,14 +75,16 @@ def download(pkg): url = response.geturl() filename = "" - if 'Content-Disposition' in response.info(): - # Use the filename the server tells us if any, - # re pattern from http://stackoverflow.com/questions/8035900 - content_disposition = response.info().getheader('Content-Disposition').strip() - filename = re.findall("filename=(\S+)", content_disposition)[0] - if filename == "": - filename = urllib2.unquote(os.path.basename(response.geturl())) + # From http://paste.pound-python.org/show/9545/ + # TODO: use a proper module to parse HTTP headers + if response.info().has_key('Content-Disposition') and len(response.info()['Content-Disposition'].split('filename=')) > 1: + # If the response has Content-Disposition, we take file name from it + filename = response.info()['Content-Disposition'].split('filename=')[1].decode('utf-8') + if filename[0] == '"' or filename[0] == "'": + filename = urllib2.unquote(filename.split('"')[1]) + else: + filename = urllib2.unquote(url.split('/')[-1].decode('utf_8')) if filename == "": sys.stderr.write("Debug (%s): filename: %s url: %s\n" % (pkg['package_name'], filename, url))