winfreed.py: hopefully more solid parsing of Content-Disposition header
author Antonio Ospite <ospite@studenti.unina.it>
Wed, 30 Nov 2011 11:04:34 +0000 (12:04 +0100)
committer Antonio Ospite <ospite@studenti.unina.it>
Wed, 30 Nov 2011 11:04:34 +0000 (12:04 +0100)
winfreed.py

index 94524e1..16fb1c7 100755 (executable)
@@ -75,14 +75,16 @@ def download(pkg):
     url = response.geturl()
 
     filename = ""
     url = response.geturl()
 
     filename = ""
-    if 'Content-Disposition' in  response.info():
-        # Use the filename the server tells us if any,
-        # re pattern from http://stackoverflow.com/questions/8035900
-        content_disposition = response.info().getheader('Content-Disposition').strip()
-        filename = re.findall("filename=(\S+)", content_disposition)[0]
 
 
-    if filename == "":
-        filename = urllib2.unquote(os.path.basename(response.geturl()))
+    # From http://paste.pound-python.org/show/9545/
+    # TODO: use a proper module to parse HTTP headers
+    if response.info().has_key('Content-Disposition') and len(response.info()['Content-Disposition'].split('filename=')) > 1:
+        # If the response has Content-Disposition, we take file name from it
+        filename = response.info()['Content-Disposition'].split('filename=')[1].decode('utf-8')
+        if filename[0] == '"' or filename[0] == "'":
+            filename = urllib2.unquote(filename.split('"')[1])
+    else:
+        filename =  urllib2.unquote(url.split('/')[-1].decode('utf_8'))
 
     if filename == "":
         sys.stderr.write("Debug (%s): filename: %s url: %s\n" % (pkg['package_name'], filename, url))
 
     if filename == "":
         sys.stderr.write("Debug (%s): filename: %s url: %s\n" % (pkg['package_name'], filename, url))