- if 'Content-Disposition' in response.info():
- # Use the filename the server tells us if any,
- # re pattern from http://stackoverflow.com/questions/8035900
- content_disposition = response.info().getheader('Content-Disposition').strip()
- filename = re.findall("filename=(\S+)", content_disposition)[0]
-
- if filename == "":
- filename = urllib2.unquote(os.path.basename(response.geturl()))
+
+ # Derive the local file name for the download: prefer the name announced in
+ # the Content-Disposition response header, otherwise use the last segment
+ # of the URL path.
+ # From http://paste.pound-python.org/show/9545/
+ # TODO: use a proper module to parse HTTP headers
+ filename = ''
+ if 'Content-Disposition' in response.info():
+     disposition_parts = response.info()['Content-Disposition'].split('filename=')
+     if len(disposition_parts) > 1:
+         # If the response has Content-Disposition, we take file name from it
+         filename = disposition_parts[1].decode('utf-8').strip()
+         if filename[:1] in ('"', "'"):
+             # Quoted value: keep the text up to the matching closing quote.
+             # Split on the quote character actually used, so a
+             # single-quoted name no longer raises IndexError.
+             quote_char = filename[0]
+             filename = urllib2.unquote(filename[1:].split(quote_char)[0])
+         else:
+             # Unquoted token: it ends at the next header parameter, if any.
+             filename = filename.split(';')[0].strip()
+ if not filename:
+     # Header missing, without a filename parameter, or with an empty one:
+     # fall back to the URL.
+     filename = urllib2.unquote(url.split('/')[-1].decode('utf_8'))