def download(pkg):
# the "%s" in URLs are meant to be replaced with LANGCODE
try:
- url = pkg['URL'] % LANGCODE
+ pkg_url = pkg['URL'] % LANGCODE
except:
- url = pkg['URL']
+ pkg_url = pkg['URL']
pass
- response = urllib2.urlopen(url)
+ response = urllib2.urlopen(pkg_url)
+ url = response.geturl()
filename = ""
- if 'Content-Disposition' in response.info():
- # Use the filename the server tells us if any,
- # re pattern from http://stackoverflow.com/questions/8035900
- content_disposition = response.info().getheader('Content-Disposition').strip()
- filename = re.findall("filename=(\S+)", content_disposition)[0]
- if filename == "":
- filename = urllib2.unquote(os.path.basename(response.geturl()))
+ # From http://paste.pound-python.org/show/9545/
+ # TODO: use a proper module to parse HTTP headers
+ if response.info().has_key('Content-Disposition') and len(response.info()['Content-Disposition'].split('filename=')) > 1:
+ # If the response has Content-Disposition, we take file name from it
+ filename = response.info()['Content-Disposition'].split('filename=')[1].decode('utf-8')
+ if filename[0] == '"' or filename[0] == "'":
+ filename = urllib2.unquote(filename.split('"')[1])
+ else:
+ filename = urllib2.unquote(url.split('/')[-1].decode('utf_8'))
if filename == "":
- sys.stderr.write("Debug (%s): filename: %s url: %s\n" %(pkg['package_name'], filename, response.geturl()))
+ sys.stderr.write("Debug (%s): filename: %s url: %s\n" % (pkg['package_name'], filename, url))
return
destfile = os.path.join(OUTPUT_DIR, filename)