winfreed.py: hopefully more solid parsing of Content-Disposition header
[winfreed.git] / winfreed.py
index 108b719..16fb1c7 100755 (executable)
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+import re
 import os
 import sys
 import glob
@@ -32,6 +33,7 @@ LANGCODE = 'en-US'
 PKG_DIR = 'pkgs'
 CHUNK_SIZE = 8192
 
+
 def get_pkg(json_file):
     with open(json_file, mode='r') as f:
         pkg = json.load(f)
@@ -43,6 +45,7 @@ def get_pkg(json_file):
 
     return None
 
+
 def process_all(path, cb):
     listing = glob.glob(os.path.join(path, '*.json'))
     for json_file in listing:
@@ -52,24 +55,39 @@ def process_all(path, cb):
             continue
         cb(pkg)
 
+
 def show(pkg):
     print 'Package:  ', pkg['package_name']
     print 'Program:  ', pkg['name']
     print 'Homepage: ', pkg['homepage']
     print
 
+
 def download(pkg):
     # the "%s" in URLs are meant to be replaced with LANGCODE
     try:
-        url = pkg['URL'] % LANGCODE
+        pkg_url = pkg['URL'] % LANGCODE
     except:
-        url = pkg['URL']
+        pkg_url = pkg['URL']
         pass
 
-    response = urllib2.urlopen(url)
-    filename = urllib2.unquote(os.path.basename(response.geturl()))
+    response = urllib2.urlopen(pkg_url)
+    url = response.geturl()
+
+    filename = ""
+
+    # From http://paste.pound-python.org/show/9545/
+    # TODO: use a proper module to parse HTTP headers
+    if response.info().has_key('Content-Disposition') and len(response.info()['Content-Disposition'].split('filename=')) > 1:
+        # If the response has a Content-Disposition header, take the file name from it
+        filename = response.info()['Content-Disposition'].split('filename=')[1].decode('utf-8')
+        if filename[0] == '"' or filename[0] == "'":
+            filename = urllib2.unquote(filename.split('"')[1])
+    else:
+        filename =  urllib2.unquote(url.split('/')[-1].decode('utf_8'))
+
     if filename == "":
-        sys.stderr.write("Debug (%s): filename: %s url: %s\n" %(pkg['package_name'], filename, response.geturl()))
+        sys.stderr.write("Debug (%s): filename: %s url: %s\n" % (pkg['package_name'], filename, url))
         return
 
     destfile = os.path.join(OUTPUT_DIR, filename)
@@ -97,15 +115,18 @@ def download(pkg):
         pbar.update(bytes_so_far)
     pbar.finish()
 
+
 def show_all():
     process_all(PKG_DIR, show)
 
+
 def download_all():
     if os.path.exists(OUTPUT_DIR) == False:
         os.mkdir(OUTPUT_DIR, 0755)
 
     process_all(PKG_DIR, download)
 
+
 def usage():
     usage = "winfreed - download a selection of Free Software for MS Windows.\n\n"
     usage += "usage: %s <COMMAND>\n\n" % sys.argv[0]
@@ -127,5 +148,5 @@ if __name__ == "__main__":
     else:
         usage()
         sys.exit(1)
-    
+
     sys.exit(0)