X-Git-Url: https://git.ao2.it/pdfstrip.git/blobdiff_plain/965494b69b9bb4182f8a2e50e2eaada0eac4b12f..refs/heads/master:/pdfstrip.py

diff --git a/pdfstrip.py b/pdfstrip.py
index 1b287f8..dd212ff 100755
--- a/pdfstrip.py
+++ b/pdfstrip.py
@@ -34,35 +34,24 @@ logger.addHandler(console_handler)
 logger.propagate = False
 
 
-def resolve_objects_names(pdf, objects_ids):
-    logger.debug("objects_ids %s", objects_ids)
-    objects_names = []
-    for objnum in objects_ids:
-        indirect_object = pdf.findindirect(objnum, 0)
-        if isinstance(indirect_object, PdfIndirect):
-            try:
-                real_object = indirect_object.real_value()
-                if real_object.Name:
-                    objects_names.append(real_object.Name)
-                else:
-                    logger.warning("Object %d has an empty 'Name' attribute", objnum)
-            except AttributeError:
-                logger.warning("Object %d has no 'Name' attribute", objnum)
-        else:
-            logger.warning("Object %d is not a PdfIndirect but a %s",
-                           objnum, type(indirect_object))
-
-    logger.debug("objects_names %s\n", objects_names)
-    return objects_names
-
-
-def strip_objects(pdf, objects_names):
+def strip_objects(pdf, objects_ids):
     for i, page in enumerate(pdf.pages):
         logger.debug("Page %d", i + 1)
+
+        # skip empty pages
+        if not page.Resources.XObject:
+            continue
+
+        # Map all the objects in the page using the objects id as the key and
+        # the resource name as the value.
+        name_map = {indirect_obj.indirect[0]: name for name, indirect_obj in page.Resources.XObject.items()}
+        logger.debug("name_map: %s", name_map)
+
         logger.debug("Before %s", page.Resources.XObject.keys())
-        for obj in objects_names:
-            if obj in page.Resources.XObject:
-                del page.Resources.XObject[obj]
+
+        for obj in objects_ids:
+            if obj in name_map:
+                del page.Resources.XObject[name_map[obj]]
         logger.debug("After %s\n", page.Resources.XObject.keys())
 
 
@@ -97,8 +86,7 @@ def main():
 
     pdf_data = PdfReader(args.input_filename)
 
-    objects_names = resolve_objects_names(pdf_data, args.objects_ids)
-    pdf_data = strip_objects(pdf_data, objects_names)
+    pdf_data = strip_objects(pdf_data, args.objects_ids)
 
     PdfWriter().write(args.output_filename, pdf_data)
 