From b0176c6f974df87c6980461dbbd00af0dd9166b0 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Wed, 22 Sep 2021 22:17:12 +0200 Subject: [PATCH 1/1] pdfstrip.py: skip empty pages Skip empty pages and keep going; this fixes the processing of some PDF files that have placeholder pages. --- pdfstrip.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pdfstrip.py b/pdfstrip.py index 006eefc..dd212ff 100755 --- a/pdfstrip.py +++ b/pdfstrip.py @@ -38,6 +38,10 @@ def strip_objects(pdf, objects_ids): for i, page in enumerate(pdf.pages): logger.debug("Page %d", i + 1) + # skip empty pages + if not page.Resources.XObject: + continue + # Map all the objects in the page using the objects id as the key and # the resource name as the value. name_map = {indirect_obj.indirect[0]: name for name, indirect_obj in page.Resources.XObject.items()} -- 2.1.4