Skip to content

Commit 5e5be11

Browse files
committed
Replace PyPDF2 (PdfReader, PdfWriter) and pdf2image with PyMuPDF for PDF processing; update the trim, split, and convert functions accordingly
1 parent 8d1b476 commit 5e5be11

File tree

2 files changed

+41
-33
lines changed

2 files changed

+41
-33
lines changed

main.py

Lines changed: 41 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
QButtonGroup)
99
from PyQt6.QtCore import Qt
1010
from PyQt6.QtGui import QFont
11-
from PyPDF2 import PdfReader, PdfWriter
12-
from pdf2image import convert_from_path
11+
import fitz # PyMuPDF
1312

1413

1514
class Mode:
@@ -121,25 +120,30 @@ def check_overwrite_image_files(output_path, parsed_input):
121120

122121
def trim_pdf(input_path, page_numbers, output_path):
123122
"""Create a new PDF with only the specified pages."""
124-
reader = PdfReader(input_path)
125-
writer = PdfWriter()
126-
total_pages = len(reader.pages)
123+
doc = fitz.open(input_path)
124+
total_pages = len(doc)
127125

128126
valid_pages = []
129127
invalid_pages = []
130128

131129
for page_num in page_numbers:
132130
if 1 <= page_num <= total_pages:
133-
writer.add_page(reader.pages[page_num - 1])
134131
valid_pages.append(page_num)
135132
else:
136133
invalid_pages.append(page_num)
137134

138135
if not valid_pages:
136+
doc.close()
139137
raise ValueError("No valid pages to include in the output PDF.")
140138

141-
with open(output_path, "wb") as f:
142-
writer.write(f)
139+
# Create new document with selected pages (convert to 0-based indexing)
140+
new_doc = fitz.open()
141+
for page_num in valid_pages:
142+
new_doc.insert_pdf(doc, from_page=page_num - 1, to_page=page_num - 1)
143+
144+
new_doc.save(output_path)
145+
new_doc.close()
146+
doc.close()
143147

144148
message = f"Successfully created PDF with {len(valid_pages)} pages"
145149
if invalid_pages:
@@ -150,10 +154,11 @@ def trim_pdf(input_path, page_numbers, output_path):
150154

151155
def split_pdf(input_path, chunk_size, output_path):
152156
"""Split a PDF into multiple files with specified chunk size."""
153-
reader = PdfReader(input_path)
154-
total_pages = len(reader.pages)
157+
doc = fitz.open(input_path)
158+
total_pages = len(doc)
155159

156160
if chunk_size <= 0:
161+
doc.close()
157162
raise ValueError("Chunk size must be a positive integer.")
158163

159164
# Determine output naming
@@ -165,20 +170,22 @@ def split_pdf(input_path, chunk_size, output_path):
165170
chunk_num = 1
166171

167172
for start_page in range(0, total_pages, chunk_size):
168-
writer = PdfWriter()
169173
end_page = min(start_page + chunk_size, total_pages)
170174

171-
for page_idx in range(start_page, end_page):
172-
writer.add_page(reader.pages[page_idx])
175+
# Create new document for this chunk
176+
new_doc = fitz.open()
177+
new_doc.insert_pdf(doc, from_page=start_page, to_page=end_page - 1)
173178

174179
# Generate output filename
175180
output_filename = output_dir / f"{base_name}_part{chunk_num}.pdf"
176-
with open(output_filename, "wb") as f:
177-
writer.write(f)
181+
new_doc.save(str(output_filename))
182+
new_doc.close()
178183

179184
created_files.append(str(output_filename))
180185
chunk_num += 1
181186

187+
doc.close()
188+
182189
num_chunks = len(created_files)
183190
message = f"Successfully split PDF into {num_chunks} file{'s' if num_chunks > 1 else ''}"
184191
message += f"\n\nCreated {num_chunks} PDF{'s' if num_chunks > 1 else ''} in:\n{output_dir}"
@@ -188,8 +195,8 @@ def split_pdf(input_path, chunk_size, output_path):
188195

189196
def convert_to_images(input_path, page_numbers, output_path):
190197
"""Convert specified PDF pages to images."""
191-
reader = PdfReader(input_path)
192-
total_pages = len(reader.pages)
198+
doc = fitz.open(input_path)
199+
total_pages = len(doc)
193200

194201
valid_pages = []
195202
invalid_pages = []
@@ -202,6 +209,7 @@ def convert_to_images(input_path, page_numbers, output_path):
202209
invalid_pages.append(page_num)
203210

204211
if not valid_pages:
212+
doc.close()
205213
raise ValueError("No valid pages to convert to images.")
206214

207215
# Determine output naming
@@ -214,21 +222,20 @@ def convert_to_images(input_path, page_numbers, output_path):
214222

215223
created_files = []
216224

217-
# Convert pages to images
218-
# pdf2image uses 1-based indexing, matching our page_numbers
219-
images = convert_from_path(input_path, first_page=min(valid_pages),
220-
last_page=max(valid_pages))
221-
225+
# Convert pages to images using PyMuPDF
222226
for page_num in valid_pages:
223-
# Get the corresponding image from the converted range
224-
image_index = page_num - min(valid_pages)
225-
if image_index < len(images):
226-
image = images[image_index]
227-
228-
# Generate output filename
229-
output_filename = output_dir / f"{base_name}_page{page_num}.png"
230-
image.save(str(output_filename), "PNG")
231-
created_files.append(str(output_filename))
227+
# Convert to 0-based index
228+
page = doc.load_page(page_num - 1)
229+
230+
# Render page to an image (pixmap)
231+
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) # 2x scaling for better quality
232+
233+
# Generate output filename
234+
output_filename = output_dir / f"{base_name}_page{page_num}.png"
235+
pix.save(str(output_filename))
236+
created_files.append(str(output_filename))
237+
238+
doc.close()
232239

233240
num_images = len(created_files)
234241
message = f"Successfully converted {num_images} page{'s' if num_images > 1 else ''} to image{'s' if num_images > 1 else ''}"
@@ -455,8 +462,9 @@ def browse_input(self):
455462
return
456463

457464
try:
458-
reader = PdfReader(filename)
459-
total_pages = len(reader.pages)
465+
doc = fitz.open(filename)
466+
total_pages = len(doc)
467+
doc.close()
460468

461469
self.input_path = filename
462470
self._update_label(self.input_label, filename, active=True)

requirements.txt

-66 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)