Skip to content

Commit 9ab85e0

Browse files
Merge pull request #116 from seanpedrick-case/dev
Corrected the input image creation location so that output redaction PDFs have their coordinates placed correctly
2 parents ba5f5dd + a8af3c0 commit 9ab85e0

File tree

3 files changed

+23
-6
lines changed

3 files changed

+23
-6
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ short_description: OCR / redact PDF documents and tabular data
1111
---
1212
# Document redaction
1313

14-
version: 1.6.5
14+
version: 1.6.6
1515

1616
Redact personally identifiable information (PII) from documents (pdf, png, jpg), Word files (docx), or tabular data (xlsx/csv/parquet). Please see the [User Guide](#user-guide) for a full walkthrough of all the features in the app.
1717

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "doc_redaction"
7-
version = "1.6.5"
7+
version = "1.6.6"
88
description = "Redact PDF/image-based documents, Word, or CSV/XLSX files using a Gradio-based GUI interface"
99
readme = "README.md"
1010
authors = [

tools/file_conversion.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,11 +164,27 @@ def process_single_page_for_image_conversion(
164164
if create_images is True:
165165
try:
166166
# Construct the full output directory path
167-
image_output_dir = secure_join(os.getcwd(), input_folder)
168-
out_path = secure_join(
167+
# Normalize input_folder to ensure it's used as-is without sanitization
168+
if os.path.isabs(input_folder):
169+
image_output_dir = Path(input_folder).resolve()
170+
else:
171+
# Join with cwd, but ensure input_folder is used as-is
172+
base_dir = Path(os.getcwd()).resolve()
173+
# Use Path.joinpath which doesn't sanitize folder names
174+
image_output_dir = base_dir / input_folder
175+
image_output_dir = image_output_dir.resolve()
176+
177+
# Ensure the directory exists
178+
image_output_dir.mkdir(parents=True, exist_ok=True)
179+
180+
# Construct the output file path using secure_path_join for the filename only
181+
from tools.secure_path_utils import secure_path_join
182+
183+
out_path = secure_path_join(
169184
image_output_dir, f"{os.path.basename(pdf_path)}_{page_num}.png"
170185
)
171-
os.makedirs(os.path.dirname(out_path), exist_ok=True)
186+
# Convert Path object to string immediately to avoid downstream type issues
187+
out_path = str(out_path)
172188

173189
if os.path.exists(out_path):
174190
# Load existing image
@@ -306,7 +322,8 @@ def convert_pdf_to_images(
306322
widths = [result[2] for result in results]
307323
heights = [result[3] for result in results]
308324

309-
# print("PDF has been converted to images.")
325+
print("PDF has been converted to images.")
326+
print("images:", images)
310327
return images, widths, heights, results
311328

312329

0 commit comments

Comments
 (0)