Skip to content

Commit aa0186b

Browse files
pr final comment, checking run
1 parent bd33b37 commit aa0186b

File tree

3 files changed

+4
-5
lines changed

3 files changed

+4
-5
lines changed

src/entity/utils.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88

99
def pages_to_bytes(pdf_document: Document, page_start: int, page_end: int) -> BytesIO:
10-
"""Select pages from PDF.
10+
"""Extract a range of pages from a PDF document and return them as a BytesIO buffer.
1111
1212
Args:
1313
pdf_document (Document): PDF to split.
@@ -22,6 +22,4 @@ def pages_to_bytes(pdf_document: Document, page_start: int, page_end: int) -> By
2222
for page_number in range(page_start, page_end + 1):
2323
# Insert the page into the new PDF
2424
select_pdf.insert_pdf(pdf_document, from_page=page_number - 1, to_page=page_number - 1)
25-
26-
# Extract bytes and close document
2725
return BytesIO(select_pdf.tobytes())

src/scripts/pixtral_extract_feature.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,8 @@ def extract_feature(input_directory: Path, prompt: Path, prompt_version: str, gr
129129
matched_files = list(filter(lambda x: x.name == gt.filename, paths))
130130
if matched_files:
131131
gt = update_ground_truth(gt, document=matched_files[0], pixtral_interface=pixtral_interface)
132-
# Compute new features
132+
133+
# Accumulate updated ground truth
133134
gt_list_new.append(gt)
134135

135136
# Write updated items

src/utils/utility.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ def get_pdf_files(input_path: Path) -> list[Path]:
9191

9292

9393
def standardize_text(text: str) -> str:
94-
"""Standardize text by removing new lines, double spaces and lowercasing.
94+
"""Standardize text by removing newlines, collapsing whitespace, stripping accents, and lowercasing.
9595
9696
Args:
9797
text (str): Text to standardize.

0 commit comments

Comments
 (0)