Skip to content

Commit 591fe59

Browse files
authored
fix: add return value for crop_text method in SegmentedPdfPage class (#275)
* Added return value for the crop_text method in the SegmentedPdfPage class

  Signed-off-by: rahuldas-dev <[email protected]>

* fix: added return value of the crop_text method, along with a return type annotation and a docstring "Returns" section

  Signed-off-by: rahuldas-dev <[email protected]>

---------

Signed-off-by: rahuldas-dev <[email protected]>
1 parent 8f85d05 commit 591fe59

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

docling_core/types/doc/page.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -579,13 +579,17 @@ def load_from_json(cls, filename: Union[str, Path]) -> "SegmentedPdfPage":
579579
with open(filename, "r", encoding="utf-8") as f:
580580
return cls.model_validate_json(f.read())
581581

582-
def crop_text(self, cell_unit: TextCellUnit, bbox: BoundingBox, eps: float = 1.0):
582+
def crop_text(
583+
self, cell_unit: TextCellUnit, bbox: BoundingBox, eps: float = 1.0
584+
) -> str:
583585
"""Extract text from cells within the specified bounding box.
584586
585587
Args:
586588
cell_unit: Type of text unit to extract
587589
bbox: Bounding box to extract from
588590
eps: Epsilon value for position comparison
591+
Returns:
592+
Extracted text from the cells
589593
"""
590594
selection = []
591595
for page_cell in self.iterate_cells(cell_unit):
@@ -605,7 +609,6 @@ def crop_text(self, cell_unit: TextCellUnit, bbox: BoundingBox, eps: float = 1.0
605609

606610
text = ""
607611
for i, cell in enumerate(selection):
608-
609612
if i == 0:
610613
text += cell.text
611614
else:
@@ -619,6 +622,7 @@ def crop_text(self, cell_unit: TextCellUnit, bbox: BoundingBox, eps: float = 1.0
619622
else:
620623
text += " "
621624
text += cell.text
625+
return text
622626

623627
def export_to_textlines(
624628
self,
@@ -640,7 +644,6 @@ def export_to_textlines(
640644
"""
641645
lines: List[str] = []
642646
for cell in self.iterate_cells(cell_unit):
643-
644647
line = ""
645648
if add_location:
646649
line += f"({cell.rect.r_x0:06.02f}, {cell.rect.r_y0:06.02f}) "
@@ -1104,7 +1107,6 @@ def _render_lines(
11041107

11051108
# Draw each rectangle by connecting its four points
11061109
for line in self.lines:
1107-
11081110
line.to_top_left_origin(page_height=page_height)
11091111
for segment in line.iterate_segments():
11101112
draw.line(

0 commit comments

Comments
 (0)