1313 BoundingRectangle ,
1414 ColorRGBA ,
1515 Coord2D ,
16+ PdfHyperlink ,
1617 PdfMetaData ,
1718 PdfPageBoundaryType ,
1819 PdfPageGeometry ,
1920 PdfShape ,
2021 PdfTableOfContents ,
2122 PdfTextCell ,
23+ PdfWidget ,
2224 SegmentedPdfPage ,
2325 TextCell ,
2426 TextDirection ,
@@ -425,13 +427,16 @@ def _to_page_geometry_from_decoder(self, page_dim) -> PdfPageGeometry:
425427 coord_origin = CoordOrigin .BOTTOMLEFT ,
426428 )
427429 art_bbox_obj = BoundingBox (
428- l = crop_bbox [0 ], b = crop_bbox [1 ], r = crop_bbox [2 ], t = crop_bbox [3 ]
430+ l = crop_bbox [0 ], b = crop_bbox [1 ], r = crop_bbox [2 ], t = crop_bbox [3 ],
431+ coord_origin = CoordOrigin .BOTTOMLEFT ,
429432 )
430433 media_bbox_obj = BoundingBox (
431- l = media_bbox [0 ], b = media_bbox [1 ], r = media_bbox [2 ], t = media_bbox [3 ]
434+ l = media_bbox [0 ], b = media_bbox [1 ], r = media_bbox [2 ], t = media_bbox [3 ],
435+ coord_origin = CoordOrigin .BOTTOMLEFT ,
432436 )
433437 crop_bbox_obj = BoundingBox (
434- l = crop_bbox [0 ], b = crop_bbox [1 ], r = crop_bbox [2 ], t = crop_bbox [3 ]
438+ l = crop_bbox [0 ], b = crop_bbox [1 ], r = crop_bbox [2 ], t = crop_bbox [3 ],
439+ coord_origin = CoordOrigin .BOTTOMLEFT ,
435440 )
436441
437442 return PdfPageGeometry (
@@ -531,53 +536,58 @@ def _to_shapes_from_decoder(self, shapes_container) -> List[PdfShape]:
531536
532537 return result
533538
534- def _to_shapes_from_widgets (self , widgets_container ) -> List [PdfShape ]:
535- """Convert typed PdfWidgets container to list of PdfShape (rectangle per widget) ."""
536- result : List [PdfShape ] = []
539+ def _to_widgets_from_decoder (self , widgets_container ) -> List [PdfWidget ]:
540+ """Convert typed PdfWidgets container to list of PdfWidget objects ."""
541+ result : List [PdfWidget ] = []
537542
538543 for ind , widget in enumerate (widgets_container ):
539- points = [
540- Coord2D (widget .x0 , widget .y0 ),
541- Coord2D (widget .x1 , widget .y0 ),
542- Coord2D (widget .x1 , widget .y1 ),
543- Coord2D (widget .x0 , widget .y1 ),
544- Coord2D (widget .x0 , widget .y0 ), # close the rectangle
545- ]
546- pdf_shape = PdfShape (
547- index = ind ,
548- parent_id = 0 ,
549- points = points ,
550- has_graphics_state = True ,
551- line_width = 1.0 ,
552- rgb_stroking = ColorRGBA (r = 255 , g = 165 , b = 0 , a = 255 ), # orange
553- rgb_filling = ColorRGBA (r = 255 , g = 165 , b = 0 , a = 64 ), # orange, translucent
544+ rect = BoundingRectangle (
545+ r_x0 = widget .x0 ,
546+ r_y0 = widget .y0 ,
547+ r_x1 = widget .x1 ,
548+ r_y1 = widget .y0 ,
549+ r_x2 = widget .x1 ,
550+ r_y2 = widget .y1 ,
551+ r_x3 = widget .x0 ,
552+ r_y3 = widget .y1 ,
553+ )
554+ result .append (
555+ PdfWidget (
556+ index = ind ,
557+ rect = rect ,
558+ widget_text = widget .text or None ,
559+ widget_description = widget .description or None ,
560+ widget_field_name = widget .field_name or None ,
561+ widget_field_type = widget .field_type or None ,
562+ )
554563 )
555- result .append (pdf_shape )
556564
557565 return result
558566
559- def _to_shapes_from_hyperlinks (self , hyperlinks_container ) -> List [PdfShape ]:
560- """Convert typed PdfHyperlinks container to list of PdfShape (rectangle per hyperlink)."""
561- result : List [PdfShape ] = []
567+ def _to_hyperlinks_from_decoder (
568+ self , hyperlinks_container
569+ ) -> List [PdfHyperlink ]:
570+ """Convert typed PdfHyperlinks container to list of PdfHyperlink objects."""
571+ result : List [PdfHyperlink ] = []
562572
563573 for ind , hyperlink in enumerate (hyperlinks_container ):
564- points = [
565- Coord2D (hyperlink .x0 , hyperlink .y0 ),
566- Coord2D (hyperlink .x1 , hyperlink .y0 ),
567- Coord2D (hyperlink .x1 , hyperlink .y1 ),
568- Coord2D (hyperlink .x0 , hyperlink .y1 ),
569- Coord2D (hyperlink .x0 , hyperlink .y0 ), # close the rectangle
570- ]
571- pdf_shape = PdfShape (
572- index = ind ,
573- parent_id = 0 ,
574- points = points ,
575- has_graphics_state = True ,
576- line_width = 1.0 ,
577- rgb_stroking = ColorRGBA (r = 0 , g = 0 , b = 255 , a = 255 ), # blue
578- rgb_filling = ColorRGBA (r = 0 , g = 0 , b = 255 , a = 64 ), # blue, translucent
574+ rect = BoundingRectangle (
575+ r_x0 = hyperlink .x0 ,
576+ r_y0 = hyperlink .y0 ,
577+ r_x1 = hyperlink .x1 ,
578+ r_y1 = hyperlink .y0 ,
579+ r_x2 = hyperlink .x1 ,
580+ r_y2 = hyperlink .y1 ,
581+ r_x3 = hyperlink .x0 ,
582+ r_y3 = hyperlink .y1 ,
583+ )
584+ result .append (
585+ PdfHyperlink (
586+ index = ind ,
587+ rect = rect ,
588+ uri = hyperlink .uri or None ,
589+ )
579590 )
580- result .append (pdf_shape )
581591
582592 return result
583593
@@ -660,8 +670,10 @@ def _to_segmented_page_from_decoder(
660670
661671 char_cells = self ._to_cells_from_decoder (page_decoder .get_char_cells ())
662672 shapes = self ._to_shapes_from_decoder (page_decoder .get_page_shapes ())
663- shapes += self ._to_shapes_from_widgets (page_decoder .get_page_widgets ())
664- shapes += self ._to_shapes_from_hyperlinks (page_decoder .get_page_hyperlinks ())
673+ widgets = self ._to_widgets_from_decoder (page_decoder .get_page_widgets ())
674+ hyperlinks = self ._to_hyperlinks_from_decoder (
675+ page_decoder .get_page_hyperlinks ()
676+ )
665677 bitmap_resources = self ._to_bitmap_resources_from_decoder (
666678 page_decoder .get_page_images ()
667679 )
@@ -676,6 +688,8 @@ def _to_segmented_page_from_decoder(
676688 has_chars = len (char_cells ) > 0 ,
677689 bitmap_resources = bitmap_resources ,
678690 shapes = shapes ,
691+ widgets = widgets ,
692+ hyperlinks = hyperlinks ,
679693 )
680694
681695 if page_decoder .has_word_cells ():
0 commit comments