55class BboxProcessor :
66 """Common module for general bounding box operators."""
77
8+ @staticmethod
89 def get_bboxes_from_ocr_results (
9- self ,
1010 ocr_results : Dict [str , List [Union [int , str ]]],
1111 ) -> List [Dict [str , Union [int , float , str ]]]:
1212 """Get bounding boxes on padded image for all detected words from ocr_results.
@@ -30,8 +30,8 @@ def get_bboxes_from_ocr_results(
3030
3131 return bboxes
3232
33+ @staticmethod
3334 def get_bboxes_from_analyzer_results (
34- self ,
3535 analyzer_results : List [ImageRecognizerResult ],
3636 ) -> List [Dict [str , Union [str , float , int ]]]:
3737 """Organize bounding box info from analyzer results.
@@ -56,8 +56,8 @@ def get_bboxes_from_analyzer_results(
5656
5757 return bboxes
5858
59+ @staticmethod
def remove_bbox_padding(
    analyzer_bboxes: List[Dict[str, Union[str, float, int]]],
    padding_width: int,
) -> List[Dict[str, int]]:
    """Remove added padding from bounding box coordinates.

    Each returned box has ``left`` and ``top`` shifted back by
    ``padding_width`` (clamped at 0), with ``width`` and ``height``
    copied unchanged. The optional ``label`` and ``entity_type`` fields
    are carried over for every box that has them; any other keys on the
    input boxes are dropped.

    Optional fields are checked per box rather than inferred from the
    first box only, so lists mixing labelled and unlabelled boxes neither
    raise ``KeyError`` nor silently lose fields.

    :param analyzer_bboxes: Bounding boxes to adjust. Every box must
        provide ``left``, ``top``, ``width`` and ``height``.
    :param padding_width: Padding to subtract from ``left`` and ``top``
        (0 if no padding was applied).

    :return: A new list of new dicts; the input list and its dicts are
        not modified. An empty input yields a new empty list.
    :raises ValueError: If ``padding_width`` is negative.
    :raises KeyError: If a box lacks one of the required keys.
    """
    if padding_width < 0:
        raise ValueError("Padding width must be a non-negative integer.")

    # Fields copied through only when a given box actually carries them.
    optional_keys = ("label", "entity_type")

    bboxes = []
    for bbox in analyzer_bboxes:
        unpadded = {
            # Clamp at 0 so a box that started inside the padded margin
            # never ends up with negative coordinates.
            "left": max(0, bbox["left"] - padding_width),
            "top": max(0, bbox["top"] - padding_width),
            "width": bbox["width"],
            "height": bbox["height"],
        }
        unpadded.update(
            {key: bbox[key] for key in optional_keys if key in bbox}
        )
        bboxes.append(unpadded)

    return bboxes
86138
139+ @staticmethod
87140 def match_with_source (
88- self ,
89141 all_pos : List [Dict [str , Union [str , int , float ]]],
90142 pii_source_dict : List [Dict [str , Union [str , int , float ]]],
91143 detected_pii : Dict [str , Union [str , float , int ]],
@@ -107,7 +159,11 @@ def match_with_source(
107159 results_top = detected_pii ["top" ]
108160 results_width = detected_pii ["width" ]
109161 results_height = detected_pii ["height" ]
110- results_score = detected_pii ["score" ]
162+ try :
163+ results_score = detected_pii ["score" ]
164+ except KeyError :
165+ # Handle matching when no score available
166+ results_score = 0
111167 match_found = False
112168
113169 # See what in the ground truth this positive matches
0 commit comments