Skip to content

Commit d8541e9

Browse files
authored
Improve bbox processor (#1163)
* Adding method decorators
* Updating remove_bbox_padding and test
* Linting fix
1 parent 4e8490c commit d8541e9

File tree

2 files changed

+74
-18
lines changed

2 files changed

+74
-18
lines changed

presidio-image-redactor/presidio_image_redactor/bbox.py

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
class BboxProcessor:
66
"""Common module for general bounding box operators."""
77

8+
@staticmethod
89
def get_bboxes_from_ocr_results(
9-
self,
1010
ocr_results: Dict[str, List[Union[int, str]]],
1111
) -> List[Dict[str, Union[int, float, str]]]:
1212
"""Get bounding boxes on padded image for all detected words from ocr_results.
@@ -30,8 +30,8 @@ def get_bboxes_from_ocr_results(
3030

3131
return bboxes
3232

33+
@staticmethod
3334
def get_bboxes_from_analyzer_results(
34-
self,
3535
analyzer_results: List[ImageRecognizerResult],
3636
) -> List[Dict[str, Union[str, float, int]]]:
3737
"""Organize bounding box info from analyzer results.
@@ -56,8 +56,8 @@ def get_bboxes_from_analyzer_results(
5656

5757
return bboxes
5858

59+
@staticmethod
5960
def remove_bbox_padding(
60-
self,
6161
analyzer_bboxes: List[Dict[str, Union[str, float, int]]],
6262
padding_width: int,
6363
) -> List[Dict[str, int]]:
@@ -71,21 +71,73 @@ def remove_bbox_padding(
7171
if padding_width < 0:
7272
raise ValueError("Padding width must be a non-negative integer.")
7373

74-
# Remove padding from all bounding boxes
75-
bboxes = [
76-
{
77-
"top": max(0, bbox["top"] - padding_width),
78-
"left": max(0, bbox["left"] - padding_width),
79-
"width": bbox["width"],
80-
"height": bbox["height"],
81-
}
82-
for bbox in analyzer_bboxes
83-
]
74+
if len(analyzer_bboxes) > 0:
75+
# Get fields
76+
has_label = False
77+
has_entity_type = False
78+
try:
79+
_ = analyzer_bboxes[0]["label"]
80+
has_label = True
81+
except KeyError:
82+
has_label = False
83+
try:
84+
_ = analyzer_bboxes[0]["entity_type"]
85+
has_entity_type = True
86+
except KeyError:
87+
has_entity_type = False
88+
89+
# Remove padding from all bounding boxes
90+
if has_label is True and has_entity_type is True:
91+
bboxes = [
92+
{
93+
"left": max(0, bbox["left"] - padding_width),
94+
"top": max(0, bbox["top"] - padding_width),
95+
"width": bbox["width"],
96+
"height": bbox["height"],
97+
"label": bbox["label"],
98+
"entity_type": bbox["entity_type"]
99+
}
100+
for bbox in analyzer_bboxes
101+
]
102+
elif has_label is True and has_entity_type is False:
103+
bboxes = [
104+
{
105+
"left": max(0, bbox["left"] - padding_width),
106+
"top": max(0, bbox["top"] - padding_width),
107+
"width": bbox["width"],
108+
"height": bbox["height"],
109+
"label": bbox["label"]
110+
}
111+
for bbox in analyzer_bboxes
112+
]
113+
elif has_label is False and has_entity_type is True:
114+
bboxes = [
115+
{
116+
"left": max(0, bbox["left"] - padding_width),
117+
"top": max(0, bbox["top"] - padding_width),
118+
"width": bbox["width"],
119+
"height": bbox["height"],
120+
"entity_type": bbox["entity_type"]
121+
}
122+
for bbox in analyzer_bboxes
123+
]
124+
elif has_label is False and has_entity_type is False:
125+
bboxes = [
126+
{
127+
"left": max(0, bbox["left"] - padding_width),
128+
"top": max(0, bbox["top"] - padding_width),
129+
"width": bbox["width"],
130+
"height": bbox["height"]
131+
}
132+
for bbox in analyzer_bboxes
133+
]
134+
else:
135+
bboxes = analyzer_bboxes
84136

85137
return bboxes
86138

139+
@staticmethod
87140
def match_with_source(
88-
self,
89141
all_pos: List[Dict[str, Union[str, int, float]]],
90142
pii_source_dict: List[Dict[str, Union[str, int, float]]],
91143
detected_pii: Dict[str, Union[str, float, int]],
@@ -107,7 +159,11 @@ def match_with_source(
107159
results_top = detected_pii["top"]
108160
results_width = detected_pii["width"]
109161
results_height = detected_pii["height"]
110-
results_score = detected_pii["score"]
162+
try:
163+
results_score = detected_pii["score"]
164+
except KeyError:
165+
# Handle matching when no score available
166+
results_score = 0
111167
match_found = False
112168

113169
# See what in the ground truth this positive matches

presidio-image-redactor/tests/test_bbox.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,9 @@ def test_get_bboxes_from_analyzer_results_happy_path(
213213
],
214214
25,
215215
[
216-
{"top": 0, "left": 0, "width": 100, "height": 100},
217-
{"top": 24, "left": 0, "width": 75, "height": 51},
218-
{"top": 1, "left": 588, "width": 226, "height": 35},
216+
{"left": 0, "top": 0, "width": 100, "height": 100, "entity_type": "TYPE_1"},
217+
{"left": 0, "top": 24, "width": 75, "height": 51, "entity_type": "TYPE_2"},
218+
{"left": 588, "top": 1, "width": 226, "height": 35, "entity_type": "TYPE_3"},
219219
],
220220
),
221221
],

0 commit comments

Comments
 (0)