Skip to content

Commit 4e8490c

Browse files
authored
Updating verification engines to include latest updates to redactor engines (#1162)
* Enabling use of ad-hoc recognizers in verifier
* Adding support to standard image verification engine as well
* Linting fix
* Removing redundant init
* Removing unused import
1 parent 93934a9 commit 4e8490c

File tree

3 files changed

+48
-38
lines changed

3 files changed

+48
-38
lines changed

presidio-image-redactor/presidio_image_redactor/dicom_image_pii_verify_engine.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,21 @@ def verify_dicom_instance(
4949
instance: pydicom.dataset.FileDataset,
5050
padding_width: int = 25,
5151
display_image: bool = True,
52+
use_metadata: bool = True,
5253
ocr_kwargs: Optional[dict] = None,
54+
ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None,
5355
**text_analyzer_kwargs,
5456
) -> Tuple[Optional[PIL.Image.Image], dict, list]:
5557
"""Verify PII on a single DICOM instance.
5658
5759
:param instance: Loaded DICOM instance including pixel data and metadata.
5860
:param padding_width: Padding width to use when running OCR.
5961
:param display_image: If the verification image is displayed and returned.
62+
:param use_metadata: Whether to redact text in the image that
63+
is present in the metadata.
6064
:param ocr_kwargs: Additional params for OCR methods.
65+
:param ad_hoc_recognizers: List of PatternRecognizer objects to use
66+
for ad-hoc recognizer.
6167
:param text_analyzer_kwargs: Additional values for the analyze method
6268
in ImageAnalyzerEngine.
6369
@@ -82,24 +88,17 @@ def verify_dicom_instance(
8288
loaded_image = Image.open(png_filepath)
8389
image = self._add_padding(loaded_image, is_greyscale, padding_width)
8490

85-
# Create custom recognizer using DICOM metadata
86-
original_metadata, is_name, is_patient = self._get_text_metadata(instance_copy)
87-
phi_list = self._make_phi_list(original_metadata, is_name, is_patient)
88-
deny_list_recognizer = PatternRecognizer(
89-
supported_entity="PERSON", deny_list=phi_list
90-
)
91+
# Get analyzer results
9192
ocr_results = self.ocr_engine.perform_ocr(image)
92-
analyzer_results = self.image_analyzer_engine.analyze(
93-
image,
94-
ocr_kwargs=ocr_kwargs,
95-
ad_hoc_recognizers=[deny_list_recognizer],
96-
**text_analyzer_kwargs,
93+
analyzer_results = self._get_analyzer_results(
94+
image, instance, use_metadata, ocr_kwargs, ad_hoc_recognizers,
95+
**text_analyzer_kwargs
9796
)
9897

9998
# Get image with verification boxes
10099
verify_image = (
101100
self.verify(
102-
image, ad_hoc_recognizers=[deny_list_recognizer], **text_analyzer_kwargs
101+
image, ad_hoc_recognizers=ad_hoc_recognizers, **text_analyzer_kwargs
103102
)
104103
if display_image
105104
else None
@@ -114,7 +113,9 @@ def eval_dicom_instance(
114113
padding_width: int = 25,
115114
tolerance: int = 50,
116115
display_image: bool = False,
116+
use_metadata: bool = True,
117117
ocr_kwargs: Optional[dict] = None,
118+
ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None,
118119
**text_analyzer_kwargs,
119120
) -> Tuple[Optional[PIL.Image.Image], dict]:
120121
"""Evaluate performance for a single DICOM instance.
@@ -124,7 +125,11 @@ def eval_dicom_instance(
124125
:param padding_width: Padding width to use when running OCR.
125126
:param tolerance: Pixel distance tolerance for matching to ground truth.
126127
:param display_image: If the verification image is displayed and returned.
128+
:param use_metadata: Whether to redact text in the image that
129+
is present in the metadata.
127130
:param ocr_kwargs: Additional params for OCR methods.
131+
:param ad_hoc_recognizers: List of PatternRecognizer objects to use
132+
for ad-hoc recognizer.
128133
:param text_analyzer_kwargs: Additional values for the analyze method
129134
in ImageAnalyzerEngine.
130135
@@ -135,7 +140,9 @@ def eval_dicom_instance(
135140
instance,
136141
padding_width,
137142
display_image,
143+
use_metadata,
138144
ocr_kwargs=ocr_kwargs,
145+
ad_hoc_recognizers=ad_hoc_recognizers,
139146
**text_analyzer_kwargs,
140147
)
141148
formatted_ocr_results = self.bbox_processor.get_bboxes_from_ocr_results(

presidio-image-redactor/presidio_image_redactor/image_pii_verify_engine.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from PIL import Image, ImageChops
2-
from presidio_image_redactor.image_analyzer_engine import ImageAnalyzerEngine
2+
from presidio_image_redactor.image_redactor_engine import ImageRedactorEngine
3+
from presidio_analyzer import PatternRecognizer
34
import matplotlib
45
import io
56
from matplotlib import pyplot as plt
6-
from typing import Optional
7+
from typing import Optional, List
78

89

910
def fig2img(fig):
@@ -16,16 +17,15 @@ def fig2img(fig):
1617
return img
1718

1819

19-
class ImagePiiVerifyEngine:
20+
class ImagePiiVerifyEngine(ImageRedactorEngine):
2021
"""ImagePiiVerifyEngine class only supporting Pii verification currently."""
2122

22-
def __init__(self, image_analyzer_engine: Optional[ImageAnalyzerEngine] = None):
23-
if not image_analyzer_engine:
24-
image_analyzer_engine = ImageAnalyzerEngine()
25-
self.image_analyzer_engine = image_analyzer_engine
26-
2723
def verify(
28-
self, image: Image, ocr_kwargs: Optional[dict] = None, **text_analyzer_kwargs
24+
self,
25+
image: Image,
26+
ocr_kwargs: Optional[dict] = None,
27+
ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None,
28+
**text_analyzer_kwargs
2929
) -> Image:
3030
"""Annotate image with the detected PII entity.
3131
@@ -34,6 +34,8 @@ def verify(
3434
3535
:param image: PIL Image to be processed.
3636
:param ocr_kwargs: Additional params for OCR methods.
37+
:param ad_hoc_recognizers: List of PatternRecognizer objects to use
38+
for ad-hoc recognizer.
3739
:param text_analyzer_kwargs: Additional values for the analyze method
3840
in ImageAnalyzerEngine.
3941
@@ -42,9 +44,23 @@ def verify(
4244

4345
image = ImageChops.duplicate(image)
4446
image_x, image_y = image.size
45-
bboxes = self.image_analyzer_engine.analyze(
46-
image, ocr_kwargs, **text_analyzer_kwargs
47-
)
47+
48+
# Detect PII
49+
self._check_ad_hoc_recognizer_list(ad_hoc_recognizers)
50+
if ad_hoc_recognizers is None:
51+
bboxes = self.image_analyzer_engine.analyze(
52+
image,
53+
ocr_kwargs=ocr_kwargs,
54+
**text_analyzer_kwargs,
55+
)
56+
else:
57+
bboxes = self.image_analyzer_engine.analyze(
58+
image,
59+
ocr_kwargs=ocr_kwargs,
60+
ad_hoc_recognizers=ad_hoc_recognizers,
61+
**text_analyzer_kwargs,
62+
)
63+
4864
fig, ax = plt.subplots()
4965
image_r = 70
5066
fig.set_size_inches(image_x / image_r, image_y / image_r)

presidio-image-redactor/tests/test_dicom_image_pii_verify_engine.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -86,21 +86,11 @@ def test_verify_dicom_instance_happy_path(
8686
mock_add_padding = mocker.patch.object(
8787
DicomImagePiiVerifyEngine, "_add_padding", return_value=None
8888
)
89-
mock_get_metadata = mocker.patch.object(
90-
DicomImagePiiVerifyEngine, "_get_text_metadata", return_value=[None, None, None]
91-
)
92-
mock_make_phi_list = mocker.patch.object(
93-
DicomImagePiiVerifyEngine, "_make_phi_list", return_value=None
94-
)
95-
mock_patternrecognizer = mocker.patch(
96-
"presidio_image_redactor.dicom_image_pii_verify_engine.PatternRecognizer",
97-
return_value=None,
98-
)
9989
mock_perform_ocr = mocker.patch.object(
10090
TesseractOCR, "perform_ocr", return_value=None
10191
)
10292
mock_analyze = mocker.patch.object(
103-
ImageAnalyzerEngine, "analyze", return_value=None
93+
DicomImagePiiVerifyEngine, "_get_analyzer_results", return_value=None
10494
)
10595
mock_verify = mocker.patch.object(
10696
DicomImagePiiVerifyEngine, "verify", return_value=None
@@ -115,9 +105,6 @@ def test_verify_dicom_instance_happy_path(
115105
assert mock_save_pixel_array.call_count == 1
116106
assert mock_image_open.call_count == 1
117107
assert mock_add_padding.call_count == 1
118-
assert mock_get_metadata.call_count == 1
119-
assert mock_make_phi_list.call_count == 1
120-
assert mock_patternrecognizer.call_count == 1
121108
assert mock_perform_ocr.call_count == 1
122109
assert mock_analyze.call_count == 1
123110
assert mock_verify.call_count == 1

0 commit comments

Comments
 (0)