Skip to content

Commit 2da466d

Browse files
author
Dan Hertz
committed
add default redaction config param
1 parent beffdbb commit 2da466d

File tree

2 files changed

+139
-43
lines changed

2 files changed

+139
-43
lines changed

nightfall/api.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
from requests.adapters import HTTPAdapter
1616
from urllib3 import Retry
1717

18-
from nightfall.detection_rules import DetectionRule
18+
from nightfall.detection_rules import DetectionRule, RedactionConfig
1919
from nightfall.exceptions import NightfallUserError, NightfallSystemError
2020
from nightfall.findings import Finding
2121

@@ -56,7 +56,8 @@ def __init__(self, key: Optional[str] = None, signing_secret: Optional[str] = No
5656
}
5757

5858
def scan_text(self, texts: List[str], detection_rules: Optional[List[DetectionRule]] = None,
59-
detection_rule_uuids: Optional[List[str]] = None, context_bytes: Optional[int] = None) ->\
59+
detection_rule_uuids: Optional[List[str]] = None, context_bytes: Optional[int] = None,
60+
default_redaction_config: Optional[RedactionConfig] = None) ->\
6061
Tuple[List[List[Finding]], List[str]]:
6162
"""Scan text with Nightfall.
6263
@@ -74,6 +75,9 @@ def scan_text(self, texts: List[str], detection_rules: Optional[List[DetectionRu
7475
:type detection_rule_uuids: List[str] or None
7576
:param context_bytes: The number of bytes of context (leading and trailing) to return with any matched findings.
7677
:type context_bytes: int or None
78+
:param default_redaction_config: The default redaction configuration to apply to all detection rules, unless
79+
there is a more specific config within a detector.
80+
:type default_redaction_config: RedactionConfig or None
7781
:returns: list of findings, list of redacted input texts
7882
"""
7983

@@ -87,6 +91,8 @@ def scan_text(self, texts: List[str], detection_rules: Optional[List[DetectionRu
8791
config["detectionRules"] = [d.as_dict() for d in detection_rules]
8892
if context_bytes:
8993
config["contextBytes"] = context_bytes
94+
if default_redaction_config:
95+
config["defaultRedactionConfig"] = default_redaction_config.as_dict()
9096
request_body = {
9197
"payload": texts,
9298
"config": config

tests/test_api.py

Lines changed: 131 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -20,38 +20,56 @@ def nightfall():
2020
@pytest.mark.integration
2121
def test_scan_text_detection_rules_v3(nightfall):
2222
result, redactions = nightfall.scan_text(
23-
["4916-6734-7572-5015 is my credit card number"],
24-
detection_rules=[DetectionRule(logical_op=LogicalOp.ANY, detectors=[
25-
Detector(min_confidence=Confidence.LIKELY,
26-
min_num_findings=1,
27-
display_name="Credit Card Number",
28-
nightfall_detector="CREDIT_CARD_NUMBER",
29-
context_rules=[ContextRule(regex=Regex("fake regex", is_case_sensitive=False),
30-
window_before=10, window_after=10,
31-
fixed_confidence=Confidence.VERY_UNLIKELY)],
32-
exclusion_rules=[ExclusionRule(MatchType.FULL,
33-
word_list=WordList(["never", "match"],
34-
is_case_sensitive=True))],
35-
redaction_config=RedactionConfig(remove_finding=False,
36-
mask_config=MaskConfig(masking_char='👀',
37-
num_chars_to_leave_unmasked=3,
38-
chars_to_ignore=["-"])),
39-
)])],
23+
["4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn"],
24+
detection_rules=[
25+
DetectionRule(logical_op=LogicalOp.ANY, detectors=[
26+
Detector(min_confidence=Confidence.LIKELY,
27+
min_num_findings=1,
28+
display_name="Credit Card Number",
29+
nightfall_detector="CREDIT_CARD_NUMBER",
30+
context_rules=[ContextRule(regex=Regex("fake regex", is_case_sensitive=False),
31+
window_before=10, window_after=10,
32+
fixed_confidence=Confidence.VERY_UNLIKELY)],
33+
exclusion_rules=[ExclusionRule(MatchType.FULL,
34+
word_list=WordList(["never", "match"],
35+
is_case_sensitive=True))],
36+
redaction_config=RedactionConfig(remove_finding=False,
37+
mask_config=MaskConfig(masking_char='👀',
38+
num_chars_to_leave_unmasked=3,
39+
chars_to_ignore=["-"])),
40+
),
41+
Detector(min_confidence=Confidence.LIKELY, nightfall_detector="US_SOCIAL_SECURITY_NUMBER")])],
4042
context_bytes=10,
43+
default_redaction_config=RedactionConfig(remove_finding=False, substitution_phrase="[REDACTED]")
4144
)
4245

4346
assert len(result) == 1
47+
assert len(result[0]) == 2
48+
49+
def finding_orderer(f):
50+
return f.codepoint_range.start
51+
52+
result[0].sort(key=finding_orderer)
4453
assert result[0][0] == Finding(
4554
"4916-6734-7572-5015",
46-
'491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀',
55+
"491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀",
4756
None, " is my cre",
4857
"Credit Card Number",
4958
result[0][0].detector_uuid,
5059
Confidence.VERY_LIKELY,
5160
Range(0, 19), Range(0, 19),
5261
[], ["Inline Detection Rule #1"])
62+
assert result[0][1] == Finding(
63+
"489-36-8350",
64+
"[REDACTED]",
65+
"d number, ", " ssn",
66+
"",
67+
result[0][1].detector_uuid,
68+
Confidence.VERY_LIKELY,
69+
Range(46, 57), Range(46, 57),
70+
[], ["Inline Detection Rule #1"])
5371
assert len(redactions) == 1
54-
assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number"
72+
assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn"
5573

5674

5775
@pytest.mark.filetest
@@ -123,41 +141,88 @@ def test_scan_text():
123141
[
124142
"Inline Detection Rule #1"
125143
]
144+
},
145+
{
146+
"finding": "489-36-8350",
147+
"redactedFinding": "[REDACTED]",
148+
"beforeContext": "d number, ",
149+
"afterContext": " ssn",
150+
"detector":
151+
{
152+
"name": "",
153+
"uuid": "e30d9a87-f6c7-46b9-a8f4-16547901e069"
154+
},
155+
"confidence": "VERY_LIKELY",
156+
"location":
157+
{
158+
"byteRange":
159+
{
160+
"start": 46,
161+
"end": 57
162+
},
163+
"codepointRange":
164+
{
165+
"start": 46,
166+
"end": 57
167+
}
168+
},
169+
"redactedLocation":
170+
{
171+
"byteRange":
172+
{
173+
"start": 46,
174+
"end": 56
175+
},
176+
"codepointRange":
177+
{
178+
"start": 46,
179+
"end": 56
180+
}
181+
},
182+
"matchedDetectionRuleUUIDs":
183+
[],
184+
"matchedDetectionRules":
185+
[
186+
"Inline Detection Rule #1"
187+
]
126188
}
127189
]
128190
],
129191
"redactedPayload":
130192
[
131-
"491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number"
193+
"491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn"
132194
]
133195
})
134196
result, redactions = nightfall.scan_text(
135-
["4916-6734-7572-5015 is my credit card number"],
136-
detection_rules=[DetectionRule(logical_op=LogicalOp.ANY, detectors=[
137-
Detector(min_confidence=Confidence.LIKELY,
138-
min_num_findings=1,
139-
display_name="Credit Card Number",
140-
nightfall_detector="CREDIT_CARD_NUMBER",
141-
context_rules=[ContextRule(regex=Regex("fake regex", is_case_sensitive=False),
142-
window_before=10, window_after=10,
143-
fixed_confidence=Confidence.VERY_UNLIKELY)],
144-
exclusion_rules=[ExclusionRule(MatchType.FULL,
145-
word_list=WordList(["never", "match"],
146-
is_case_sensitive=True))],
147-
redaction_config=RedactionConfig(remove_finding=False,
148-
mask_config=MaskConfig(masking_char='👀',
149-
num_chars_to_leave_unmasked=3,
150-
chars_to_ignore=["-"])),
151-
)])],
197+
["4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn"],
198+
detection_rules=[
199+
DetectionRule(logical_op=LogicalOp.ANY, detectors=[
200+
Detector(min_confidence=Confidence.LIKELY,
201+
min_num_findings=1,
202+
display_name="Credit Card Number",
203+
nightfall_detector="CREDIT_CARD_NUMBER",
204+
context_rules=[ContextRule(regex=Regex("fake regex", is_case_sensitive=False),
205+
window_before=10, window_after=10,
206+
fixed_confidence=Confidence.VERY_UNLIKELY)],
207+
exclusion_rules=[ExclusionRule(MatchType.FULL,
208+
word_list=WordList(["never", "match"],
209+
is_case_sensitive=True))],
210+
redaction_config=RedactionConfig(remove_finding=False,
211+
mask_config=MaskConfig(masking_char='👀',
212+
num_chars_to_leave_unmasked=3,
213+
chars_to_ignore=["-"])),
214+
),
215+
Detector(min_confidence=Confidence.LIKELY, nightfall_detector="US_SOCIAL_SECURITY_NUMBER")])],
152216
context_bytes=10,
217+
default_redaction_config=RedactionConfig(remove_finding=False, substitution_phrase="[REDACTED]")
153218
)
154219

155220
assert len(responses.calls) == 1
156221
assert responses.calls[0].request.headers.get("Authorization") == "Bearer NF-NOT_REAL"
157222
assert json.loads(responses.calls[0].request.body) == {
158223
"payload":
159224
[
160-
"4916-6734-7572-5015 is my credit card number"
225+
"4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn"
161226
],
162227
"config":
163228
{
@@ -221,16 +286,31 @@ def test_scan_text():
221286
]
222287
}
223288
}
289+
},
290+
{
291+
"minConfidence": "LIKELY",
292+
"minNumFindings": 1,
293+
"nightfallDetector": "US_SOCIAL_SECURITY_NUMBER",
294+
"detectorType": "NIGHTFALL_DETECTOR"
224295
}
225296
],
226297
"logicalOp": "ANY"
227298
}
228299
],
229-
"contextBytes": 10
300+
"contextBytes": 10,
301+
"defaultRedactionConfig":
302+
{
303+
"removeFinding": False,
304+
"substitutionConfig":
305+
{
306+
"substitutionPhrase": "[REDACTED]"
307+
}
308+
}
230309
}
231310
}
232311

233312
assert len(result) == 1
313+
assert len(result[0]) == 2
234314
assert result[0][0] == Finding(
235315
"4916-6734-7572-5015",
236316
'491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀',
@@ -240,8 +320,17 @@ def test_scan_text():
240320
Confidence.VERY_LIKELY,
241321
Range(0, 19), Range(0, 19),
242322
[], ["Inline Detection Rule #1"])
323+
assert result[0][1] == Finding(
324+
"489-36-8350",
325+
"[REDACTED]",
326+
"d number, ", " ssn",
327+
"",
328+
result[0][1].detector_uuid,
329+
Confidence.VERY_LIKELY,
330+
Range(46, 57), Range(46, 57),
331+
[], ["Inline Detection Rule #1"])
243332
assert len(redactions) == 1
244-
assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number"
333+
assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn"
245334

246335

247336
def test_scan_text_no_detection_rules():
@@ -263,7 +352,8 @@ def test_scan_file(tmpdir):
263352
responses.add(responses.POST, 'https://api.nightfall.ai/v3/upload/1/scan', status=200,
264353
json={"id": 1, "message": "scan_started"})
265354

266-
id, message = nightfall.scan_file(file, "https://my-website.example/callback", detection_rule_uuids=["a_uuid"], request_metadata="some test data")
355+
id, message = nightfall.scan_file(file, "https://my-website.example/callback", detection_rule_uuids=["a_uuid"],
356+
request_metadata="some test data")
267357

268358
assert len(responses.calls) == 5
269359
for call in responses.calls:

0 commit comments

Comments
 (0)