@@ -20,38 +20,56 @@ def nightfall():
2020@pytest .mark .integration
2121def test_scan_text_detection_rules_v3 (nightfall ):
2222 result , redactions = nightfall .scan_text (
23- ["4916-6734-7572-5015 is my credit card number" ],
24- detection_rules = [DetectionRule (logical_op = LogicalOp .ANY , detectors = [
25- Detector (min_confidence = Confidence .LIKELY ,
26- min_num_findings = 1 ,
27- display_name = "Credit Card Number" ,
28- nightfall_detector = "CREDIT_CARD_NUMBER" ,
29- context_rules = [ContextRule (regex = Regex ("fake regex" , is_case_sensitive = False ),
30- window_before = 10 , window_after = 10 ,
31- fixed_confidence = Confidence .VERY_UNLIKELY )],
32- exclusion_rules = [ExclusionRule (MatchType .FULL ,
33- word_list = WordList (["never" , "match" ],
34- is_case_sensitive = True ))],
35- redaction_config = RedactionConfig (remove_finding = False ,
36- mask_config = MaskConfig (masking_char = '👀' ,
37- num_chars_to_leave_unmasked = 3 ,
38- chars_to_ignore = ["-" ])),
39- )])],
23+ ["4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn" ],
24+ detection_rules = [
25+ DetectionRule (logical_op = LogicalOp .ANY , detectors = [
26+ Detector (min_confidence = Confidence .LIKELY ,
27+ min_num_findings = 1 ,
28+ display_name = "Credit Card Number" ,
29+ nightfall_detector = "CREDIT_CARD_NUMBER" ,
30+ context_rules = [ContextRule (regex = Regex ("fake regex" , is_case_sensitive = False ),
31+ window_before = 10 , window_after = 10 ,
32+ fixed_confidence = Confidence .VERY_UNLIKELY )],
33+ exclusion_rules = [ExclusionRule (MatchType .FULL ,
34+ word_list = WordList (["never" , "match" ],
35+ is_case_sensitive = True ))],
36+ redaction_config = RedactionConfig (remove_finding = False ,
37+ mask_config = MaskConfig (masking_char = '👀' ,
38+ num_chars_to_leave_unmasked = 3 ,
39+ chars_to_ignore = ["-" ])),
40+ ),
41+ Detector (min_confidence = Confidence .LIKELY , nightfall_detector = "US_SOCIAL_SECURITY_NUMBER" )])],
4042 context_bytes = 10 ,
43+ default_redaction_config = RedactionConfig (remove_finding = False , substitution_phrase = "[REDACTED]" )
4144 )
4245
4346 assert len (result ) == 1
47+ assert len (result [0 ]) == 2
48+
49+ def finding_orderer (f ):
50+ return f .codepoint_range .start
51+
52+ result [0 ].sort (key = finding_orderer )
4453 assert result [0 ][0 ] == Finding (
4554 "4916-6734-7572-5015" ,
46- ' 491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀' ,
55+ " 491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀" ,
4756 None , " is my cre" ,
4857 "Credit Card Number" ,
4958 result [0 ][0 ].detector_uuid ,
5059 Confidence .VERY_LIKELY ,
5160 Range (0 , 19 ), Range (0 , 19 ),
5261 [], ["Inline Detection Rule #1" ])
62+ assert result [0 ][1 ] == Finding (
63+ "489-36-8350" ,
64+ "[REDACTED]" ,
65+ "d number, " , " ssn" ,
66+ "" ,
67+ result [0 ][1 ].detector_uuid ,
68+ Confidence .VERY_LIKELY ,
69+ Range (46 , 57 ), Range (46 , 57 ),
70+ [], ["Inline Detection Rule #1" ])
5371 assert len (redactions ) == 1
54- assert redactions [0 ] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number"
72+ assert redactions [0 ] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn "
5573
5674
5775@pytest .mark .filetest
@@ -123,41 +141,88 @@ def test_scan_text():
123141 [
124142 "Inline Detection Rule #1"
125143 ]
144+ },
145+ {
146+ "finding" : "489-36-8350" ,
147+ "redactedFinding" : "[REDACTED]" ,
148+ "beforeContext" : "d number, " ,
149+ "afterContext" : " ssn" ,
150+ "detector" :
151+ {
152+ "name" : "" ,
153+ "uuid" : "e30d9a87-f6c7-46b9-a8f4-16547901e069"
154+ },
155+ "confidence" : "VERY_LIKELY" ,
156+ "location" :
157+ {
158+ "byteRange" :
159+ {
160+ "start" : 46 ,
161+ "end" : 57
162+ },
163+ "codepointRange" :
164+ {
165+ "start" : 46 ,
166+ "end" : 57
167+ }
168+ },
169+ "redactedLocation" :
170+ {
171+ "byteRange" :
172+ {
173+ "start" : 46 ,
174+ "end" : 56
175+ },
176+ "codepointRange" :
177+ {
178+ "start" : 46 ,
179+ "end" : 56
180+ }
181+ },
182+ "matchedDetectionRuleUUIDs" :
183+ [],
184+ "matchedDetectionRules" :
185+ [
186+ "Inline Detection Rule #1"
187+ ]
126188 }
127189 ]
128190 ],
129191 "redactedPayload" :
130192 [
131- "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number"
193+ "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn "
132194 ]
133195 })
134196 result , redactions = nightfall .scan_text (
135- ["4916-6734-7572-5015 is my credit card number" ],
136- detection_rules = [DetectionRule (logical_op = LogicalOp .ANY , detectors = [
137- Detector (min_confidence = Confidence .LIKELY ,
138- min_num_findings = 1 ,
139- display_name = "Credit Card Number" ,
140- nightfall_detector = "CREDIT_CARD_NUMBER" ,
141- context_rules = [ContextRule (regex = Regex ("fake regex" , is_case_sensitive = False ),
142- window_before = 10 , window_after = 10 ,
143- fixed_confidence = Confidence .VERY_UNLIKELY )],
144- exclusion_rules = [ExclusionRule (MatchType .FULL ,
145- word_list = WordList (["never" , "match" ],
146- is_case_sensitive = True ))],
147- redaction_config = RedactionConfig (remove_finding = False ,
148- mask_config = MaskConfig (masking_char = '👀' ,
149- num_chars_to_leave_unmasked = 3 ,
150- chars_to_ignore = ["-" ])),
151- )])],
197+ ["4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn" ],
198+ detection_rules = [
199+ DetectionRule (logical_op = LogicalOp .ANY , detectors = [
200+ Detector (min_confidence = Confidence .LIKELY ,
201+ min_num_findings = 1 ,
202+ display_name = "Credit Card Number" ,
203+ nightfall_detector = "CREDIT_CARD_NUMBER" ,
204+ context_rules = [ContextRule (regex = Regex ("fake regex" , is_case_sensitive = False ),
205+ window_before = 10 , window_after = 10 ,
206+ fixed_confidence = Confidence .VERY_UNLIKELY )],
207+ exclusion_rules = [ExclusionRule (MatchType .FULL ,
208+ word_list = WordList (["never" , "match" ],
209+ is_case_sensitive = True ))],
210+ redaction_config = RedactionConfig (remove_finding = False ,
211+ mask_config = MaskConfig (masking_char = '👀' ,
212+ num_chars_to_leave_unmasked = 3 ,
213+ chars_to_ignore = ["-" ])),
214+ ),
215+ Detector (min_confidence = Confidence .LIKELY , nightfall_detector = "US_SOCIAL_SECURITY_NUMBER" )])],
152216 context_bytes = 10 ,
217+ default_redaction_config = RedactionConfig (remove_finding = False , substitution_phrase = "[REDACTED]" )
153218 )
154219
155220 assert len (responses .calls ) == 1
156221 assert responses .calls [0 ].request .headers .get ("Authorization" ) == "Bearer NF-NOT_REAL"
157222 assert json .loads (responses .calls [0 ].request .body ) == {
158223 "payload" :
159224 [
160- "4916-6734-7572-5015 is my credit card number"
225+ "4916-6734-7572-5015 is my credit card number, 489-36-8350 ssn "
161226 ],
162227 "config" :
163228 {
@@ -221,16 +286,31 @@ def test_scan_text():
221286 ]
222287 }
223288 }
289+ },
290+ {
291+ "minConfidence" : "LIKELY" ,
292+ "minNumFindings" : 1 ,
293+ "nightfallDetector" : "US_SOCIAL_SECURITY_NUMBER" ,
294+ "detectorType" : "NIGHTFALL_DETECTOR"
224295 }
225296 ],
226297 "logicalOp" : "ANY"
227298 }
228299 ],
229- "contextBytes" : 10
300+ "contextBytes" : 10 ,
301+ "defaultRedactionConfig" :
302+ {
303+ "removeFinding" : False ,
304+ "substitutionConfig" :
305+ {
306+ "substitutionPhrase" : "[REDACTED]"
307+ }
308+ }
230309 }
231310 }
232311
233312 assert len (result ) == 1
313+ assert len (result [0 ]) == 2
234314 assert result [0 ][0 ] == Finding (
235315 "4916-6734-7572-5015" ,
236316 '491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀' ,
@@ -240,8 +320,17 @@ def test_scan_text():
240320 Confidence .VERY_LIKELY ,
241321 Range (0 , 19 ), Range (0 , 19 ),
242322 [], ["Inline Detection Rule #1" ])
323+ assert result [0 ][1 ] == Finding (
324+ "489-36-8350" ,
325+ "[REDACTED]" ,
326+ "d number, " , " ssn" ,
327+ "" ,
328+ result [0 ][1 ].detector_uuid ,
329+ Confidence .VERY_LIKELY ,
330+ Range (46 , 57 ), Range (46 , 57 ),
331+ [], ["Inline Detection Rule #1" ])
243332 assert len (redactions ) == 1
244- assert redactions [0 ] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number"
333+ assert redactions [0 ] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn "
245334
246335
247336def test_scan_text_no_detection_rules ():
@@ -263,7 +352,8 @@ def test_scan_file(tmpdir):
263352 responses .add (responses .POST , 'https://api.nightfall.ai/v3/upload/1/scan' , status = 200 ,
264353 json = {"id" : 1 , "message" : "scan_started" })
265354
266- id , message = nightfall .scan_file (file , "https://my-website.example/callback" , detection_rule_uuids = ["a_uuid" ], request_metadata = "some test data" )
355+ id , message = nightfall .scan_file (file , "https://my-website.example/callback" , detection_rule_uuids = ["a_uuid" ],
356+ request_metadata = "some test data" )
267357
268358 assert len (responses .calls ) == 5
269359 for call in responses .calls :
0 commit comments