Skip to content

Commit aa308d9

Browse files
committed
Merge remote-tracking branch 'origin/main' into new-release
2 parents ea5a4a8 + 6cda634 commit aa308d9

File tree

4 files changed

+88
-50
lines changed

4 files changed

+88
-50
lines changed

README.md

Lines changed: 32 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -42,24 +42,25 @@ snippet shows an example of how to scan using pre-built detectors.
4242
#### Sample Code
4343

4444
```python
45-
from nightfall import Confidence, DetectionRule, Detector, Nightfall
45+
>>> from nightfall import Confidence, DetectionRule, Detector, Nightfall
4646

47-
# By default, the client reads the API key from the environment variable NIGHTFALL_API_KEY
48-
nightfall = Nightfall()
47+
>>> # By default, the client reads the API key from the environment variable NIGHTFALL_API_KEY
48+
>>> nightfall = Nightfall()
4949

50-
# A rule contains a set of detectors to scan with
51-
detection_rule = DetectionRule([
52-
Detector(min_confidence=Confidence.LIKELY, nightfall_detector="CREDIT_CARD_NUMBER"),
53-
Detector(min_confidence=Confidence.POSSIBLE, nightfall_detector="US_SOCIAL_SECURITY_NUMBER"),
54-
])
50+
>>> # A rule contains a set of detectors to scan with
51+
>>> cc = Detector(min_confidence=Confidence.LIKELY, nightfall_detector="CREDIT_CARD_NUMBER")
52+
>>> ssn = Detector(min_confidence=Confidence.POSSIBLE, nightfall_detector="US_SOCIAL_SECURITY_NUMBER")
53+
>>> detection_rule = DetectionRule([cc, ssn])
5554

56-
findings, _ = nightfall.scan_text(
57-
["hello world", "my SSN is 678-99-8212", "4242-4242-4242-4242"],
58-
[detection_rule]
59-
)
55+
>>> findings, _ = nightfall.scan_text( ["hello world", "my SSN is 678-99-8212", "4242-4242-4242-4242"], detection_rules=[detection_rule])
56+
57+
>>> print(findings)
58+
[[], [Finding(finding='678-99-8212', redacted_finding=...)]]
6059

61-
print(findings)
6260
```
61+
62+
63+
6364
### Scanning Files
6465

6566
Scanning common file types like PDFs or office documents typically requires cumbersome text
@@ -79,26 +80,24 @@ The results from the scan are delivered by webhook; for more information about s
7980
#### Sample Code
8081

8182
```python
82-
from nightfall import Confidence, DetectionRule, Detector, Nightfall
83-
84-
# By default, the client reads the API key from the environment variable NIGHTFALL_API_KEY
85-
nightfall = Nightfall()
86-
87-
# A rule contains a set of detectors to scan with
88-
detection_rule = DetectionRule([
89-
Detector(min_confidence=Confidence.LIKELY, nightfall_detector="CREDIT_CARD_NUMBER"),
90-
Detector(min_confidence=Confidence.POSSIBLE, nightfall_detector="US_SOCIAL_SECURITY_NUMBER"),
91-
])
92-
93-
94-
# Upload the file and start the scan.
95-
# These are conducted asynchronously, so provide a webhook route to an HTTPS server to send results to.
96-
id, message = nightfall.scan_file(
97-
"./super-secret-credit-cards.pdf",
98-
"https://my-service.com/nightfall/listener",
99-
detection_rules=[detection_rule]
100-
)
101-
print("started scan", id, message)
83+
>>> from nightfall import Confidence, DetectionRule, Detector, Nightfall
84+
>>> import os
85+
86+
>>> # By default, the client reads the API key from the environment variable NIGHTFALL_API_KEY
87+
>>> nightfall = Nightfall()
88+
89+
>>> # A rule contains a set of detectors to scan with
90+
>>> cc = Detector(min_confidence=Confidence.LIKELY, nightfall_detector="CREDIT_CARD_NUMBER")
91+
>>> ssn = Detector(min_confidence=Confidence.POSSIBLE, nightfall_detector="US_SOCIAL_SECURITY_NUMBER")
92+
>>> detection_rule = DetectionRule([cc, ssn])
93+
94+
95+
>>> # Upload the file and start the scan.
96+
>>> # These are conducted asynchronously, so provide a webhook route to an HTTPS server to send results to.
97+
>>> id, message = nightfall.scan_file( "./README.md", os.environ["WEBHOOK_ENDPOINT"], detection_rules=[detection_rule])
98+
>>> print("started scan", id, message)
99+
started scan...scan initiated
100+
102101
```
103102

104103
## Contributing

nightfall/findings.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import List
1+
from typing import List, Optional, Any
22
from dataclasses import dataclass
33

44
from nightfall.detection_rules import Confidence
@@ -31,21 +31,25 @@ class Finding:
3131
byte_range (Range): The byte range in which a finding was detected within the item.
3232
codepoint_range (Range): The codepoint range in which a finding was detected within the item. This differs
3333
from byte range since a codepoint may contain multiple bytes.
34+
row_range (Optional[Range]): The row(s) in which a finding was detected, if it was in a tabular document. Index starts at 1.
35+
column_range (Optional[Range]): The column(s) in which a finding was detected, if it was in a tabular document. Index starts at 1.
3436
commit_hash (str): The hash of the commit in which the finding was detected, if known.
3537
matched_detection_rule_uuids (List[str]): The list of detection rule UUIDs that contained a detector that
3638
triggered a match.
3739
matched_detection_rules (List[str]): The list of inline detection rules that contained a detector that triggered
3840
a match.
3941
"""
4042
finding: str
41-
redacted_finding: str
42-
before_context: str
43-
after_context: str
44-
detector_name: str
43+
redacted_finding: Optional[str]
44+
before_context: Optional[str]
45+
after_context: Optional[str]
46+
detector_name: Optional[str]
4547
detector_uuid: str
4648
confidence: Confidence
4749
byte_range: Range
4850
codepoint_range: Range
51+
row_range: Optional[Range]
52+
column_range: Optional[Range]
4953
commit_hash: str
5054
matched_detection_rule_uuids: List[str]
5155
matched_detection_rules: List[str]
@@ -62,7 +66,18 @@ def from_dict(cls, resp: dict) -> "Finding":
6266
Confidence[resp["confidence"]],
6367
Range(resp["location"]["byteRange"]["start"], resp["location"]["byteRange"]["end"]),
6468
Range(resp["location"]["codepointRange"]["start"], resp["location"]["codepointRange"]["end"]),
69+
_range_or_none(resp["location"]["rowRange"]),
70+
_range_or_none(resp["location"]["columnRange"]),
6571
resp["location"].get("commitHash", ""),
6672
resp["matchedDetectionRuleUUIDs"],
6773
resp["matchedDetectionRules"]
6874
)
75+
76+
def _range_or_none(range_or_none: Any) -> Optional[Range]:
    """Convert an optional range mapping into a Range.

    Some ranges are not always present in a response. When the given value is
    None, None is returned; otherwise a Range is built from the value's
    "start" and "end" entries.
    """
    if range_or_none is not None:
        return Range(range_or_none["start"], range_or_none["end"])
    return None
83+

pytest.ini

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@
22
markers =
33
filetest: marks tests as requiring a valid webhook to run
44
integration: marks tests as calling out to the nightfall api to run
5+
6+
addopts = --doctest-glob=README.md
7+
doctest_optionflags = ELLIPSIS
8+

tests/test_api.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,16 @@ def finding_orderer(f):
5757
"Credit Card Number",
5858
result[0][0].detector_uuid,
5959
Confidence.VERY_LIKELY,
60-
Range(0, 19), Range(0, 19), "",
60+
Range(0, 19), Range(0, 19), None, None, "",
6161
[], ["Inline Detection Rule #1"])
6262
assert result[0][1] == Finding(
6363
"489-36-8350",
6464
"[REDACTED]",
6565
"d number, ", " ssn",
66-
"",
66+
"US_SOCIAL_SECURITY_NUMBER",
6767
result[0][1].detector_uuid,
6868
Confidence.VERY_LIKELY,
69-
Range(46, 57), Range(46, 57), "",
69+
Range(46, 57), Range(46, 57), None, None, "",
7070
[], ["Inline Detection Rule #1"])
7171
assert len(redactions) == 1
7272
assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn"
@@ -120,7 +120,9 @@ def test_scan_text():
120120
{
121121
"start": 0,
122122
"end": 19
123-
}
123+
},
124+
"rowRange": None,
125+
"columnRange": None,
124126
},
125127
"redactedLocation":
126128
{
@@ -133,7 +135,9 @@ def test_scan_text():
133135
{
134136
"start": 0,
135137
"end": 19
136-
}
138+
},
139+
"rowRange": None,
140+
"columnRange": None,
137141
},
138142
"matchedDetectionRuleUUIDs":
139143
[],
@@ -164,7 +168,17 @@ def test_scan_text():
164168
{
165169
"start": 46,
166170
"end": 57
167-
}
171+
},
172+
"rowRange":
173+
{
174+
"start": 2,
175+
"end": 4,
176+
},
177+
"columnRange":
178+
{
179+
"start": 1,
180+
"end": 1,
181+
},
168182
},
169183
"redactedLocation":
170184
{
@@ -177,7 +191,9 @@ def test_scan_text():
177191
{
178192
"start": 46,
179193
"end": 56
180-
}
194+
},
195+
"rowRange": None,
196+
"columnRange": None,
181197
},
182198
"matchedDetectionRuleUUIDs":
183199
[],
@@ -318,7 +334,7 @@ def test_scan_text():
318334
"Credit Card Number",
319335
result[0][0].detector_uuid,
320336
Confidence.VERY_LIKELY,
321-
Range(0, 19), Range(0, 19), "",
337+
Range(0, 19), Range(0, 19), None, None, "",
322338
[], ["Inline Detection Rule #1"])
323339
assert result[0][1] == Finding(
324340
"489-36-8350",
@@ -327,7 +343,7 @@ def test_scan_text():
327343
"",
328344
result[0][1].detector_uuid,
329345
Confidence.VERY_LIKELY,
330-
Range(46, 57), Range(46, 57), "",
346+
Range(46, 57), Range(46, 57), Range(2,4), Range(1,1), "",
331347
[], ["Inline Detection Rule #1"])
332348
assert len(redactions) == 1
333349
assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn"
@@ -361,7 +377,9 @@ def test_scan_text_with_policy_uuids():
361377
{
362378
"start": 0,
363379
"end": 19
364-
}
380+
},
381+
"rowRange": None,
382+
"columnRange": None,
365383
},
366384
"redactedLocation":
367385
{
@@ -374,7 +392,9 @@ def test_scan_text_with_policy_uuids():
374392
{
375393
"start": 0,
376394
"end": 19
377-
}
395+
},
396+
"rowRange": None,
397+
"columnRange": None,
378398
},
379399
"matchedDetectionRuleUUIDs":
380400
["0d8efd7b-b87a-478b-984e-9cf5534a46bc"],
@@ -412,7 +432,7 @@ def test_scan_text_with_policy_uuids():
412432
"Credit Card Number",
413433
result[0][0].detector_uuid,
414434
Confidence.VERY_LIKELY,
415-
Range(0, 19), Range(0, 19), "",
435+
Range(0, 19), Range(0, 19), None, None, "",
416436
["0d8efd7b-b87a-478b-984e-9cf5534a46bc"], [])
417437
assert len(redactions) == 1
418438
assert redactions[0] == "491👀-👀👀👀👀-👀👀👀👀-👀👀👀👀 is my credit card number, [REDACTED] ssn"

0 commit comments

Comments
 (0)