Skip to content

Commit 8bf1476

Browse files
letmerecall, ketakipai, guyd, koryf
authored
Merging dev into main (#19)
* Updated README to include blocked-list and restructured
* Adding minor suggestion by Guy

Co-authored-by: Guy Dumais <[email protected]>

* update the readme with the blocked-list details
* update with new REST route
* Update README.md

Co-authored-by: koryf <[email protected]>

* Update README.md

Co-authored-by: koryf <[email protected]>

* keeping the old REST path until 3.0 is released.
* Update pii_dict format according to deid 3.0.0beta3
* Fix failing pre-commit hook fails on directories (#17)
* Add test for get flagged lines
* Skip PII flag check for directories

---------

Co-authored-by: ketakipai <[email protected]>
Co-authored-by: ketakipai <[email protected]>
Co-authored-by: Guy Dumais <[email protected]>
Co-authored-by: Guy Dumais <[email protected]>
Co-authored-by: koryf <[email protected]>
1 parent f2b7fa6 commit 8bf1476

File tree

8 files changed

+36
-6
lines changed

8 files changed

+36
-6
lines changed

pii_check/pii_check_hook.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def get_payload(content, enabled_entity_list, blocked_list):
3737
def get_flagged_lines(files):
3838
flagged = []
3939
for file in files:
40-
if os.path.exists(file):
40+
if os.path.exists(file) and not os.path.isdir(file):
4141
with open(file, "r") as fp:
4242
lines = fp.readlines()
4343
start_flag = False
@@ -77,8 +77,8 @@ def locate_pii_in_files(content, files, checked, pii_dict):
7777
for number, line in enumerate(lines, 1):
7878
if content in line:
7979
if (
80-
pii_dict["stt_idx"],
81-
pii_dict["end_idx"],
80+
pii_dict["location"]["stt_idx"],
81+
pii_dict["location"]["end_idx"],
8282
number,
8383
file,
8484
) in checked:
@@ -115,16 +115,16 @@ def check_for_pii(url, api_key, enabled_entity_list, blocked_list):
115115
continue
116116
for pii_dict in item["entities"]:
117117
line, file = locate_pii_in_files(content, files, checked, pii_dict)
118-
checked.append((pii_dict["stt_idx"], pii_dict["end_idx"], line, file))
118+
checked.append((pii_dict["location"]["stt_idx"], pii_dict["location"]["end_idx"], line, file))
119119
skip = False
120120
for item in flagged:
121121
if line > item[0] and line < item[1] and file == item[2]:
122122
skip = True
123123
break
124124
if skip == False:
125125
msg.append(
126-
f"PII found - type: {pii_dict['best_label']}, line number: {line}, file: {file}, start index: {pii_dict['stt_idx'] + 1}, end "
127-
f"index: {pii_dict['end_idx'] + 1} "
126+
f"PII found - type: {pii_dict['best_label']}, line number: {line}, file: {file}, start index: {pii_dict['location']['stt_idx'] + 1}, end "
127+
f"index: {pii_dict['location']['end_idx'] + 1} "
128128
)
129129

130130
if not msg:

requirements-test.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
pytest==7.2.1
2+
pytest-check==2.1.2
3+
python-dotenv==0.19.0
4+
requests==2.28.1

tests/__init__.py

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Credit card number: 1234 5678 9101 1123
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
PII_CHECK:OFF
2+
Some content in between the flags. Ideally this content won't be checked for PII.
3+
Below is a dummy PII to check this
4+
Credit card number: 1234 5678 9101 1123
5+
CVV: 123
6+
PII_CHECK:ON
7+
8+
Some content where the check will be performed.
9+
Credit card number: 1234 5678 9101 1123
10+
CVV: 123
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Here's some content.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
./dir_with_files

tests/test_get_flagged_lines.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import pytest_check as check
2+
from pii_check.pii_check_hook import get_flagged_lines
3+
4+
5+
def test_get_flagged_lines():
6+
files = [
7+
"tests/test_data/dir_with_files/file_with_pii.txt", "tests/test_data/dir_with_files/file_without_pii.txt",
8+
"tests/test_data/dir_with_files/file_with_pii_flag_on", "tests/test_data/dir_with_files/file_with_pii_flag_off",
9+
"tests/test_data/dir_with_files/file_with_pii_flag", "tests/test_data/symlink_of_dir_with_files"
10+
]
11+
res = get_flagged_lines(files)
12+
check.equal(res, [(1, 6, 'tests/test_data/dir_with_files/file_with_pii_flag')])
13+

0 commit comments

Comments
 (0)