Skip to content

Commit 0440d2c

Browse files
author
rootware
committed
working refactor so far. Need to start testing
1 parent cdf96e8 commit 0440d2c

File tree

8 files changed

+148
-170
lines changed

8 files changed

+148
-170
lines changed

classes.png

41.4 KB
Loading

packages.png

26.6 KB
Loading

pyredactkit/common_jobs.py

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
""" Common jobs class implementation """
2-
import mimetypes
32
import os
43
import sys
5-
import re
64
import math
75
import json
8-
import uuid
6+
97

108
from pyredactkit.identifiers import Identifier
119
id_object = Identifier()
1210

11+
1312
class CommonJobs:
1413
"""Common Jobs class
1514
Class containing all methods to support redaction
@@ -18,6 +17,7 @@ class CommonJobs:
1817
Static variables:
1918
block (unicode string): To redact sensitive data
2019
"""
20+
dir_create = " directory does not exist, creating it."
2121

2222
def __init__(self) -> None:
2323
"""
@@ -55,7 +55,7 @@ def valid_options(self) -> tuple:
5555
option_tuple += id['type']
5656
return option_tuple
5757

58-
def process_report(self, filename, savedir="./"):
58+
def process_report(self, filename):
5959
"""Function to calculate and generate a report of man-hours saved.
6060
Args:
6161
filename (str): File to count the words
@@ -66,18 +66,6 @@ def process_report(self, filename, savedir="./"):
6666
try:
6767
# Open a file read pointer as target_file
6868
with open(filename, encoding="utf-8") as target_file:
69-
if savedir != "./" and savedir[-1] != "/":
70-
savedir = savedir + "/"
71-
72-
# created the directory if not present
73-
if not os.path.exists(os.path.dirname(savedir)):
74-
print(
75-
"[+] "
76-
+ os.path.dirname(savedir)
77-
+ f"{self.dir_create}"
78-
)
79-
os.makedirs(os.path.dirname(savedir))
80-
8169
text_chunk = target_file.read()
8270

8371
# Words per minute
@@ -101,16 +89,6 @@ def process_report(self, filename, savedir="./"):
10189
print(word_report)
10290
print(minutes_saved)
10391
print(man_hours_saved)
104-
# Open a file write pointer as result
105-
# with open(
106-
# f"{savedir}manhours_saved_{os.path.basename(filename)}",
107-
# "w",
108-
# encoding="utf-8",
109-
# ) as result:
110-
# result.write(word_report + "\n" +
111-
# minutes_saved + "\n" + man_hours_saved)
112-
# print(
113-
# f"[+] Estimated man hours saved report saved to {savedir}manhours_saved_{os.path.basename(filename)}")
11492

11593
except UnicodeDecodeError:
11694
os.remove(f"manhour_saved_report_{os.path.basename(filename)}")

pyredactkit/core_redactor.py

Lines changed: 3 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
""" Core redactor engine class implementation """
2-
32
from pyredactkit.common_jobs import CommonJobs
4-
import mimetypes
3+
from pyredactkit.identifiers import Identifier
54
import os
65
import sys
76
import re
8-
import math
9-
import json
107
import uuid
118

12-
from pyredactkit.identifiers import Identifier
9+
# Instantiate identifier and commonjobs objects
1310
id_object = Identifier()
1411
cj_object = CommonJobs()
1512
""" Coreredactor library """
@@ -38,58 +35,6 @@ def __init__(self) -> None:
3835
"""
3936
return None
4037

41-
def read_custom_patterns(self, custom_file) -> list:
42-
'''Load Rules
43-
Loads either a default ruleset or a self defined ruleset.
44-
Rules are loaded to patterns variable
45-
Args:
46-
custom_file (str): Custom rule file to be loaded
47-
Returns:
48-
patterns (list): list of custom patterns
49-
'''
50-
try:
51-
with open(custom_file, encoding="utf-8") as customfile:
52-
return json.load(customfile)
53-
except FileNotFoundError:
54-
sys.exit("[-] Pattern file was not found")
55-
except json.JSONDecodeError:
56-
sys.exit("[-] Issue decoding json file. This might be an error with your regex pattern.")
57-
58-
def write_hashmap(self, hash_map=dict, filename=str, savedir="./") -> dict:
59-
"""Function that writes a .hashshadow_file.txt.json to os directory.
60-
Args:
61-
hash_map (dictionary): dictionary object to be written to file.
62-
filename (str): name of supplied file
63-
64-
Returns:
65-
Writes .hashshadow_file.txt.json to os directory
66-
"""
67-
with open(f"{savedir}.hashshadow_{os.path.basename(filename)}.json", "w", encoding="utf-8") as file:
68-
json.dump(hash_map, file)
69-
70-
def redact_custom(self, line=str, customfile=str) -> tuple:
71-
"""Function to redact custom option
72-
Args:
73-
line (str) : line to be supplied to redact
74-
option (str): (optional) choice for redaction
75-
filename (str): name of supplied file
76-
77-
Returns:
78-
line (str): redacted line
79-
kv_pair (dict) : key value pair of uuid to sensitive data.
80-
"""
81-
custom_pattern = self.read_custom_patterns(customfile)
82-
kv_pairs = {}
83-
for id in range(len(custom_pattern)):
84-
redact_pattern = custom_pattern[id]['pattern']
85-
if re.search(redact_pattern, line, re.IGNORECASE):
86-
pattern_string = re.search(redact_pattern, line)
87-
pattern_string = pattern_string.group(0)
88-
masked_data = str(uuid.uuid4())
89-
kv_pairs.update({masked_data: pattern_string})
90-
line = re.sub(redact_pattern, masked_data, line)
91-
return line, kv_pairs
92-
9338
def redact_all(self, line=str) -> tuple:
9439
"""Function to redact specific option
9540
Args:
@@ -110,21 +55,6 @@ def redact_all(self, line=str) -> tuple:
11055
line = re.sub(redact_pattern, masked_data, line)
11156
return line, hash_map
11257

113-
def redact_name(self, data=str) -> tuple:
114-
"""Main function to redact
115-
Args:
116-
data (str) : data to be supplied to identify names
117-
118-
Returns:
119-
data (str) : redacted names from the data
120-
name_count (int) : number of names redacted from the data
121-
"""
122-
name_list = id_object.names(data)
123-
name_count = len(name_list)
124-
for name in name_list:
125-
data = data.replace(name, self.block)
126-
return data, name_count
127-
12858
def process_text(self, text=str, savedir="./"):
12959
"""Function to process supplied text from cli.
13060
Args:
@@ -153,72 +83,6 @@ def process_text(self, text=str, savedir="./"):
15383
print(
15484
f"[+] Redacted and results saved to {os.path.basename(generated_file)}")
15585

156-
def process_custom_file(self, file_name, customfile=str, make_dir="./"):
157-
"""Function to process supplied file with custom regex file from cli.
158-
Args:
159-
file_name (str): File to redact
160-
customfile (str): custom regex pattern for redaction
161-
make_dir (str): [Optional] directory to place results
162-
163-
Returns:
164-
Creates redacted file.
165-
"""
166-
redact_count = 0
167-
secret_map = {}
168-
try:
169-
# Open a file read pointer as target_file
170-
with open(file_name, encoding="utf-8") as target_file:
171-
if make_dir != "./" and make_dir[-1] != "/":
172-
make_dir = make_dir + "/"
173-
174-
# created the directory if not present
175-
if not os.path.exists(os.path.dirname(make_dir)):
176-
print(
177-
"[+] "
178-
+ os.path.dirname(make_dir)
179-
+ f"{self.dir_create}"
180-
)
181-
os.makedirs(os.path.dirname(make_dir))
182-
183-
print(
184-
"[+] Processing starts now. This may take some time "
185-
"depending on the file size. Monitor the redacted file "
186-
"size to monitor progress"
187-
)
188-
189-
# Open a file write pointer as result
190-
with open(
191-
f"{make_dir}redacted_{os.path.basename(file_name)}",
192-
"w",
193-
encoding="utf-8",
194-
) as result:
195-
# The supplied custom regex pattern file will be used to redact the file
196-
print(f"[+] {customfile} file supplied, will be redacting all supplied custom regex patterns")
197-
secret_map = {}
198-
custom_pattern = self.read_custom_patterns(customfile)
199-
for line in target_file:
200-
# redact_count elements to be redacted
201-
for id in range(len(custom_pattern)):
202-
if re.search(custom_pattern[id]['pattern'], line):
203-
redact_count += 1
204-
# redact all and write hashshadow
205-
data = self.redact_custom(line, customfile)
206-
redacted_line = data[0]
207-
kv_pairs = data[1]
208-
secret_map.update(kv_pairs)
209-
result.write(redacted_line)
210-
cj_object.write_hashmap(secret_map, file_name, make_dir)
211-
print(
212-
f"[+] .hashshadow_{os.path.basename(file_name)}.json file generated. Keep this safe if you need to undo the redaction.")
213-
print(f"[+] Redacted {redact_count} targets...")
214-
print(
215-
f"[+] Redacted results saved to {make_dir}redacted_{os.path.basename(file_name)}")
216-
217-
except UnicodeDecodeError:
218-
os.remove(f"{make_dir}redacted_{os.path.basename(file_name)}")
219-
print("[-] Removed incomplete redact file")
220-
sys.exit("[-] Unable to read file")
221-
22286
def process_core_file(self, filename, savedir="./"):
22387
"""Function to process supplied file from cli.
22488
Args:
@@ -277,6 +141,7 @@ def process_core_file(self, filename, savedir="./"):
277141
print(f"[+] Redacted {count} targets...")
278142
print(
279143
f"[+] Redacted results saved to {savedir}redacted_{os.path.basename(filename)}")
144+
cj_object.process_report(filename)
280145

281146
except UnicodeDecodeError:
282147
os.remove(f"{savedir}redacted_{os.path.basename(filename)}")

pyredactkit/custom_redactor.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
""" Custom redactor engine class implementation """
2+
import os
3+
import sys
4+
import re
5+
import json
6+
import uuid
7+
from pyredactkit.common_jobs import CommonJobs
8+
cj_object = CommonJobs()
9+
10+
11+
class CustomRedactorEngine:
    """CustomRedactorEngine class
    Class containing all methods to support redaction
    of custom sensitive data types defined by the user.

    Static variables:
        dir_create (str): message suffix printed when the output
            directory has to be created
    """
    dir_create = " directory does not exist, creating it."

    def __init__(self) -> None:
        """
        Class Initialization
        Args:
            None

        Returns:
            None
        """
        return None

    def read_custom_patterns(self, custom_file) -> list:
        '''Load Rules
        Loads a user defined ruleset from a JSON file.

        Args:
            custom_file (str): Custom rule file to be loaded
        Returns:
            patterns (list): parsed JSON content; the rest of this class
                reads it as a list of objects carrying a 'pattern' key

        Exits the program (sys.exit) when the file is missing or is not
        valid JSON.
        '''
        try:
            with open(custom_file, encoding="utf-8") as customfile:
                return json.load(customfile)
        except FileNotFoundError:
            sys.exit("[-] Pattern file was not found")
        except json.JSONDecodeError:
            sys.exit("[-] Issue decoding json file. This might be an error with your regex pattern.")

    @staticmethod
    def _compile_patterns(patterns) -> list:
        """Compile every 'pattern' entry once, case-insensitively."""
        return [re.compile(entry['pattern'], re.IGNORECASE) for entry in patterns]

    @staticmethod
    def _redact_with_patterns(line, compiled_patterns) -> tuple:
        """Replace every match of every compiled pattern with its own UUID.

        Args:
            line (str): text to redact
            compiled_patterns (list): compiled regular expressions

        Returns:
            line (str): redacted text
            kv_pairs (dict): uuid -> original sensitive text, one entry
                per replaced match
        """
        kv_pairs = {}

        def _mask(match):
            found = match.group(0)
            # A zero-width match carries no data; substituting it would
            # only inject noise tokens into the output.
            if not found:
                return found
            masked_data = str(uuid.uuid4())
            kv_pairs[masked_data] = found
            return masked_data

        # NOTE(review): a later pattern can still match inside a UUID that
        # an earlier pattern inserted (e.g. a digits-only rule); the
        # original implementation had the same limitation.
        for pattern in compiled_patterns:
            line = pattern.sub(_mask, line)
        return line, kv_pairs

    def redact_custom(self, line=str, customfile=str) -> tuple:
        """Function to redact a line using a custom pattern file
        Args:
            line (str) : line to be supplied to redact
            customfile (str): path to the JSON file with custom patterns

        Returns:
            line (str): redacted line
            kv_pair (dict) : key value pair of uuid to sensitive data.

        The `str` defaults are legacy placeholders kept so the signature
        stays unchanged; both arguments must be supplied by the caller.
        Matching is case-insensitive for detection AND replacement, and
        every match gets its own uuid so each one can be restored.
        """
        custom_pattern = self.read_custom_patterns(customfile)
        return self._redact_with_patterns(line, self._compile_patterns(custom_pattern))

    def process_custom_file(self, file_name, customfile=str, make_dir="./"):
        """Function to process supplied file with custom regex file from cli.
        Args:
            file_name (str): File to redact
            customfile (str): custom regex pattern for redaction
            make_dir (str): [Optional] directory to place results

        Returns:
            Creates redacted file.
        """
        redact_count = 0
        secret_map = {}

        # Normalise the output directory so it can be used as a prefix.
        # An empty string previously crashed on make_dir[-1].
        if not make_dir:
            make_dir = "./"
        elif make_dir != "./" and not make_dir.endswith("/"):
            make_dir = make_dir + "/"
        redacted_path = f"{make_dir}redacted_{os.path.basename(file_name)}"

        try:
            # Open a file read pointer as target_file
            with open(file_name, encoding="utf-8") as target_file:
                # Load and compile the rules once, before any output is
                # created, instead of re-reading the JSON file per line.
                compiled_patterns = self._compile_patterns(
                    self.read_custom_patterns(customfile))

                # create the directory if not present
                out_dir = os.path.dirname(make_dir)
                if not os.path.exists(out_dir):
                    print(
                        "[+] "
                        + out_dir
                        + f"{self.dir_create}"
                    )
                    os.makedirs(out_dir)

                print(
                    "[+] Processing starts now. This may take some time "
                    "depending on the file size. Monitor the redacted file "
                    "size to monitor progress"
                )

                # Open a file write pointer as result
                with open(redacted_path, "w", encoding="utf-8") as result:
                    # The supplied custom regex pattern file will be used to redact the file
                    print(f"[+] {customfile} file supplied, will be redacting all supplied custom regex patterns")
                    for line in target_file:
                        redacted_line, kv_pairs = self._redact_with_patterns(
                            line, compiled_patterns)
                        # count what was actually replaced
                        redact_count += len(kv_pairs)
                        secret_map.update(kv_pairs)
                        result.write(redacted_line)

            cj_object.write_hashmap(secret_map, file_name, make_dir)
            print(
                f"[+] .hashshadow_{os.path.basename(file_name)}.json file generated. Keep this safe if you need to undo the redaction.")
            print(f"[+] Redacted {redact_count} targets...")
            print(
                f"[+] Redacted results saved to {make_dir}redacted_{os.path.basename(file_name)}")
            cj_object.process_report(file_name)

        except UnicodeDecodeError:
            # Only remove the partial output if it was actually created.
            if os.path.exists(redacted_path):
                os.remove(redacted_path)
            print("[-] Removed incomplete redact file")
            sys.exit("[-] Unable to read file")

0 commit comments

Comments
 (0)