Skip to content

Commit d4bfebb

Browse files
authored
Merge pull request #37 from brootware/dev
Dev
2 parents b19f1f3 + ef8a50c commit d4bfebb

File tree

11 files changed

+87
-204
lines changed

11 files changed

+87
-204
lines changed

adhocscripts/file_handling.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

adhocscripts/hash_poc.py

Lines changed: 0 additions & 90 deletions
This file was deleted.

adhocscripts/unredact.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "pyredactkit"
3-
version = "0.2.4"
3+
version = "0.2.5"
44
description = "Python cli tool to redact sensitive data"
55
authors = ["brootware <[email protected]>"]
66
license = "GPL-3.0-or-later"

pyredactkit/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#! /usr/bin/env python3
22

33
"""
4-
Py Redact Kit: Py Redact Kit type.
4+
Py Redact Kit: Py Redact Kit version checker.
55
"""
66

77
import sys

pyredactkit/identifiers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class Identifier:
2828
"type": ("ip", "ipv4")
2929
},
3030
{
31-
"pattern": r'((?:(?:\\d{4}[- ]?){3}\\d{4}|\\d{15,16}))(?![\\d])',
31+
"pattern": '((?:(?:\\d{4}[- ]?){3}\\d{4}|\\d{15,16}))(?![\\d])',
3232
"type": ("cc", "creditcard")
3333
},
3434
{

pyredactkit/pyredactkit.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
"""
55

66
import argparse
7-
from ast import arg
87
from pyredactkit.redact import Redactor
98
from pyredactkit.unredact import Unredactor
109
import os

pyredactkit/redact.py

Lines changed: 67 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,6 @@ def write_hashmap(self, hash_map=dict, filename=str):
9292
"""
9393
with open(f".hashshadow_{os.path.basename(filename)}.json", "w", encoding="utf-8") as file:
9494
json.dump(hash_map, file)
95-
print(
96-
f"[ + ].hashshadow_{os.path.basename(filename)}.json file generated. Keep this safe if you need to undo the redaction.")
9795

9896
def valid_options(self):
9997
"""Function to read in valid options from Identifier.regexes
@@ -108,7 +106,7 @@ def valid_options(self):
108106
option_tuple += id['type']
109107
return option_tuple
110108

111-
def redact_specific(self, line=str, option=str, filename=str):
109+
def redact_specific(self, line=str, option=str):
112110
"""Function to redact specific option
113111
Args:
114112
line (str) : line to be supplied to redact
@@ -117,23 +115,41 @@ def redact_specific(self, line=str, option=str, filename=str):
117115
118116
Returns:
119117
line (str): redacted line
118+
kv_pair (dict) : key value pair of uuid to sensitive data.
120119
"""
121-
hash_map = {}
122-
120+
kv_pairs = {}
123121
for id in id_object.regexes:
124122
redact_pattern = id['pattern']
125123
if option in id['type'] and re.search(
126-
redact_pattern, line, flags=re.IGNORECASE):
124+
redact_pattern, line):
127125
pattern_string = re.search(
128-
redact_pattern, line, flags=re.IGNORECASE)
126+
redact_pattern, line)
129127
pattern_string = pattern_string.group(0)
130128
masked_data = str(uuid.uuid4())
131-
hash_map.update({masked_data: pattern_string})
129+
kv_pairs.update({masked_data: pattern_string})
132130
line = re.sub(
133-
redact_pattern, masked_data, line, flags=re.IGNORECASE)
131+
redact_pattern, masked_data, line)
132+
return line, kv_pairs
133+
134+
def redact_all(self, line=str):
135+
"""Function to redact all supported sensitive data types
136+
Args:
137+
line (str) : line to be supplied to redact
134138
135-
self.write_hashmap(hash_map, filename)
136-
return line
139+
Returns:
140+
line (str): redacted line
141+
kv_pair (dict) : key value pair of uuid to sensitive data.
142+
"""
143+
hash_map = {}
144+
for id in id_object.regexes:
145+
redact_pattern = id['pattern']
146+
if re.search(redact_pattern, line):
147+
pattern_string = re.search(redact_pattern, line)
148+
pattern_string = pattern_string.group(0)
149+
masked_data = str(uuid.uuid4())
150+
hash_map.update({masked_data: pattern_string})
151+
line = re.sub(redact_pattern, masked_data, line)
152+
return line, hash_map
137153

138154
def redact_name(self, data=str):
139155
"""Main function to redact
@@ -171,14 +187,14 @@ def process_file(self, filename, option=str, savedir="./"):
171187
# create the directory if not present
172188
if not os.path.exists(os.path.dirname(savedir)):
173189
print(
174-
"[ + ] "
190+
"[+] "
175191
+ os.path.dirname(savedir)
176192
+ " directory does not exist, creating it."
177193
)
178194
os.makedirs(os.path.dirname(savedir))
179195

180196
print(
181-
"[ + ] Processing starts now. This may take some time "
197+
"[+] Processing starts now. This may take some time "
182198
"depending on the file size. Monitor the redacted file "
183199
"size to monitor progress"
184200
)
@@ -192,22 +208,22 @@ def process_file(self, filename, option=str, savedir="./"):
192208
# Check if any redaction type option is given in argument. If none, will redact all sensitive data.
193209
if type(option) is not str:
194210
print(
195-
f"[ + ] No option supplied, will be redacting all the sensitive data supported")
211+
f"[+] No option supplied, will be redacting all the sensitive data supported")
212+
hash_map = {}
196213
for line in target_file:
197-
for p in id_object.regexes:
198-
redact_pattern = p['pattern']
199-
if re.search(redact_pattern, line, flags=re.IGNORECASE):
214+
# count elements to be redacted
215+
for id in id_object.regexes:
216+
if re.search(id['pattern'], line):
200217
count += 1
201-
pattern_string = re.search(
202-
redact_pattern, line, flags=re.IGNORECASE)
203-
pattern_string = pattern_string.group(0)
204-
masked_data = str(uuid.uuid4())
205-
hash_map.update(
206-
{masked_data: pattern_string})
207-
line = re.sub(redact_pattern, masked_data, line,
208-
flags=re.IGNORECASE)
209-
result.write(line)
218+
# redact all and write hashshadow
219+
data = self.redact_all(line)
220+
redacted_line = data[0]
221+
kv_pairs = data[1]
222+
hash_map.update(kv_pairs)
223+
result.write(redacted_line)
210224
self.write_hashmap(hash_map, filename)
225+
print(
226+
f"[+] .hashshadow_{os.path.basename(filename)}.json file generated. Keep this safe if you need to undo the redaction.")
211227
# Separate option to redact names
212228
elif option in ("name", "names"):
213229
content = target_file.read()
@@ -218,25 +234,34 @@ def process_file(self, filename, option=str, savedir="./"):
218234
os.remove(
219235
f"{savedir}redacted_{os.path.basename(filename)}")
220236
sys.exit(
221-
"[ - ] Not a valid option for redaction type.")
237+
"[-] Not a valid option for redaction type.")
222238
# Redacts all other options here
223239
else:
224-
print(f"[ + ] Redacting {option} from the file")
240+
print(f"[+] Redacting {option} from the file")
241+
hash_map = {}
225242
for line in target_file:
243+
# count elements to be redacted
226244
for id in id_object.regexes:
227-
if option in id['type'] and re.search(id['pattern'], line, flags=re.IGNORECASE):
245+
if option in id['type'] and re.search(id['pattern'], line):
228246
count += 1
229-
line = self.redact_specific(line, option, filename)
230-
result.write(line)
247+
# redact specific option and write hashshadow
248+
data = self.redact_specific(line, option)
249+
redacted_line = data[0]
250+
kv_pairs = data[1]
251+
hash_map.update(kv_pairs)
252+
result.write(redacted_line)
253+
self.write_hashmap(hash_map, filename)
254+
print(
255+
f"[+].hashshadow_{os.path.basename(filename)}.json file generated. Keep this safe if you need to undo the redaction.")
231256

232-
print(f"[ + ] Redacted {count} targets...")
257+
print(f"[+] Redacted {count} targets...")
233258
print(
234-
f"[ + ] Redacted results saved to {savedir}redacted_{os.path.basename(filename)}")
259+
f"[+] Redacted results saved to {savedir}redacted_{os.path.basename(filename)}")
235260

236261
except UnicodeDecodeError:
237262
os.remove(f"{savedir}redacted_{os.path.basename(filename)}")
238-
print("[ - ] Removed incomplete redact file")
239-
sys.exit("[ - ] Unable to read file")
263+
print("[-] Removed incomplete redact file")
264+
sys.exit("[-] Unable to read file")
240265

241266
def process_report(self, filename, savedir="./"):
242267
"""Function to calculate and generate a report of man hours saved.
@@ -255,7 +280,7 @@ def process_report(self, filename, savedir="./"):
255280
# create the directory if not present
256281
if not os.path.exists(os.path.dirname(savedir)):
257282
print(
258-
"[ + ] "
283+
"[+] "
259284
+ os.path.dirname(savedir)
260285
+ " directory does not exist, creating it."
261286
)
@@ -277,9 +302,9 @@ def process_report(self, filename, savedir="./"):
277302
reading_minutes = math.ceil(total_words/WPM)
278303
reading_hours = math.floor(reading_minutes/60)
279304

280-
word_report = f"[ + ] Estimated total words : {total_words}"
281-
minutes_saved = f"[ + ] Estimated total minutes saved : {reading_minutes}"
282-
man_hours_saved = f"[ + ] Estimated total man hours saved : {reading_hours}"
305+
word_report = f"[+] Estimated total words : {total_words}"
306+
minutes_saved = f"[+] Estimated total minutes saved : {reading_minutes}"
307+
man_hours_saved = f"[+] Estimated total man hours saved : {reading_hours}"
283308

284309
# Open a file write pointer as result
285310
with open(
@@ -290,9 +315,9 @@ def process_report(self, filename, savedir="./"):
290315
result.write(word_report + "\n" +
291316
minutes_saved + "\n" + man_hours_saved)
292317
print(
293-
f"[ + ] Estimated man hours saved report saved to {savedir}manhours_saved_{os.path.basename(filename)}")
318+
f"[+] Estimated man hours saved report saved to {savedir}manhours_saved_{os.path.basename(filename)}")
294319

295320
except UnicodeDecodeError:
296321
os.remove(f"manhour_saved_report_{os.path.basename(filename)}")
297-
print("[ - ] Removed incomplete report")
298-
sys.exit("[ - ] Unable to read target file")
322+
print("[-] Removed incomplete report")
323+
sys.exit("[-] Unable to read target file")

0 commit comments

Comments
 (0)