@@ -92,8 +92,6 @@ def write_hashmap(self, hash_map=dict, filename=str):
9292 """
9393 with open (f".hashshadow_{ os .path .basename (filename )} .json" , "w" , encoding = "utf-8" ) as file :
9494 json .dump (hash_map , file )
95- print (
96- f"[ + ].hashshadow_{ os .path .basename (filename )} .json file generated. Keep this safe if you need to undo the redaction." )
9795
9896 def valid_options (self ):
9997 """Function to read in valid options from Identifier.regexes
@@ -108,7 +106,7 @@ def valid_options(self):
108106 option_tuple += id ['type' ]
109107 return option_tuple
110108
111- def redact_specific (self , line = str , option = str , filename = str ):
109+ def redact_specific (self , line = str , option = str ):
112110 """Function to redact specific option
113111 Args:
114112 line (str) : line to be supplied to redact
@@ -117,23 +115,41 @@ def redact_specific(self, line=str, option=str, filename=str):
117115
118116 Returns:
119117 line (str): redacted line
118+ kv_pair (dict) : key value pair of uuid to sensitive data.
120119 """
121- hash_map = {}
122-
120+ kv_pairs = {}
123121 for id in id_object .regexes :
124122 redact_pattern = id ['pattern' ]
125123 if option in id ['type' ] and re .search (
126- redact_pattern , line , flags = re . IGNORECASE ):
124+ redact_pattern , line ):
127125 pattern_string = re .search (
128- redact_pattern , line , flags = re . IGNORECASE )
126+ redact_pattern , line )
129127 pattern_string = pattern_string .group (0 )
130128 masked_data = str (uuid .uuid4 ())
131- hash_map .update ({masked_data : pattern_string })
129+ kv_pairs .update ({masked_data : pattern_string })
132130 line = re .sub (
133- redact_pattern , masked_data , line , flags = re .IGNORECASE )
131+ redact_pattern , masked_data , line )
132+ return line , kv_pairs
133+
134+ def redact_all (self , line = str ):
135+ """Function to redact specific option
136+ Args:
137+ line (str) : line to be supplied to redact
134138
135- self .write_hashmap (hash_map , filename )
136- return line
139+ Returns:
140+ line (str): redacted line
141+ kv_pair (dict) : key value pair of uuid to sensitive data.
142+ """
143+ hash_map = {}
144+ for id in id_object .regexes :
145+ redact_pattern = id ['pattern' ]
146+ if re .search (redact_pattern , line ):
147+ pattern_string = re .search (redact_pattern , line )
148+ pattern_string = pattern_string .group (0 )
149+ masked_data = str (uuid .uuid4 ())
150+ hash_map .update ({masked_data : pattern_string })
151+ line = re .sub (redact_pattern , masked_data , line )
152+ return line , hash_map
137153
138154 def redact_name (self , data = str ):
139155 """Main function to redact
@@ -171,14 +187,14 @@ def process_file(self, filename, option=str, savedir="./"):
171187 # created the directory if not present
172188 if not os .path .exists (os .path .dirname (savedir )):
173189 print (
174- "[ + ] "
190+ "[+ ] "
175191 + os .path .dirname (savedir )
176192 + " directory does not exist, creating it."
177193 )
178194 os .makedirs (os .path .dirname (savedir ))
179195
180196 print (
181- "[ + ] Processing starts now. This may take some time "
197+ "[+ ] Processing starts now. This may take some time "
182198 "depending on the file size. Monitor the redacted file "
183199 "size to monitor progress"
184200 )
@@ -192,22 +208,22 @@ def process_file(self, filename, option=str, savedir="./"):
192208 # Check if any redaction type option is given in argument. If none, will redact all sensitive data.
193209 if type (option ) is not str :
194210 print (
195- f"[ + ] No option supplied, will be redacting all the sensitive data supported" )
211+ f"[+] No option supplied, will be redacting all the sensitive data supported" )
212+ hash_map = {}
196213 for line in target_file :
197- for p in id_object . regexes :
198- redact_pattern = p [ 'pattern' ]
199- if re .search (redact_pattern , line , flags = re . IGNORECASE ):
214+ # count elements to be redacted
215+ for id in id_object . regexes :
216+ if re .search (id [ 'pattern' ] , line ):
200217 count += 1
201- pattern_string = re .search (
202- redact_pattern , line , flags = re .IGNORECASE )
203- pattern_string = pattern_string .group (0 )
204- masked_data = str (uuid .uuid4 ())
205- hash_map .update (
206- {masked_data : pattern_string })
207- line = re .sub (redact_pattern , masked_data , line ,
208- flags = re .IGNORECASE )
209- result .write (line )
218+ # redact all and write hashshadow
219+ data = self .redact_all (line )
220+ redacted_line = data [0 ]
221+ kv_pairs = data [1 ]
222+ hash_map .update (kv_pairs )
223+ result .write (redacted_line )
210224 self .write_hashmap (hash_map , filename )
225+ print (
226+ f"[+] .hashshadow_{ os .path .basename (filename )} .json file generated. Keep this safe if you need to undo the redaction." )
211227 # Separate option to redact names
212228 elif option in ("name" , "names" ):
213229 content = target_file .read ()
@@ -218,25 +234,34 @@ def process_file(self, filename, option=str, savedir="./"):
218234 os .remove (
219235 f"{ savedir } redacted_{ os .path .basename (filename )} " )
220236 sys .exit (
221- "[ - ] Not a valid option for redaction type." )
237+ "[- ] Not a valid option for redaction type." )
222238 # Redacts all other options here
223239 else :
224- print (f"[ + ] Redacting { option } from the file" )
240+ print (f"[+] Redacting { option } from the file" )
241+ hash_map = {}
225242 for line in target_file :
243+ # count elements to be redacted
226244 for id in id_object .regexes :
227- if option in id ['type' ] and re .search (id ['pattern' ], line , flags = re . IGNORECASE ):
245+ if option in id ['type' ] and re .search (id ['pattern' ], line ):
228246 count += 1
229- line = self .redact_specific (line , option , filename )
230- result .write (line )
247+ # redact specific option and write hashshadow
248+ data = self .redact_specific (line , option )
249+ redacted_line = data [0 ]
250+ kv_pairs = data [1 ]
251+ hash_map .update (kv_pairs )
252+ result .write (redacted_line )
253+ self .write_hashmap (hash_map , filename )
254+ print (
255+ f"[+].hashshadow_{ os .path .basename (filename )} .json file generated. Keep this safe if you need to undo the redaction." )
231256
232- print (f"[ + ] Redacted { count } targets..." )
257+ print (f"[+ ] Redacted { count } targets..." )
233258 print (
234- f"[ + ] Redacted results saved to { savedir } redacted_{ os .path .basename (filename )} " )
259+ f"[+ ] Redacted results saved to { savedir } redacted_{ os .path .basename (filename )} " )
235260
236261 except UnicodeDecodeError :
237262 os .remove (f"{ savedir } redacted_{ os .path .basename (filename )} " )
238- print ("[ - ] Removed incomplete redact file" )
239- sys .exit ("[ - ] Unable to read file" )
263+ print ("[- ] Removed incomplete redact file" )
264+ sys .exit ("[- ] Unable to read file" )
240265
241266 def process_report (self , filename , savedir = "./" ):
242267 """Function to process calculate and generate report of man hour saved.
@@ -255,7 +280,7 @@ def process_report(self, filename, savedir="./"):
255280 # created the directory if not present
256281 if not os .path .exists (os .path .dirname (savedir )):
257282 print (
258- "[ + ] "
283+ "[+ ] "
259284 + os .path .dirname (savedir )
260285 + " directory does not exist, creating it."
261286 )
@@ -277,9 +302,9 @@ def process_report(self, filename, savedir="./"):
277302 reading_minutes = math .ceil (total_words / WPM )
278303 reading_hours = math .floor (reading_minutes / 60 )
279304
280- word_report = f"[ + ] Estimated total words : { total_words } "
281- minutes_saved = f"[ + ] Estimated total minutes saved : { reading_minutes } "
282- man_hours_saved = f"[ + ] Estimated total man hours saved : { reading_hours } "
305+ word_report = f"[+ ] Estimated total words : { total_words } "
306+ minutes_saved = f"[+ ] Estimated total minutes saved : { reading_minutes } "
307+ man_hours_saved = f"[+ ] Estimated total man hours saved : { reading_hours } "
283308
284309 # Open a file write pointer as result
285310 with open (
@@ -290,9 +315,9 @@ def process_report(self, filename, savedir="./"):
290315 result .write (word_report + "\n " +
291316 minutes_saved + "\n " + man_hours_saved )
292317 print (
293- f"[ + ] Estimated man hours saved report saved to { savedir } manhours_saved_{ os .path .basename (filename )} " )
318+ f"[+ ] Estimated man hours saved report saved to { savedir } manhours_saved_{ os .path .basename (filename )} " )
294319
295320 except UnicodeDecodeError :
296321 os .remove (f"manhour_saved_report_{ os .path .basename (filename )} " )
297- print ("[ - ] Removed incomplete report" )
298- sys .exit ("[ - ] Unable to read target file" )
322+ print ("[- ] Removed incomplete report" )
323+ sys .exit ("[- ] Unable to read target file" )
0 commit comments