11""" Core redactor engine class implementation """
2-
32from pyredactkit .common_jobs import CommonJobs
4- import mimetypes
3+ from pyredactkit . identifiers import Identifier
54import os
65import sys
76import re
8- import math
9- import json
107import uuid
118
12- from pyredactkit . identifiers import Identifier
9+ # Instantiate identifier and commonjobs objects
1310id_object = Identifier ()
1411cj_object = CommonJobs ()
1512""" Coreredactor library """
@@ -38,58 +35,6 @@ def __init__(self) -> None:
3835 """
3936 return None
4037
def read_custom_patterns(self, custom_file) -> list:
    """Load the custom redaction rules stored in a JSON file.

    Args:
        custom_file (str): Path of the JSON rule file to read.

    Returns:
        patterns (list): The decoded JSON content of the file.

    Exits the program (SystemExit) with an error message when the file
    does not exist or its content is not valid JSON.
    """
    try:
        # Read the raw text first, then decode it; the file is closed
        # before any decoding error is reported.
        with open(custom_file, encoding="utf-8") as rules_fp:
            raw_rules = rules_fp.read()
        patterns = json.loads(raw_rules)
    except FileNotFoundError:
        sys.exit("[-] Pattern file was not found")
    except json.JSONDecodeError:
        sys.exit("[-] Issue decoding json file. This might be an error with your regex pattern.")
    return patterns
57-
def write_hashmap(self, hash_map=dict, filename=str, savedir="./") -> None:
    """Write the hash map to a hidden ``.hashshadow_<name>.json`` file.

    Args:
        hash_map (dictionary): dictionary object to be written to file.
        filename (str): name of supplied file; only its base name is used
            to build the output file name.
        savedir (str): directory that receives the JSON file. A trailing
            path separator is optional.

    Returns:
        None. The mapping is serialised as JSON to
        ``<savedir>/.hashshadow_<basename of filename>.json``.
    """
    shadow_name = f".hashshadow_{os.path.basename(filename)}.json"
    # os.path.join only inserts a separator when savedir lacks one, so
    # "out" and "out/" both resolve to a file inside the directory instead
    # of "out.hashshadow_..." landing next to it.
    shadow_path = os.path.join(savedir, shadow_name)
    with open(shadow_path, "w", encoding="utf-8") as file:
        json.dump(hash_map, file)
69-
def redact_custom(self, line=str, customfile=str) -> tuple:
    """Redact one line using the patterns from a custom pattern file.

    Each pattern is applied case-insensitively. When a pattern matches,
    every occurrence in the line is replaced by one freshly generated
    UUID, and the text of the first occurrence is recorded under that UUID.

    Args:
        line (str): line to be supplied to redact
        customfile (str): path of the custom pattern file, loaded through
            ``self.read_custom_patterns``

    Returns:
        line (str): redacted line
        kv_pairs (dict): key value pair of uuid to sensitive data.
    """
    kv_pairs = {}
    for rule in self.read_custom_patterns(customfile):
        # Compile once with IGNORECASE so that detection, capture and
        # substitution agree. Previously only the detection ignored case,
        # so a case-only match crashed on `.group(0)` of a None result.
        matcher = re.compile(rule['pattern'], re.IGNORECASE)
        found = matcher.search(line)
        if found is None:
            continue
        masked_data = str(uuid.uuid4())
        kv_pairs[masked_data] = found.group(0)
        line = matcher.sub(masked_data, line)
    return line, kv_pairs
92-
9338 def redact_all (self , line = str ) -> tuple :
9439 """Function to redact specific option
9540 Args:
@@ -110,21 +55,6 @@ def redact_all(self, line=str) -> tuple:
11055 line = re .sub (redact_pattern , masked_data , line )
11156 return line , hash_map
11257
def redact_name(self, data=str) -> tuple:
    """Replace every name identified in the data with ``self.block``.

    Names are obtained from ``id_object.names(data)``; each one is
    substituted wherever it occurs in the text.

    Args:
        data (str) : data to be supplied to identify names

    Returns:
        data (str) : the data with the identified names replaced
        name_count (int) : number of entries returned by the identifier
    """
    names_found = id_object.names(data)
    total_names = len(names_found)
    redacted = data
    for person in names_found:
        # self.block is defined outside this view; presumably the masking
        # string used for redaction - confirm against the class.
        redacted = redacted.replace(person, self.block)
    return redacted, total_names
127-
12858 def process_text (self , text = str , savedir = "./" ):
12959 """Function to process supplied text from cli.
13060 Args:
@@ -153,72 +83,6 @@ def process_text(self, text=str, savedir="./"):
15383 print (
15484 f"[+] Redacted and results saved to { os .path .basename (generated_file )} " )
15585
def process_custom_file(self, file_name, customfile=str, make_dir="./"):
    """Redact a file using the regex patterns of a custom pattern file.

    The redacted copy is written to ``<make_dir>redacted_<basename>`` and
    the UUID-to-original mapping is handed to ``cj_object.write_hashmap``.

    Args:
        file_name (str): File to redact
        customfile (str): custom regex pattern file for redaction
        make_dir (str): [Optional] directory to place results; created
            when it does not exist. An empty string means "./".

    Returns:
        None. Creates the redacted file and prints progress messages.

    Exits the program (SystemExit) after removing the partial output when
    a UnicodeDecodeError is raised during processing.
    """
    redact_count = 0
    secret_map = {}
    # An empty directory would make the trailing-separator check below
    # index into an empty string; treat it as the current directory.
    if not make_dir:
        make_dir = "./"
    try:
        # Open a file read pointer as target_file
        with open(file_name, encoding="utf-8") as target_file:
            if make_dir != "./" and make_dir[-1] != "/":
                make_dir = make_dir + "/"

            # Create the directory if not present
            out_dir = os.path.dirname(make_dir)
            if not os.path.exists(out_dir):
                print(
                    "[+] "
                    + out_dir
                    + f"{self.dir_create}"
                )
                os.makedirs(out_dir)

            print(
                "[+] Processing starts now. This may take some time "
                "depending on the file size. Monitor the redacted file "
                "size to monitor progress"
            )

            # Open a file write pointer as result
            with open(
                f"{make_dir}redacted_{os.path.basename(file_name)}",
                "w",
                encoding="utf-8",
            ) as result:
                # The supplied custom regex pattern file will be used to redact the file
                print(f"[+] {customfile} file supplied, will be redacting all supplied custom regex patterns")
                custom_pattern = self.read_custom_patterns(customfile)
                for line in target_file:
                    # Count with the same case-insensitive test that
                    # redact_custom uses to decide whether a pattern
                    # applies, so the reported total matches the redactions.
                    redact_count += sum(
                        1
                        for rule in custom_pattern
                        if re.search(rule['pattern'], line, re.IGNORECASE)
                    )
                    # NOTE(review): redact_custom reloads the pattern file
                    # on every call, i.e. once per line of the target file.
                    redacted_line, kv_pairs = self.redact_custom(line, customfile)
                    secret_map.update(kv_pairs)
                    result.write(redacted_line)
                cj_object.write_hashmap(secret_map, file_name, make_dir)
                print(
                    f"[+] .hashshadow_{os.path.basename(file_name)}.json file generated. Keep this safe if you need to undo the redaction.")
                print(f"[+] Redacted {redact_count} targets...")
                print(
                    f"[+] Redacted results saved to {make_dir}redacted_{os.path.basename(file_name)}")

    except UnicodeDecodeError:
        # The output is incomplete at this point; drop it before exiting.
        os.remove(f"{make_dir}redacted_{os.path.basename(file_name)}")
        print("[-] Removed incomplete redact file")
        sys.exit("[-] Unable to read file")
221-
22286 def process_core_file (self , filename , savedir = "./" ):
22387 """Function to process supplied file from cli.
22488 Args:
@@ -277,6 +141,7 @@ def process_core_file(self, filename, savedir="./"):
277141 print (f"[+] Redacted { count } targets..." )
278142 print (
279143 f"[+] Redacted results saved to { savedir } redacted_{ os .path .basename (filename )} " )
144+ cj_object .process_report (filename )
280145
281146 except UnicodeDecodeError :
282147 os .remove (f"{ savedir } redacted_{ os .path .basename (filename )} " )
0 commit comments