44"""
55
66import argparse
7+ from genericpath import isdir
8+
9+ from numpy import full
710
811from pyredactkit .core_redactor import CoreRedactorEngine
912from pyredactkit .custom_redactor import CustomRedactorEngine
1316import sys
1417
# Module-level engine instances shared by the functions in this file:
# core redaction, custom-pattern redaction, and unredaction.
core_redact = CoreRedactorEngine()
custom_redact = CustomRedactorEngine()
unredact_obj = Unredactor()
1922
2023
3841 PyRedactKit - Redact and Un-redact any sensitive data from your text files!
3942 Example usage:\n
4043 prk 'This is my ip: 127.0.0.1. My email is [email protected] . My favorite secret link is github.com'\n
41- prk --file [file/filestoredact ]\n
42- prk --file redacted_file --unredact .hashshadow.json\n
43- prk --file file --customfile custom.json\n
44+ prk [file/directory_with_files ]\n
45+ prk redacted_file --unredact .hashshadow.json\n
46+ prk file --customfile custom.json\n
4447 """
4548
4649
4750def arg_helper () -> argparse .Namespace :
4851 parser = argparse .ArgumentParser (
49- description = "Supply a sentence or paragraph to redact sensitive data from it. Or read in a file or set of files with -f to redact " ,
52+ description = "Supply either a text chunk or file name path to redact sensitive data from it." ,
5053 formatter_class = argparse .ArgumentDefaultsHelpFormatter
5154 )
5255 parser .add_argument (
5356 "text" ,
54- type = str ,
55- help = """Redact sensitive data of a sentence from command prompt.""" ,
57+ help = """Supply either a text chunk or file name path to redact sensitive data from command prompt.""" ,
5658 nargs = "*"
5759 )
5860 if len (sys .argv ) == 1 :
5961 print (help_menu )
6062 parser .print_help (sys .stderr )
6163 sys .exit (1 )
62- parser .add_argument (
63- "-f" ,
64- "--file" ,
65- nargs = "+" ,
66- help = """Path of a file or a directory of files."""
67- )
6864 parser .add_argument (
6965 "-u" ,
7066 "--unredact" ,
7167 help = """
7268 Option to unredact masked data.
73- Usage: pyredactkit -f [redacted_file] -u [.hashshadow.json]
69+ Usage: pyredactkit [redacted_file] -u [.hashshadow.json]
7470 """
7571 )
7672 parser .add_argument (
7773 "-d" ,
7874 "--dirout" ,
7975 help = """
8076 Output directory of the file.
81- Usage: pyredactkit -f [file/filestoredact] -d [redacted_dir]
77+ Usage: pyredactkit [file/filestoredact] -d [redacted_dir]
8278 """
8379 )
8480 parser .add_argument (
8581 "-c" ,
8682 "--customfile" ,
8783 help = """
8884 User defined custom regex pattern for redaction.
89- Usage: pyredactkit -f [file/filestoredact] -c [customfile.json]
85+ Usage: pyredactkit [file/filestoredact] -c [customfile.json]
9086 """
9187 )
9288 parser .add_argument (
@@ -107,44 +103,52 @@ def arg_helper() -> argparse.Namespace:
107103 return args
108104
109105
110- def execute_file_arg () -> None :
111- args = arg_helper ()
112- full_paths = [os .path .join (os .getcwd (), path ) for path in args .file ]
113- files = set ()
def is_it_text(file_path: str) -> bool:
    """Report whether ``file_path`` names an existing file or directory.

    NOTE: despite the name, the result is ``True`` when the argument is a
    filesystem path and ``False`` otherwise, so a caller has to negate the
    result to detect free-text input.

    Args:
        file_path: Candidate path, checked as given (relative paths are
            resolved by the OS against the current working directory).

    Returns:
        ``True`` if the path is an existing regular file or directory,
        ``False`` for anything else.
    """
    return os.path.isfile(file_path) or os.path.isdir(file_path)
108+
114109
def recursive_file_search(full_path: "str | list", extension: str, recursive: bool) -> set:
    """Collect the files reachable from the given path(s).

    Args:
        full_path: One path or a list of paths. Each is joined onto the
            current working directory (absolute paths are left unchanged by
            ``os.path.join``). A bare string is treated as a single path
            rather than being iterated character by character.
        extension: ``''`` accepts every file; any other value must equal the
            file's extension as returned by ``os.path.splitext`` (leading dot
            included, e.g. ``'.txt'``).
        recursive: When true, every path that is not a regular file is
            expanded with a ``*`` glob and its entries are searched as well.
            When false, such paths are skipped.

    Returns:
        A set with the path of every matching regular file.
    """
    if isinstance(full_path, str):
        full_path = [full_path]

    cwd = os.getcwd()
    # Explicit work list instead of appending to a list while iterating it.
    pending = [os.path.join(cwd, path) for path in full_path]
    files = set()
    while pending:
        path = pending.pop()
        if os.path.isfile(path):
            _, file_ext = os.path.splitext(path)
            if extension in ('', file_ext):
                files.add(path)
        elif recursive:
            # Escape the directory part so names containing glob
            # metacharacters ('[', '*', '?') are matched literally.
            pending.extend(glob.glob(os.path.join(glob.escape(path), '*')))
    return files
121+
122+
def execute_redact_logic() -> None:
    """Parse the command line and dispatch to text or file processing.

    The first positional argument decides the mode: if it is not an existing
    file or directory (or no positional argument was given at all), all
    positionals are handed to ``core_redact.process_text`` and the function
    returns. Otherwise the positionals are treated as paths and every file
    found is processed individually.

    Per-file option precedence, in order:
      1. ``--customfile`` (with ``--dirout`` if given) -> custom redaction
      2. ``--dirout`` -> core redaction into the given directory
      3. ``--unredact`` -> unredaction
      4. no option -> core redaction
    """
    args = arg_helper()

    # "text" is declared with nargs="*", so it may be empty (e.g. only
    # options were passed); guard before indexing it.
    if not args.text or not is_it_text(args.text[0]):
        core_redact.process_text(args.text)
        # Raw text is not a path; do not fall through to the file search.
        return

    files = recursive_file_search(args.text, args.extension, args.recursive)

    for file in files:
        if args.customfile and args.dirout:
            custom_redact.process_custom_file(file, args.customfile, args.dirout)
        elif args.customfile:
            custom_redact.process_custom_file(file, args.customfile)
        elif args.dirout:
            core_redact.process_core_file(file, args.dirout)
        elif args.unredact:
            unredact_obj.unredact(file, args.unredact)
        else:
            core_redact.process_core_file(file)
134143
135144
def main():
    """Print the banner, then run the redaction workflow."""
    print(banner)
    execute_redact_logic()
144148
145149
def api_identify_sensitive_data(text: str) -> list:
    """Return the result of ``core_redact.identify_data`` for ``text``.

    Thin wrapper around the module-level core redaction engine.

    Args:
        text: Input string to scan.

    Returns:
        Whatever ``identify_data`` returns (annotated here as a list).
    """
    return core_redact.identify_data(text)
148152
149153
150154if __name__ == "__main__" :
0 commit comments