1+ #!/usr/bin/env python3
2+
3+ ##############################################################
4+ ## ODFSearch Console | IPExtractX Standalone Python Script ##
5+ ##############################################################
6+ ## Version: 1.2 ##
7+ ## Release Date: 1/09/2024 ##
8+ ## Release Author: @Onetrak-Digital-Forensics ##
9+ ## Release License: GNUGPL-V3 ##
10+ ## Release Status: Public ##
11+ ## Release Type: Standalone Python Script ##
12+ ##############################################################
13+
14+
15+
# Guard only the imports: a missing dependency is reported as an environment
# problem and terminates the script with exit status 3.
try:
    import os
    import ipaddress
    import re

    from rich import panel as rpanel
    from rich import console as rcon
    from rich import print as rprint

    import typer as TypeCLI
except ImportError as import_error:
    # Bind the error to its own name; the previous name shadowed the builtin
    # ``Exception`` class for the duration of the handler.
    print(f'Python Environment Error: {import_error} ')

    raise SystemExit(3)

# Version string interpolated into the banner and the CLI help text.
prog_version = '1.2.0'

# Typer application that the CLI command is registered on.
cli_main = TypeCLI.Typer(pretty_exceptions_short=True)

# Shared rich console, used for the status spinner while parsing.
rcon_obj = rcon.Console()
38+
class IPExtractX:
    """
    IPExtractX - ODFSearch Console
    ====

    Scan one text file, or every file below a directory, for IP addresses,
    hostnames, keywords, email addresses and PGP public key blocks.

    Init Parameters
    ====

    `input_path`
    ----
    `File` or `Directory` path of the data to be parsed.

    `outfile`
    ----
    File path the detection log is written to. Only created or overwritten
    when `WRITEIO` is `True`.

    `ws_kwfile`
    ----
    File path of a newline separated list of keywords and/or false positive
    email addresses. Blank lines are ignored.

    `ipaddr`
    ----
    `Bool`, `True` enables detection of IPv4/IPv6 addresses. Default `False`.

    `hn`
    ----
    `Bool`, `True` enables detection of hostnames. May report the host part
    of email addresses as well, so it duplicates `emlformat` results.

    `ws`
    ----
    `Bool`, `True` enables keyword detection using the entries of
    `ws_kwfile`. Without a readable `ws_kwfile` no keywords are matched.
    When enabled, emails listed in `ws_kwfile` are excluded from the email
    results.

    `emlformat`
    ----
    `Bool`, `True` enables detection of email addresses.

    `pgpheader`
    ----
    `Bool`, `True` enables detection of files containing a
    `-----BEGIN PGP PUBLIC KEY BLOCK-----` ... `-----END PGP PUBLIC KEY BLOCK-----`
    section.

    `WRITEIO`
    ----
    `Bool`, `True` writes every positive detection to `outfile`.

    `PRINTIO`
    ----
    `Bool`, `True` prints banner, warnings and the summary to the terminal.

    `APPENDIO`
    ----
    `Bool`, `True` appends the full text of every parsed file to
    `content_list`. WARNING: this keeps all parsed text in memory.

    Return Object
    ====
    `execute_parser()` returns the instance itself. After the run,
    `parsed_ipaddr`, `parsed_hostnames`, `parsed_emailids` and
    `fileids_with_pgp` are lists without duplicates (first-seen order) and
    `statistics` holds the hit counters.

    Example:
        >>> from IPExtractX import IPExtractX as IPX
        >>> result = IPX(content_dir, output_file, ipaddr=True).execute_parser()
        >>> known = [ip for ip in result.parsed_ipaddr if ip in customiplistobj]
    """

    # Detection patterns. They are class attributes so that they are available
    # on every instance and can still be overridden per instance.
    # NOTE(review): the PGP pattern matches public key blocks, while the log
    # line says "PGP message" - confirm which marker is actually wanted.
    pgp_regex_pattern = '(-----BEGIN PGP PUBLIC KEY BLOCK-----)(.*?)(-----END PGP PUBLIC KEY BLOCK-----)'
    ipv4_regex_pattern = r"([0-9]{1,3}\.){3}[0-9]{1,3}"
    ipv6_regex_pattern = r'\b(?:[0-9a-fA-F]{1,4}(:[0-9a-fA-F]{1,4})*)?::(?:[0-9a-fA-F]{1,4}(:[0-9a-fA-F]{1,4})*)?(?:(?<=::)|(?<=:)(?=\d+\.\d+\.\d+\.\d+)|\b)\b'
    hostname_regex_pattern = r'(?:(?:[A-Z0-9](?:[A-Z0-9\-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?)'
    # The TLD class used to be "[A-Z|a-z]", which also accepted a literal "|".
    email_regex_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'

    # Banner art as raw strings: the text contains backslashes that are not
    # valid escape sequences in a normal string literal.
    _BANNER_ART = (
        r" ___ ____ _____ _ _ __ __",
        r" |_ _| _ \| ____|_ _ | |_ _ __ __ _ ___| |_\ \/ /",
        r" | || |_) | _| \ \/ /| __| '__/ _` |/ __| __|\ /",
        r" | || __/| |___ > < | |_| | | (_| | (__| |_ / \ ",
        r" |___|_| |_____/_/\_\ \__|_| \__,_|\___|\__/_/\_\ ",
        " ",
    )

    def __init__(self, input_path: str, outfile: str, ws_kwfile: str = None, ipaddr: bool = False, hn: bool = False, ws: bool = False, emlformat: bool = False, pgpheader: bool = False, WRITEIO: bool = True, PRINTIO: bool = True, APPENDIO: bool = False):
        """Store the configuration and probe the given paths once."""

        self.dirpath = input_path
        self.outfile = outfile
        self.wswordlistfile = ws_kwfile
        self.detect_ipaddressformat = ipaddr
        self.detect_hostnameformat = hn
        self.detect_wordsearchformat = ws
        self.detect_emailformat = emlformat
        self.detect_prettygoodprivacyheaders = pgpheader

        # Path checks are evaluated here, at construction time.
        self.verify_dirpath_isdir = os.path.isdir(input_path)
        self.verify_dirpath_isfile = os.path.isfile(input_path)
        self.verify_wswordlistfile = os.path.isfile(ws_kwfile) if ws_kwfile else False
        self.verify_outfile = os.path.isfile(outfile)

        self.printio = PRINTIO
        self.appendio = APPENDIO
        self.writeio = WRITEIO
        self.statistics = {"PGP": 0, "IPV4": 0, "IPV6": 0, "Hostnames": 0, "Keywords": 0, "Emails": 0, "Errors": 0}
        self.parsed_ipaddr = []
        self.parsed_hostnames = []
        self.parsed_emailids = []
        self.fileids_with_pgp = []
        self.content_list = []
        # Path of the file currently being parsed; empty until parsing starts.
        self.activefn = ''

        self.progbanner = (
            "\n".join(self._BANNER_ART)
            + "\n"
            + f" [GNUGPL_v3] IPExtractX.py - {prog_version} \n Onetrak Digital Forensics Corporation\n "
        )

    def execute_parser(self):
        """
        Run all enabled detectors over `input_path` and return the instance.

        Ordinary errors raised while parsing are printed, counted in
        `statistics["Errors"]` and do not propagate. `KeyboardInterrupt` and
        `SystemExit` are not intercepted.
        """

        if self.printio:
            rprint(f'[red bold]{self.progbanner} [/red bold]')

        if not (self.verify_dirpath_isdir or self.verify_dirpath_isfile):
            rprint(f'[red bold] ✘ Input Error: {self.dirpath} Is a Non-Existent File Path\n ')
            return self

        keywords = self.load_keywords()

        # Only complain about the wordlist when keyword search was requested.
        if keywords is None and self.detect_wordsearchformat and self.printio:
            rprint(f'[red bold] ✘ Error: Wordlist File Not Loaded: {self.wswordlistfile!r}[/red bold]')
            rprint('[blue bold] Info: os.path.isfile(parser.wswordlistfile) reports False[/blue bold]\n ')

        if self.verify_outfile and self.writeio and self.printio:
            rprint(f'[bold yellow] ✘ Warning: Output File "{self.outfile} " Exists! Clearing File...[/bold yellow]\n ')

        # With WRITEIO disabled nothing may touch the output file, so the
        # detectors get the null device as their sink. Opening in "w" mode
        # already clears an existing file; no separate removal is needed.
        sink_path = self.outfile if self.writeio else os.devnull
        output_realpath = os.path.realpath(self.outfile) if self.writeio else None

        with rcon_obj.status('[yellow bold] Running Parser... ', spinner="bouncingBar") as statusanim:
            try:
                with open(sink_path, mode='w') as outfile:
                    if self.verify_dirpath_isfile:
                        self.parse_file(self.dirpath, outfile, keywords)
                    elif self.verify_dirpath_isdir:
                        for root, _dirs, files in os.walk(self.dirpath):
                            for file in files:
                                candidate = os.path.join(root, file)
                                # Never feed the log being written back into the parser.
                                if output_realpath is not None and os.path.realpath(candidate) == output_realpath:
                                    continue
                                statusanim.update(f'[purple bold] Parsing Text File: {candidate} ... ', spinner="hamburger")
                                self.parse_file(candidate, outfile, keywords)

                    statusanim.stop()

                    if self.writeio:
                        outfile.write('\n --- STATS ---\n ')
                        for key, value in self.statistics.items():
                            outfile.write(f"{key} : {value} \n ")

                    if self.printio:
                        rprint('\n [green bold] ✔ Success: Parser Execution Complete![/green bold]')
                        if self.writeio:
                            rprint(f'[blue bold] Output File > {self.outfile} [/blue bold]\n ')
                        if self.statistics['Errors'] > 0:
                            rprint(f'[red bold] ✘ Preliminary Warning: {self.statistics["Errors"]} Error(s) Occured During The Parser Execution!!![/red bold]\n ')

                # Drop duplicates while keeping the order of first appearance.
                self.parsed_ipaddr = list(dict.fromkeys(self.parsed_ipaddr))
                self.parsed_hostnames = list(dict.fromkeys(self.parsed_hostnames))
                self.parsed_emailids = list(dict.fromkeys(self.parsed_emailids))
                self.fileids_with_pgp = list(dict.fromkeys(self.fileids_with_pgp))

            except Exception as ERRRESP:
                # Best effort: report the failure and still hand back what was collected.
                self.statistics["Errors"] += 1
                rprint(f'\n [red bold] ✘ Parser Error: {ERRRESP} \n ')

        return self

    def parse_file(self, input_path, outfile, keywords):
        """
        Read `input_path` and run every enabled detector on its text.

        `outfile` is a writable text stream that receives the log lines when
        `writeio` is set; `keywords` is the list from `load_keywords()` or
        `None`. Any failure is counted in `statistics["Errors"]` instead of
        being raised, so one bad file does not abort a directory scan.
        """

        self.activefn = input_path

        try:
            with open(input_path, "r") as open_file:
                content = open_file.read()

            if self.appendio:
                self.content_list.append(content)

            if self.detect_ipaddressformat:
                self.detect_ip_addresses(content, outfile)

            if self.detect_hostnameformat:
                self.detect_hostnames(content, outfile)

            if self.detect_wordsearchformat:
                self.detect_keywords(content, outfile, keywords)

            if self.detect_emailformat:
                self.detect_emails(content, outfile, keywords)

            if self.detect_prettygoodprivacyheaders and re.search(self.pgp_regex_pattern, content, re.DOTALL):
                if self.writeio:
                    outfile.write(f"PGP message found in {input_path} \n ")
                self.statistics["PGP"] += 1
                self.fileids_with_pgp.append(self.activefn)

        except Exception as e:
            if self.writeio:
                outfile.write(f"Cannot read file {input_path} . Error: {str(e)} \n ")
            self.statistics["Errors"] += 1

    def detect_ip_addresses(self, content, outfile):
        """Record every IPv4/IPv6 candidate in `content` that `ipaddress` accepts."""

        for pattern, label in ((self.ipv4_regex_pattern, "IPV4"), (self.ipv6_regex_pattern, "IPV6")):
            for match in re.finditer(pattern, content):
                ip = match.group()

                # The regexes over-match (e.g. 999.1.1.1); let ipaddress decide.
                try:
                    ipaddress.ip_address(ip)
                except ValueError:
                    continue

                if self.writeio:
                    outfile.write(f"{label} '{ip} ' found in {self.activefn} \n ")
                self.statistics[label] += 1
                self.parsed_ipaddr.append(ip)

    def detect_hostnames(self, content, outfile):
        """Record every case-insensitive hostname pattern match in `content`."""

        for hostname in re.findall(self.hostname_regex_pattern, content, re.IGNORECASE):
            if self.writeio:
                outfile.write(f"Hostname '{hostname} ' found in {self.activefn} \n ")
            self.statistics["Hostnames"] += 1
            self.parsed_hostnames.append(hostname)

    def detect_keywords(self, content, outfile, keywords):
        """Count each keyword that occurs in `content`, ignoring case."""

        if not keywords:
            return

        lowered_content = content.lower()

        for keyword in keywords:
            # An empty keyword would be "found" in every file; skip it.
            if keyword and keyword.lower() in lowered_content:
                if self.writeio:
                    outfile.write(f"Keyword '{keyword} ' found in {self.activefn} \n ")
                self.statistics["Keywords"] += 1

    def detect_emails(self, content, outfile, no_email):
        """
        Record every email address found in `content`.

        When keyword search is enabled, addresses listed in `no_email` are
        treated as known false positives and skipped. `no_email` may be
        `None` when no wordlist was loaded.
        """

        if self.detect_wordsearchformat and no_email:
            excluded = set(no_email)
        else:
            excluded = set()

        for email in re.findall(self.email_regex_pattern, content):
            if email in excluded:
                continue
            if self.writeio:
                outfile.write(f"Email '{email} ' found in {self.activefn} \n ")
            self.statistics["Emails"] += 1
            self.parsed_emailids.append(email)

    def load_keywords(self):
        """
        Return the non-empty, stripped lines of the wordlist file.

        Returns `None` when no existing wordlist file was configured.
        """

        if not self.verify_wswordlistfile:
            return None

        with open(self.wswordlistfile, mode='r') as wlf:
            stripped_lines = (line.strip() for line in wlf)
            return [entry for entry in stripped_lines if entry]
376+
# CLI entry point: builds an IPExtractX parser from the command line options,
# runs it and terminates the process.
#   content_dir       file or directory to scan
#   keywords_file     wordlist used for keyword search / email exclusions
#   output_file       path of the detection log
#   detect_*          switches for the individual detectors
# Exit status: 0 when the run finished without errors, 1 when the input path
# does not exist or the parser counted at least one error.
# (Kept as comments on purpose: the help text shown by the CLI is the `help`
# argument of the decorator.)
@cli_main.command(name='X', help=f'HTML Mail Regular Expression Search Pattern Detection Software Version {prog_version} \n \n This Script Is Also An Importable Class!, try: >>> from IPExtractX import IPExtractX')
def extract_main(content_dir: str, keywords_file: str = os.path.join(os.getcwd(), 'wordlists', 'words1.list'), output_file: str = 'output.txt', detect_ipaddr: bool = True, detect_hostname: bool = False, detect_wordsearch: bool = False, detect_emails: bool = False, detect_pgpheader: bool = False):

    parser_fileop = IPExtractX(content_dir, output_file, keywords_file, detect_ipaddr, detect_hostname, detect_wordsearch, detect_emails, detect_pgpheader).execute_parser()

    ### Custom Code Goes Here ###
    # print(parser_fileop.parsed_ipaddr)
    # print(parser_fileop.parsed_hostnames)

    # Report failure through the exit status only when something went wrong;
    # the script used to exit with status 1 unconditionally.
    run_failed = parser_fileop.statistics['Errors'] > 0 or not os.path.exists(content_dir)

    raise SystemExit(1 if run_failed else 0)
387+
388+
389+
if __name__ == "__main__":
    # Start the Typer application only when the file is executed as a script;
    # importing the module must not launch the command line interface.
    cli_main()
0 commit comments