Skip to content

Commit 72a44b7

Browse files
committed
IPExtractX Release Version 1.2.0
IPExtractX is now an importable class! Create the class initializer and try it out!
0 parents  commit 72a44b7

File tree

6 files changed

+1128
-0
lines changed

6 files changed

+1128
-0
lines changed

.github/IPExtractX_HelpImage01.png

16.8 KB
Loading

IPExtractX.py

Lines changed: 392 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,392 @@
#!/usr/bin/env python3

##############################################################
## ODFSearch Console | IPExtractX Standalone Python Script ##
##############################################################
## Version: 1.2 ##
## Release Date: 1/09/2024 ##
## Release Author: @Onetrak-Digital-Forensics ##
## Release License: GNUGPL-V3 ##
## Release Status: Public ##
## Release Type: Standalone Python Script ##
##############################################################
13+
14+
15+
16+
# Import every dependency up front so a missing package produces one readable
# message and a distinct exit status (3) instead of a traceback.
# Only the imports sit inside the ``try``: nothing else here can raise
# ImportError, and keeping the body minimal avoids masking unrelated failures.
try:

    import os
    import ipaddress
    import re

    from rich import panel as rpanel
    from rich import console as rcon
    from rich import print as rprint

    import typer as TypeCLI

except ImportError as import_error:  # do not shadow the builtin ``Exception``

    print(f'Python Environment Error: {import_error}')

    raise SystemExit(3)

# Version string shown in the banner and in the CLI help text.
prog_version = '1.2.0'

# Typer application that hosts the command line entry point.
cli_main = TypeCLI.Typer(pretty_exceptions_short=True)

# Rich console used for the progress/status spinner.
rcon_obj = rcon.Console()
38+
39+
class IPExtractX:

    """
    IPExtractX - ODFSearch Console
    ====

    Scans one text file, or every file below a directory, for IP addresses,
    hostnames, keywords, e-mail addresses and PGP armor blocks.

    Init Parameters
    ====

    `input_path`
    ----
    `File` or `Directory` `Path` of the data to be parsed.

    `outfile`
    ----
    `File Path` where the parser saves positive detection messages.

    `ws_kwfile`
    ----
    `File Path` of a newline-separated list of keywords and/or false
    positive e-mail addresses. Blank lines are ignored.

    `ipaddr`
    ----
    `Bool`, where `True` enables the detection of IPv4/IPv6 addresses.
    Defaults to `False`; passing `None` enables the detection.

    `hn`
    ----
    `Bool`, where `True` enables the detection of hostnames.
    Please note: may report the host part of e-mail addresses a second
    time, so it should NOT be combined with `emlformat`.

    `ws`
    ----
    `Bool`, where `True` enables the detection of the keywords listed in
    `ws_kwfile`. Without a readable `ws_kwfile` nothing is searched for.
    When enabled, e-mail addresses listed in `ws_kwfile` are also skipped
    by the e-mail detection.

    `emlformat`
    ----
    `Bool`, where `True` enables the detection of e-mail addresses.

    `pgpheader`
    ----
    `Bool`, where `True` enables the detection of PGP armor blocks
    (`BEGIN PGP MESSAGE` and `BEGIN PGP PUBLIC KEY BLOCK`).

    `WRITEIO`
    ----
    `Bool`, where `True` enables writing detections to `outfile`. When
    `False`, `execute_parser` leaves `outfile` untouched.

    `PRINTIO`
    ----
    `Bool`, where `True` enables informational terminal output.

    `APPENDIO`
    ----
    `Bool`, where `True` appends the text of every parsed file to
    `parser.content_list`.
    WARNING: large inputs will use a correspondingly large amount of memory.

    Return Object
    ====
    Example:
    >>> from IPExtractX import IPExtractX as IPX
    >>> result = IPX('mail_exports/', 'output.txt', ipaddr=True).execute_parser()
    >>> '10.0.0.1' in result.parsed_ipaddr

    Return Argument
    ----
    `execute_parser` returns the instance itself. `parsed_ipaddr`,
    `parsed_hostnames`, `parsed_emailids` and `fileids_with_pgp` are lists
    with duplicates removed (first occurrence kept); `statistics` counts
    every detection, duplicates included.

    Return Output
    ----
    `outfile` receives the detection log and a statistics summary when
    `WRITEIO` is `True`.
    """

    # Detection patterns. They are class attributes so they are available on
    # every instance; assign to the instance attribute to override one.

    # Either armor type, closed by the matching END line (see re.DOTALL use).
    pgp_regex_pattern = r'-----BEGIN PGP (MESSAGE|PUBLIC KEY BLOCK)-----.*?-----END PGP \1-----'
    ipv4_regex_pattern = r"([0-9]{1,3}\.){3}[0-9]{1,3}"
    ipv6_regex_pattern = r'\b(?:[0-9a-fA-F]{1,4}(:[0-9a-fA-F]{1,4})*)?::(?:[0-9a-fA-F]{1,4}(:[0-9a-fA-F]{1,4})*)?(?:(?<=::)|(?<=:)(?=\d+\.\d+\.\d+\.\d+)|\b)\b'
    hostname_regex_pattern = r'(?:(?:[A-Z0-9](?:[A-Z0-9\-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?)'
    # The TLD class is [A-Za-z]; the former [A-Z|a-z] also accepted a literal '|'.
    email_regex_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'

    def __init__(
        parser,
        input_path: str,
        outfile: str,
        ws_kwfile: str = None,
        ipaddr: bool = False,
        hn: bool = False,
        ws: bool = False,
        emlformat: bool = False,
        pgpheader: bool = False,
        WRITEIO: bool = True,
        PRINTIO: bool = True,
        APPENDIO: bool = False,
    ):
        """Store the configuration and probe the given paths once."""

        parser.dirpath = input_path
        parser.outfile = outfile
        parser.wswordlistfile = ws_kwfile

        # Documented contract: None means "use the detection", not "off".
        parser.detect_ipaddressformat = True if ipaddr is None else ipaddr
        parser.detect_hostnameformat = hn
        parser.detect_wordsearchformat = ws
        parser.detect_emailformat = emlformat
        parser.detect_prettygoodprivacyheaders = pgpheader

        # Path checks are taken at construction time and reused later.
        parser.verify_dirpath_isdir = os.path.isdir(input_path)
        parser.verify_dirpath_isfile = os.path.isfile(input_path)
        parser.verify_wswordlistfile = os.path.isfile(ws_kwfile) if ws_kwfile else False
        parser.verify_outfile = os.path.isfile(outfile)

        parser.printio = PRINTIO
        parser.appendio = APPENDIO
        parser.writeio = WRITEIO

        parser.statistics = {"PGP": 0, "IPV4": 0, "IPV6": 0, "Hostnames": 0, "Keywords": 0, "Emails": 0, "Errors": 0}
        parser.parsed_ipaddr = []
        parser.parsed_hostnames = []
        parser.parsed_emailids = []
        parser.fileids_with_pgp = []
        parser.content_list = []

        # Path of the file currently being parsed; empty until parsing starts.
        parser.activefn = ''

        # Same banner text as before; backslashes are written as explicit
        # escapes so the literal no longer relies on invalid escape sequences.
        parser.progbanner = (
            " ___ ____ _____ _ _ __ __\n"
            " |_ _| _ \\| ____|_ _ | |_ _ __ __ _ ___| |_\\ \\/ /\n"
            " | || |_) | _| \\ \\/ /| __| '__/ _` |/ __| __|\\ /\n"
            " | || __/| |___ > < | |_| | | (_| | (__| |_ / \\ \n"
            " |___|_| |_____/_/\\_\\ \\__|_| \\__,_|\\___|\\__/_/\\_\\ \n"
            "\n"
            f" [GNUGPL_v3] IPExtractX.py - {prog_version}\n"
            " Onetrak Digital Forensics Corporation\n"
        )

    def execute_parser(parser):
        """
        Run every enabled detection over the input path.

        Returns the instance itself so results can be read from its
        attributes. Failures inside the run are reported on the terminal
        rather than raised; interrupts (Ctrl+C, SystemExit) propagate.
        """

        if parser.printio:
            rprint(f'[red bold]{parser.progbanner}[/red bold]')

        if not parser.verify_dirpath_isdir and not parser.verify_dirpath_isfile:
            rprint(f'[red bold] ✘ Input Error: {parser.dirpath} Is a Non-Existent File Path\n')
            return parser

        keywords = parser.load_keywords()

        # Only complain about the wordlist when it is actually needed.
        if keywords is None and parser.detect_wordsearchformat and parser.printio:
            rprint(f'[red bold] ✘ Error: Wordlist File Not Loaded: {parser.wswordlistfile}[/red bold]')
            rprint(f'[blue bold] Info: os.path.isfile(parser.wswordlistfile) reports {parser.verify_wswordlistfile}[/blue bold]\n')

        if parser.verify_outfile and parser.writeio:
            if parser.printio:
                rprint(f'[bold yellow] ✘ Warning: Output File "{parser.outfile}" Exists! Clearing File...[/bold yellow]\n')
            os.remove(parser.outfile)

        # With output disabled, send writes to the null device so an existing
        # output file is neither created nor truncated.
        sink_path = parser.outfile if parser.writeio else os.devnull

        with rcon_obj.status('[yellow bold] Running Parser... ', spinner="bouncingBar") as statusanim:

            try:

                with open(sink_path, mode='w') as outfile:

                    if parser.verify_dirpath_isfile:
                        parser.parse_file(parser.dirpath, outfile, keywords)

                    elif parser.verify_dirpath_isdir:
                        for root, _dirs, files in os.walk(parser.dirpath):
                            for file in files:
                                file_path = os.path.join(root, file)
                                # Show the file about to be parsed, not the previous one.
                                statusanim.update(f'[purple bold] Parsing Text File: {file_path}... ', spinner="hamburger")
                                parser.parse_file(file_path, outfile, keywords)

                    statusanim.stop()

                    if parser.writeio:
                        outfile.write('\n--- STATS ---\n')
                        for key, value in parser.statistics.items():
                            outfile.write(f"{key}: {value}\n")

                    if parser.printio:
                        rprint('\n[green bold] ✔ Success: Parser Execution Complete![/green bold]')

                        if parser.writeio:
                            rprint(f'[blue bold] Output File > {parser.outfile}[/blue bold]\n')

                        if parser.statistics['Errors'] > 0:
                            rprint(f'[red bold] ✘ Preliminary Warning: {parser.statistics["Errors"]} Error(s) Occured During The Parser Execution!!![/red bold]\n')

            except Exception as ERRRESP:

                rprint(f'\n[red bold] ✘ Parser Error: {ERRRESP}\n')

        # De-duplicate even after a failed run; dict.fromkeys keeps the order
        # in which values were first seen.
        parser.parsed_ipaddr = list(dict.fromkeys(parser.parsed_ipaddr))
        parser.parsed_hostnames = list(dict.fromkeys(parser.parsed_hostnames))
        parser.parsed_emailids = list(dict.fromkeys(parser.parsed_emailids))
        parser.fileids_with_pgp = list(dict.fromkeys(parser.fileids_with_pgp))

        return parser

    def parse_file(parser, input_path, outfile, keywords):
        """
        Read `input_path` as text and run every enabled detection on it.

        `outfile` is a writable text stream; `keywords` is the loaded
        wordlist or `None`. Any failure for this file is logged to `outfile`
        and counted in `statistics["Errors"]` so one bad file does not stop
        a directory run.
        """

        parser.activefn = input_path

        try:

            with open(input_path, "r") as open_file:
                content = open_file.read()

            if parser.appendio:
                parser.content_list.append(content)

            if parser.detect_ipaddressformat:
                parser.detect_ip_addresses(content, outfile)

            if parser.detect_hostnameformat:
                parser.detect_hostnames(content, outfile)

            if parser.detect_wordsearchformat:
                parser.detect_keywords(content, outfile, keywords)

            if parser.detect_emailformat:
                parser.detect_emails(content, outfile, keywords)

            # DOTALL lets the armor body span multiple lines.
            if parser.detect_prettygoodprivacyheaders and re.search(parser.pgp_regex_pattern, content, re.DOTALL):

                if parser.writeio:
                    outfile.write(f"PGP message found in {input_path}\n")

                parser.statistics["PGP"] += 1
                parser.fileids_with_pgp.append(parser.activefn)

        except Exception as e:  # deliberate per-file isolation; logged and counted

            outfile.write(f"Cannot read file {input_path}. Error: {str(e)}\n")
            parser.statistics["Errors"] += 1

    def detect_ip_addresses(parser, content, outfile):
        """Record every IPv4/IPv6 candidate in `content` that `ipaddress` accepts."""

        candidates = (
            (parser.ipv4_regex_pattern, "IPV4"),
            (parser.ipv6_regex_pattern, "IPV6"),
        )

        for pattern, label in candidates:

            for match in re.finditer(pattern, content):

                ip = match.group()

                # The regexes are permissive (e.g. 999.1.1.1); let the
                # standard library decide what is a real address.
                try:
                    ipaddress.ip_address(ip)
                except ValueError:
                    continue

                if parser.writeio:
                    outfile.write(f"{label} '{ip}' found in {parser.activefn}\n")

                parser.statistics[label] += 1
                parser.parsed_ipaddr.append(ip)

    def detect_hostnames(parser, content, outfile):
        """Record every case-insensitive hostname pattern match in `content`."""

        for hostname in re.findall(parser.hostname_regex_pattern, content, re.IGNORECASE):

            if parser.writeio:
                outfile.write(f"Hostname '{hostname}' found in {parser.activefn}\n")

            parser.statistics["Hostnames"] += 1
            parser.parsed_hostnames.append(hostname)

    def detect_keywords(parser, content, outfile, keywords):
        """Count each keyword that occurs (case-insensitively) in `content`."""

        if not keywords:
            return

        lowered_content = content.lower()  # lower once, not once per keyword

        for keyword in keywords:

            # An empty keyword is a substring of everything; skip it.
            if keyword and keyword.lower() in lowered_content:

                if parser.writeio:
                    outfile.write(f"Keyword '{keyword}' found in {parser.activefn}\n")

                parser.statistics["Keywords"] += 1

    def detect_emails(parser, content, outfile, no_email):
        """
        Record every e-mail address found in `content`.

        When word search is enabled, addresses listed in `no_email` (the
        loaded wordlist, possibly `None`) are treated as known false
        positives and skipped.
        """

        # no_email is None when no wordlist was loaded; treat that as "no exclusions".
        excluded = set(no_email) if (parser.detect_wordsearchformat and no_email) else set()

        for email in re.findall(parser.email_regex_pattern, content):

            if email in excluded:
                continue

            if parser.writeio:
                outfile.write(f"Email '{email}' found in {parser.activefn}\n")

            parser.statistics["Emails"] += 1
            parser.parsed_emailids.append(email)

    def load_keywords(parser):
        """
        Return the stripped, non-empty lines of the wordlist file.

        Returns `None` when no readable wordlist file was configured.
        """

        if not parser.verify_wswordlistfile:
            return None

        with open(parser.wswordlistfile, mode='r') as wlf:
            stripped_lines = (line.strip() for line in wlf)
            return [entry for entry in stripped_lines if entry]
376+
377+
@cli_main.command(name='X', help=f'HTML Mail Regular Expression Search Pattern Detection Software Version {prog_version}\n\nThis Script Is Also An Importable Class!, try: >>> from IPExtractX import IPExtractX')
def extract_main(
    content_dir: str,
    keywords_file: str = f'{os.getcwd()}/wordlists/words1.list',
    output_file: str = 'output.txt',
    detect_ipaddr: bool = True,
    detect_hostname: bool = False,
    detect_wordsearch: bool = False,
    detect_emails: bool = False,
    detect_pgpheader: bool = False,
):
    # Command line entry point: build an IPExtractX from the CLI options, run
    # it, and exit. Comments are used instead of a docstring so the help text
    # passed to the decorator stays the only CLI description.
    #
    # Exit status: 0 after a clean run; 1 when the input path does not exist
    # or at least one file could not be processed.

    parser_fileop = IPExtractX(
        content_dir,
        output_file,
        keywords_file,
        ipaddr=detect_ipaddr,
        hn=detect_hostname,
        ws=detect_wordsearch,
        emlformat=detect_emails,
        pgpheader=detect_pgpheader,
    ).execute_parser()

    ### Custom Code Goes Here ###
    # print(parser_fileop.parsed_ipaddr)
    # print(parser_fileop.parsed_hostnames)

    input_found = parser_fileop.verify_dirpath_isdir or parser_fileop.verify_dirpath_isfile
    run_failed = (not input_found) or parser_fileop.statistics["Errors"] > 0

    # Previously the script always exited with status 1, which shells and
    # callers interpret as failure even when the run succeeded.
    raise SystemExit(1 if run_failed else 0)
387+
388+
389+
390+
# Start the Typer command line application only when this file is executed
# directly; importing the module must not launch the CLI.
if __name__ == "__main__":
    cli_main()

0 commit comments

Comments
 (0)