Onetrak-Digital-Forensics
diff --git a/‎.github/IPExtractX_HelpImage01.png‎
16.8 KB b/‎.github/IPExtractX_HelpImage01.png‎
16.8 KB
diff --git a/‎IPExtractX.py‎
Lines changed: 392 additions & 0 deletions b/‎IPExtractX.py‎
Lines changed: 392 additions & 0 deletions
@@ -0,0 +1,392 @@
+#!/usr/bin/env python3
+
+##############################################################
+## ODFSearch Console | IPExtractX Standalone Python Script  ##
+##############################################################
+## Version: 1.2                                             ##
+## Release Date: 1/09/2024                                  ##
+## Release Author: @Onetrak-Digital-Forensics               ##
+## Release License: GNUGPL-V3                               ##
+## Release Status: Public                                   ##
+## Release Type: Standalone Python Script                   ##
+##############################################################
+
+
+
+# Import every dependency up front so that a missing third-party package
+# (rich / typer) produces one readable message instead of a traceback.
+try:
+
+    import os
+    import ipaddress
+    import re
+
+    from rich import panel as rpanel
+    from rich import console as rcon
+    from rich import print as rprint
+
+    import typer as TypeCLI
+
+    # Version string shown in the banner and in the CLI help text.
+    prog_version = '1.2.0'
+
+    # Typer application object; commands are registered on it further down.
+    cli_main = TypeCLI.Typer(pretty_exceptions_short=True)
+    # Shared rich console used for the progress spinner.
+    rcon_obj = rcon.Console()
+
+except ImportError as import_error:
+
+    # Bound to `import_error` rather than `Exception` so the builtin
+    # exception class is never rebound at module level.
+    print(f'Python Environment Error: {import_error}')
+
+    # Exit status 3 marks a broken Python environment.
+    raise SystemExit(3)
+
+class IPExtractX:
+
+    """
+    IPExtractX - ODFSearch Console
+    ====
+
+    Scans one text file, or every file below a directory, for IPv4/IPv6
+    addresses, hostnames, e-mail addresses, keywords and PGP armor blocks.
+    Hits are counted in `statistics`, collected on the instance and, when
+    `WRITEIO` is enabled, logged line by line to `outfile`.
+
+    Init Parameters
+    ====
+
+    `input_path`
+    ----
+    `File` or `Directory` `Path` of the data to be parsed.
+
+    `outfile`
+    ----
+    `File Path` the parser saves positive detection messages to.
+    Only opened (and truncated) when `WRITEIO` is `True`.
+
+    `ws_kwfile`
+    ----
+    `File Path` of a wordlist: keywords and/or false-positive e-mail
+    addresses, separated by new lines. Blank lines are ignored.
+
+    `ipaddr`
+    ----
+    `Bool`, where `True` enables the detection of IPv4/IPv6 addresses.
+    Default is `False` (the command line front end passes `True`).
+
+    `hn`
+    ----
+    `Bool`, where `True` enables the detection of hostnames.
+    Please note: may report the host part of e-mail addresses a second
+    time, so it should NOT be combined with `emlformat`.
+
+    `ws`
+    ----
+    `Bool`, where `True` enables the detection of the keywords listed in
+    `ws_kwfile`. Without a readable `ws_kwfile` nothing is matched.
+
+    `emlformat`
+    ----
+    `Bool`, where `True` enables the detection of e-mail addresses.
+    When `ws` is also `True`, addresses listed in `ws_kwfile` are skipped.
+
+    `pgpheader`
+    ----
+    `Bool`, where `True` enables the detection of Pretty Good Privacy (PGP)
+    armor blocks (`BEGIN PGP MESSAGE` or `BEGIN PGP PUBLIC KEY BLOCK`).
+
+    `WRITEIO`
+    ----
+    `Bool`, where `True` enables writing detection messages to `outfile`.
+
+    `PRINTIO`
+    ----
+    `Bool`, where `True` enables parser status messages on the terminal.
+
+    `APPENDIO`
+    ----
+    `Bool`, where `True` appends the text read from every file to
+    `parser.content_list`.
+    WARNING: keeps every parsed file in memory at once.
+
+    Return Object
+    ====
+    Examples:
+    >>> from IPExtractX import IPExtractX as IPX
+    >>> result = IPX(content_dir, output_file, keywords_file, ipaddr=True).execute_parser()
+    >>> known = [ip for ip in result.parsed_ipaddr if ip in customiplistobj]
+
+    Return Argument
+    ----
+    `execute_parser()` returns the instance itself. `parsed_ipaddr`,
+    `parsed_hostnames`, `parsed_emailids` and `fileids_with_pgp` are lists
+    with duplicates removed, in first-seen order.
+
+    Return Output
+    ----
+    `outfile` receives the detection log followed by a `--- STATS ---`
+    summary when `WRITEIO` is `True`.
+    """
+
+    def __init__(parser, input_path: str, outfile: str, ws_kwfile: str = None, ipaddr: bool = False, hn: bool = False, ws: bool = False, emlformat: bool = False, pgpheader: bool = False, WRITEIO: bool = True, PRINTIO: bool = True, APPENDIO: bool = False):
+
+        # NOTE: the instance argument is named `parser` (not `self`)
+        # throughout this class; that is the file's own convention.
+        parser.dirpath = input_path
+        parser.outfile = outfile
+        parser.wswordlistfile = ws_kwfile
+        parser.detect_ipaddressformat = ipaddr
+        parser.detect_hostnameformat = hn
+        parser.detect_wordsearchformat = ws
+        parser.detect_emailformat = emlformat
+        parser.detect_prettygoodprivacyheaders = pgpheader
+
+        # Matches an armored PGP message or public key block; the
+        # backreference forces BEGIN and END to name the same block type.
+        # Searched with re.DOTALL so the body may span lines.
+        parser.pgp_regex_pattern = r'-----BEGIN PGP (MESSAGE|PUBLIC KEY BLOCK)-----.*?-----END PGP \1-----'
+        # Candidates only: every IP match is validated with `ipaddress`.
+        parser.ipv4_regex_pattern = r"([0-9]{1,3}\.){3}[0-9]{1,3}"
+        # Only finds IPv6 literals written in the compressed `::` form.
+        parser.ipv6_regex_pattern = r'\b(?:[0-9a-fA-F]{1,4}(:[0-9a-fA-F]{1,4})*)?::(?:[0-9a-fA-F]{1,4}(:[0-9a-fA-F]{1,4})*)?(?:(?<=::)|(?<=:)(?=\d+\.\d+\.\d+\.\d+)|\b)\b'
+        parser.hostname_regex_pattern = r'(?:(?:[A-Z0-9](?:[A-Z0-9\-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?)'
+        # `[A-Za-z]` for the TLD: a `|` inside a character class is a
+        # literal pipe, not an alternation.
+        parser.email_regex_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
+
+        # Path checks are taken once, at construction time.
+        parser.verify_dirpath_isdir = os.path.isdir(input_path)
+        parser.verify_dirpath_isfile = os.path.isfile(input_path)
+        parser.verify_wswordlistfile = os.path.isfile(ws_kwfile) if ws_kwfile else False
+        parser.verify_outfile = os.path.isfile(outfile)
+
+        parser.printio = PRINTIO
+        parser.appendio = APPENDIO
+        parser.writeio = WRITEIO
+        parser.statistics = {"PGP": 0, "IPV4": 0, "IPV6": 0, "Hostnames": 0, "Keywords": 0, "Emails": 0, "Errors": 0}
+        parser.parsed_ipaddr = []
+        parser.parsed_hostnames = []
+        parser.parsed_emailids = []
+        parser.fileids_with_pgp = []
+        parser.content_list = []
+        # Path of the file currently being parsed; empty until the first file.
+        parser.activefn = ''
+
+        # The banner is assembled from raw strings so its backslashes are
+        # literal characters rather than invalid escape sequences.
+        banner_lines = (
+            r"  ___ ____  _____       _                  _  __  __",
+            r" |_ _|  _ \| ____|_  _ | |_ _ __ __ _  ___| |_\ \/ /",
+            r"  | || |_) |  _| \ \/ /| __| '__/ _` |/ __| __|\  /",
+            r"  | ||  __/| |___ >  < | |_| | | (_| | (__| |_ /  \ ",
+            r" |___|_|   |_____/_/\_\ \__|_|  \__,_|\___|\__/_/\_\ ",
+            "",
+            f"          [GNUGPL_v3] IPExtractX.py - {prog_version}",
+            "        Onetrak Digital Forensics Corporation",
+            "",
+        )
+        parser.progbanner = "\n".join(banner_lines)
+
+    def execute_parser(parser):
+
+        """
+        Run every enabled detector over the input path.
+
+        Prints the banner and status messages when `printio` is set and
+        writes the detection log plus a statistics summary to `outfile`
+        when `writeio` is set. Errors raised while parsing are printed and
+        do not propagate; `KeyboardInterrupt` and `SystemExit` do.
+
+        Returns the instance itself so the collected results can be read
+        from its attributes.
+        """
+
+        if parser.printio:
+
+            rprint(f'[red bold]{parser.progbanner}[/red bold]')
+
+        if not (parser.verify_dirpath_isdir or parser.verify_dirpath_isfile):
+
+            rprint(f'[red bold] ✘  Input Error: {parser.dirpath} Is a Non-Existent File Path\n')
+
+            return parser
+
+        keywords = parser.load_keywords()
+
+        if keywords is None and parser.printio:
+
+            rprint(f'[red bold] ✘  Error: Wordlist File Not Loaded: "{parser.wswordlistfile}"[/red bold]')
+            rprint(f'[blue bold]    Info: os.path.isfile(parser.wswordlistfile) reports {parser.verify_wswordlistfile}[/blue bold]\n')
+
+        if parser.writeio and parser.verify_outfile and parser.printio:
+
+            rprint(f'[bold yellow] ✘  Warning: Output File "{parser.outfile}" Exists! Clearing File...[/bold yellow]\n')
+
+        # Stays None when writing is disabled, so an existing file at the
+        # output path is neither created nor truncated.
+        outfile = None
+
+        with rcon_obj.status('[yellow bold] Running Parser... ', spinner="bouncingBar") as statusanim:
+
+            try:
+
+                if parser.writeio:
+
+                    # Mode 'w' truncates, which clears any previous log.
+                    outfile = open(parser.outfile, mode='w')
+
+                for file_path in parser._iter_input_files():
+
+                    statusanim.update(f'[purple bold] Parsing Text File: {file_path}... ', spinner="hamburger")
+
+                    parser.parse_file(file_path, outfile, keywords)
+
+                # Stop the spinner before printing the final messages.
+                statusanim.stop()
+
+                parser._log(outfile, '\n--- STATS ---\n')
+
+                for key, value in parser.statistics.items():
+
+                    parser._log(outfile, f"{key}: {value}\n")
+
+                if parser.printio:
+
+                    rprint('\n[green bold] ✔  Success: Parser Execution Complete![/green bold]')
+
+                    if parser.writeio:
+
+                        rprint(f'[blue bold]    Output File > {parser.outfile}[/blue bold]\n')
+
+                    if parser.statistics['Errors'] > 0:
+
+                        rprint(f'[red bold] ✘  Preliminary Warning: {parser.statistics["Errors"]} Error(s) Occured During The Parser Execution!!![/red bold]\n')
+
+            except Exception as error:
+
+                rprint(f'\n[red bold] ✘  Parser Error: {error}\n')
+
+            finally:
+
+                # Cleanup only. A `return` here would silently swallow
+                # KeyboardInterrupt and SystemExit.
+                if outfile is not None:
+
+                    outfile.close()
+
+        # Remove duplicates while keeping the order of first appearance.
+        parser.parsed_ipaddr = list(dict.fromkeys(parser.parsed_ipaddr))
+        parser.parsed_hostnames = list(dict.fromkeys(parser.parsed_hostnames))
+        parser.parsed_emailids = list(dict.fromkeys(parser.parsed_emailids))
+        parser.fileids_with_pgp = list(dict.fromkeys(parser.fileids_with_pgp))
+
+        return parser
+
+    def _iter_input_files(parser):
+
+        """Yield the path of every file to parse: the input file itself, or each file below the input directory."""
+
+        if parser.verify_dirpath_isfile:
+
+            yield parser.dirpath
+
+            return
+
+        # The log being written must not be fed back into the parser when
+        # it lives inside the scanned directory.
+        output_path = os.path.abspath(parser.outfile) if parser.writeio else None
+
+        for root, _dirs, files in os.walk(parser.dirpath):
+
+            for name in files:
+
+                file_path = os.path.join(root, name)
+
+                if output_path is not None and os.path.abspath(file_path) == output_path:
+
+                    continue
+
+                yield file_path
+
+    def _log(parser, outfile, message):
+
+        """Write `message` to `outfile` when writing is enabled and a file object was supplied."""
+
+        if parser.writeio and outfile is not None:
+
+            outfile.write(message)
+
+    def parse_file(parser, input_path, outfile, keywords):
+
+        """
+        Read `input_path` as text and run every enabled detector on it.
+
+        `outfile` is the open log file (or `None` when nothing is written)
+        and `keywords` the list returned by `load_keywords()` (or `None`).
+        Any failure is logged, counted under `statistics["Errors"]` and
+        does not stop the run.
+        """
+
+        parser.activefn = input_path
+
+        try:
+
+            with open(input_path, "r") as open_file:
+
+                content = open_file.read()
+
+            if parser.appendio:
+
+                parser.content_list.append(content)
+
+            if parser.detect_ipaddressformat:
+
+                parser.detect_ip_addresses(content, outfile)
+
+            if parser.detect_hostnameformat:
+
+                parser.detect_hostnames(content, outfile)
+
+            if parser.detect_wordsearchformat:
+
+                parser.detect_keywords(content, outfile, keywords)
+
+            if parser.detect_emailformat:
+
+                parser.detect_emails(content, outfile, keywords)
+
+            if parser.detect_prettygoodprivacyheaders and re.search(parser.pgp_regex_pattern, content, re.DOTALL):
+
+                parser._log(outfile, f"PGP message found in {input_path}\n")
+
+                parser.statistics["PGP"] += 1
+
+                parser.fileids_with_pgp.append(parser.activefn)
+
+        except Exception as e:
+
+            # Deliberately broad: one unreadable or undecodable file must
+            # not abort the scan of the remaining files.
+            parser._log(outfile, f"Cannot read file {input_path}. Error: {str(e)}\n")
+
+            parser.statistics["Errors"] += 1
+
+    def detect_ip_addresses(parser, content, outfile):
+
+        """Record every IPv4/IPv6 candidate in `content` that `ipaddress` accepts as a valid address."""
+
+        patterns = ((parser.ipv4_regex_pattern, "IPV4"), (parser.ipv6_regex_pattern, "IPV6"))
+
+        for pattern, label in patterns:
+
+            for match in re.finditer(pattern, content):
+
+                candidate = match.group()
+
+                try:
+
+                    # The regexes over-match (e.g. 999.1.1.1); this rejects those.
+                    ipaddress.ip_address(candidate)
+
+                except ValueError:
+
+                    continue
+
+                parser._log(outfile, f"{label} '{candidate}' found in {parser.activefn}\n")
+
+                parser.statistics[label] += 1
+
+                parser.parsed_ipaddr.append(candidate)
+
+    def detect_hostnames(parser, content, outfile):
+
+        """Record every hostname-shaped token in `content` (case-insensitive)."""
+
+        for hostname in re.findall(parser.hostname_regex_pattern, content, re.IGNORECASE):
+
+            parser._log(outfile, f"Hostname '{hostname}' found in {parser.activefn}\n")
+
+            parser.statistics["Hostnames"] += 1
+
+            parser.parsed_hostnames.append(hostname)
+
+    def detect_keywords(parser, content, outfile, keywords):
+
+        """Count each keyword that occurs in `content` (case-insensitive), at most once per keyword per file."""
+
+        if not keywords:
+
+            return
+
+        # Lower-case the file once instead of once per keyword.
+        lowered_content = content.lower()
+
+        for keyword in keywords:
+
+            # An empty keyword would match every file.
+            if keyword and keyword.lower() in lowered_content:
+
+                parser._log(outfile, f"Keyword '{keyword}' found in {parser.activefn}\n")
+
+                parser.statistics["Keywords"] += 1
+
+    def detect_emails(parser, content, outfile, no_email):
+
+        """
+        Record every e-mail address in `content`.
+
+        When word search is enabled, addresses listed in `no_email` (the
+        loaded wordlist, possibly `None`) are treated as known false
+        positives and skipped.
+        """
+
+        # `no_email` is None when no wordlist could be loaded.
+        if parser.detect_wordsearchformat and no_email:
+
+            excluded = set(no_email)
+
+        else:
+
+            excluded = set()
+
+        for email in re.findall(parser.email_regex_pattern, content):
+
+            if email in excluded:
+
+                continue
+
+            parser._log(outfile, f"Email '{email}' found in {parser.activefn}\n")
+
+            parser.statistics["Emails"] += 1
+
+            parser.parsed_emailids.append(email)
+
+    def load_keywords(parser):
+
+        """Return the non-blank, stripped lines of the wordlist file, or `None` when no readable wordlist was given."""
+
+        if not parser.verify_wswordlistfile:
+
+            return None
+
+        with open(parser.wswordlistfile, mode='r') as wlf:
+
+            stripped_lines = (line.strip() for line in wlf)
+
+            # Blank lines are dropped: an empty keyword is a substring of
+            # every file and would inflate the keyword statistics.
+            return [entry for entry in stripped_lines if entry]
+
+@cli_main.command(name='X', help=f'HTML Mail Regular Expression Search Pattern Detection Software Version {prog_version}\n\nThis Script Is Also An Importable Class!, try: >>> from IPExtractX import IPExtractX')
+def extract_main(content_dir: str, keywords_file: str = f'{os.getcwd()}/wordlists/words1.list', output_file: str = 'output.txt', detect_ipaddr: bool = True, detect_hostname: bool = False, detect_wordsearch: bool = False, detect_emails: bool = False, detect_pgpheader: bool = False):
+
+    # Command line entry point: build a parser from the CLI options, run it
+    # and exit with a status that reflects the outcome.
+    # Arguments are passed by keyword so each option reaches the intended
+    # constructor parameter even if the parameter order ever changes.
+    parser_fileop = IPExtractX(
+        input_path=content_dir,
+        outfile=output_file,
+        ws_kwfile=keywords_file,
+        ipaddr=detect_ipaddr,
+        hn=detect_hostname,
+        ws=detect_wordsearch,
+        emlformat=detect_emails,
+        pgpheader=detect_pgpheader,
+    ).execute_parser()
+
+    ### Custom Code Goes Here ###
+    # print(parser_fileop.parsed_ipaddr)
+    # print(parser_fileop.parsed_hostnames)
+
+    # Exit 0 on a clean run; exit 1 when the input path does not exist or
+    # at least one error was counted, so shell callers can tell them apart.
+    input_found = parser_fileop.verify_dirpath_isdir or parser_fileop.verify_dirpath_isfile
+    run_failed = (not input_found) or parser_fileop.statistics["Errors"] > 0
+
+    raise SystemExit(1 if run_failed else 0)
+
+
+
+if __name__ == "__main__":
+    # Run the Typer application only when executed as a script, so that
+    # importing this module has no command line side effects.
+    cli_main()