Fixing coderabbit suggestions

DefinetlyNotAI · DefinetlyNotAI · commit a11ce93ba425 · 2025-01-03T20:36:18.000+04:00
Flag.py had 2 slow imports; made them local imports that are loaded only when needed, and changed sentence-transformers to show logs only when DEBUG is true

Updated exception logging to discourage its use, so that only one script now uses it; updated parse_execution logging to emit an internal error if the given list is not in the correct format; also added "except" as an alias for "exception" in the string() function

vulnscan.py, sensitive_data_miner.py, event_log.py: Changed code to follow good practices

dump_memory.py: added a truncation system so that the dump file cannot grow too large

SECURITY.md now includes the release date of the versions!

Fixed a HUGE bug in sensitive_data_miner.py where paths were not constructed properly, so the correct directories were never searched.

Signed-off-by: Shahm Najeeb <Nirt_12023@outlook.com>
diff --git a/CODE/config.ini b/CODE/config.ini
@@ -27,6 +27,7 @@ files = "bluetooth_details.py, bluetooth_logger.py, browser_miner.ps1, cmd_comma
 # The default is 30.0, and is what we advise
 # If the accuracy is below this, the flag will move to the next suggestion process
 # The process is: difflib, then model, then history suggestions
+# Make sure to keep between 0.0 and 100.0
 accuracy_min = 30.0
 
 # This is the model to use to suggest flags,
@@ -38,6 +39,7 @@ model_to_use = all-MiniLM-L6-v2
 # Finally, should debug mode be enabled for the flag module?
 # This will print out more information to the console,
 # This is for the model itself, and is based on tqdm, it shows extra info on batches
+# As well as more information on behind the scenes
 model_debug = false
 
 ###################################################
diff --git a/CODE/dump_memory.py b/CODE/dump_memory.py
@@ -9,6 +9,14 @@
 if __name__ == "__main__":
     log = Log({"log_level": DEBUG})
 
+# TODO v3.3.1
+#  psutil.virtual_memory(): used, free, percent, total
+#  psutil.swap_memory(): used, free, percent, total
+
+# If the file size exceeds this limit, the file will be truncated with a message
+# Put 0 to disable the limit
+LIMIT_FILE_SIZE = 20  # Always in MiB
+
 
 # Capture RAM Snapshot
 def capture_ram_snapshot():
@@ -23,6 +31,7 @@ def capture_ram_snapshot():
             file.write(f"Total Swap: {swap.total / (1024 ** 3):.2f} GB\n")
             file.write(f"Used Swap: {swap.used / (1024 ** 3):.2f} GB\n")
             file.write(f"Free Swap: {swap.free / (1024 ** 3):.2f} GB\n")
+            file.write(f"Percent RAM Used: {memory.percent:.2f}%\n")
         except Exception as e:
             log.error(f"Error writing RAM snapshot: {e}")
             file.write("Error writing RAM snapshot.")
@@ -56,6 +65,7 @@ def memory_dump():
     try:
         process = psutil.Process(pid)
         with open("Ram_Dump.txt", "wb") as dump_file:
+            total_size = 0
             for mem_region in process.memory_maps(grouped=False):
                 # Check if the memory region is readable ('r' permission)
                 if 'r' in mem_region.perms:
@@ -91,10 +101,15 @@ def memory_dump():
 
                     # Write the metadata to the dump file
                     try:
-                        dump_file.write(f"Memory Region Metadata:\n".encode())
-                        for key, value in region_metadata.items():
-                            dump_file.write(f"{key}: {value}\n".encode())
-                        dump_file.write(b"\n")
+                        metadata_str = "Memory Region Metadata:\n" + "\n".join(
+                            f"{key}: {value}" for key, value in region_metadata.items()) + "\n\n"
+                        metadata_bytes = metadata_str.encode()
+                        if total_size + len(metadata_bytes) > LIMIT_FILE_SIZE * 1024 * 1024 and LIMIT_FILE_SIZE != 0:
+                            dump_file.write(f"Truncated due to file exceeding {LIMIT_FILE_SIZE}\n"
+                                            "Additional memory regions not included.\n".encode())
+                            break
+                        dump_file.write(metadata_bytes)
+                        total_size += len(metadata_bytes)
                     except Exception as e:
                         log.error(f"Error writing memory region metadata: {str(e)}")
     except psutil.Error as e:
diff --git a/CODE/event_log.py b/CODE/event_log.py
@@ -1,7 +1,6 @@
 import os
 import shutil
 import threading
-from os import mkdir
 
 import wmi  # Import the wmi library
 
@@ -51,6 +50,8 @@ def parse_event_logs(log_type: str, output_file: str):
                 f.write(str(event_data) + '\n\n')
 
         log.info(f"{log_type} events (Windows Events) have been written to {output_file}")
+    except wmi.x_wmi as err:
+        log.error(f"Error opening or reading the event log: {err}")
     except Exception as err:
         log.error(f"Fatal issue: {err}")
 
@@ -59,7 +60,7 @@ def parse_event_logs(log_type: str, output_file: str):
     try:
         if os.path.exists('event_logs'):
             shutil.rmtree('event_logs')
-        mkdir('event_logs')
+        os.mkdir('event_logs')
     except Exception as e:
         log.error(f"Fatal issue: {e}")
         exit(1)
diff --git a/CODE/logicytics/Execute.py b/CODE/logicytics/Execute.py
@@ -6,7 +6,7 @@
 
 class Execute:
     @classmethod
-    def script(cls, script_path: str) -> list[list[str]] | None:
+    def script(cls, script_path: str) -> list[list[str, str]] | None:
         """
         Executes a script file and handles its output based on the file extension.
         Parameters:
diff --git a/CODE/logicytics/Flag.py b/CODE/logicytics/Flag.py
@@ -9,9 +9,6 @@
 from collections import Counter
 from datetime import datetime
 
-import matplotlib.pyplot as plt
-from sentence_transformers import SentenceTransformer, util
-
 # Check if the script is being run directly, if not, set up the library
 if __name__ == '__main__':
     exit("This is a library, Please import rather than directly run.")
@@ -31,11 +28,11 @@
     DEBUG_MODE = config.getboolean("Flag Settings", "model_debug")  # Debug mode for Sentence Transformer
     # File for storing user history data
     HISTORY_FILE = 'logicytics/User_History.json.gz'  # User history file
-    if DEBUG_MODE:
-        print(f"Loading Sentence Transformer model...")
     # Minimum accuracy threshold for flag suggestions
     MIN_ACCURACY_THRESHOLD = float(
         config.get("Flag Settings", "accuracy_min"))  # Minimum accuracy threshold for flag suggestions
+    if not 0 <= MIN_ACCURACY_THRESHOLD <= 100:
+        raise ValueError("accuracy_min must be between 0 and 100")
 
 
 class Match:
@@ -45,7 +42,23 @@ def __get_sim(user_input: str, all_descriptions: list[str]) -> list[float]:
         Get the similarity between the user input and the flag description.
         """
         # Encode the current user input and historical inputs
-        MODEL = SentenceTransformer(config.get("Flag Settings", "model_to_use"))
+        from sentence_transformers import SentenceTransformer, util
+
+        import logging  # Suppress logging messages from Sentence Transformer due to verbosity
+        # Set the logging level based on the debug mode, either DEBUG or ERROR (aka only important messages)
+        if DEBUG_MODE:
+            logging.getLogger("sentence_transformers").setLevel(logging.DEBUG)
+        else:
+            logging.getLogger("sentence_transformers").setLevel(logging.ERROR)
+
+        try:
+            MODEL = SentenceTransformer(config.get("Flag Settings", "model_to_use"))
+        except Exception as e:
+            print(f"Error: {e}")
+            print("Please check the model name in the config file.")
+            print(f"Model name {config.get('Flag Settings', 'model_to_use')} may not be valid.")
+            exit(1)
+
         user_embedding = MODEL.encode(user_input, convert_to_tensor=True, show_progress_bar=DEBUG_MODE)
         historical_embeddings = MODEL.encode(all_descriptions, convert_to_tensor=True, show_progress_bar=DEBUG_MODE)
 
@@ -95,6 +108,8 @@ def _generate_summary_and_graph(cls):
         """Generates a full summary and graph based on user history data."""
         # TODO Yet in beta
         # Load the decompressed history data using the load_history function
+        import matplotlib.pyplot as plt
+
         if not os.path.exists(HISTORY_FILE):
             exit("No history data found.")
 
diff --git a/CODE/logicytics/Logger.py b/CODE/logicytics/Logger.py
@@ -3,6 +3,7 @@
 import inspect
 import logging
 import os
+import time
 from datetime import datetime
 from typing import Type
 
@@ -119,7 +120,7 @@ def __trunc_message(self, message: str) -> str:
 
     def __internal(self, message):
         """
-        Logs an internal message.
+        Logs an internal message. Internal messages are displayed in the console only.
 
         :param message: The internal message to be logged.
         """
@@ -138,6 +139,8 @@ def debug(self, message):
     def raw(self, message):
         """
         Logs a raw message directly to the log file.
+        This should only be called from within the Log class.
+        So do not use this method in your code.
 
         :param message: The raw message to be logged.
         """
@@ -150,7 +153,7 @@ def raw(self, message):
             try:
                 with open(self.filename, "a", encoding="utf-8") as f:
                     f.write(f"{str(message)}\n")
-            except UnicodeDecodeError or UnicodeEncodeError as UDE:
+            except (UnicodeDecodeError, UnicodeEncodeError) as UDE:
                 self.__internal(
                     f"UnicodeDecodeError: {UDE} - Message: {str(message)}"
                 )
@@ -221,7 +224,7 @@ def string(self, message, type: str):
         :param type: The type of the log message.
         """
         if self.color and message != "None" and message is not None:
-            type_map = {"err": "error", "warn": "warning", "crit": "critical"}
+            type_map = {"err": "error", "warn": "warning", "crit": "critical", "except": "exception"}
             type = type_map.get(type.lower(), type)
             try:
                 getattr(self, type.lower())(str(message))
@@ -245,20 +248,27 @@ def exception(self, message, exception_type: Type = Exception):
             )
         raise exception_type(message)
 
-    def parse_execution(self, message_log: list[list[str]]):
+    def parse_execution(self, message_log: list[list[str, str]]):
         """
         Parses and logs a list of messages with their corresponding log types.
+        Only use this method if you have a list of lists where each inner list contains a message and its log type.
+        Use case include "Execute.script()" function.
 
         :param message_log: A list of lists, where each inner list contains a message and its log type.
         """
         if message_log:
             for message_list in message_log:
                 if len(message_list) == 2:
                     self.string(message_list[0], message_list[1])
+                else:
+                    self.__internal(
+                        f"Message List is not in the correct format: {message_list}"
+                    )
 
     def function(self, func: callable):
         """
-        A decorator that logs the execution of a function, including its start time, end time, and elapsed time.
+        A decorator that logs the execution of a function,
+        including its start time, end time, and elapsed time.
 
         :param func: The function to be decorated.
         :return: The wrapper function.
@@ -275,12 +285,12 @@ def wrapper(*args, **kwargs):
             """
             if not callable(func):
                 self.exception(f"Function {func.__name__} is not callable.", TypeError)
-            start_time = datetime.now()
+            start_time = time.perf_counter()
             self.debug(f"Running the function {func.__name__}().")
             result = func(*args, **kwargs)
-            end_time = datetime.now()
+            end_time = time.perf_counter()
             elapsed_time = end_time - start_time
-            self.debug(f"Function {func.__name__}() executed in {elapsed_time}.")
+            self.debug(f"Function {func.__name__}() executed in {elapsed_time:.6f} seconds.")
             return result
 
         return wrapper
diff --git a/CODE/packet_sniffer.py b/CODE/packet_sniffer.py
@@ -248,12 +248,12 @@ def packet_sniffer():
     if packet_count <= 0 or timeout <= 0:
         try:
             log.error(
-                f"Oops! Can't work with these values:\n"
+                "Oops! Can't work with these values:\n"
                 f"- Packet count: {packet_count} {'❌ (must be > 0)' if packet_count <= 0 else '✅'}\n"
                 f"- Timeout: {timeout} {'❌ (must be > 0)' if timeout <= 0 else '✅'}"
             )
         except Exception:
-            log.error(f"Error reading configuration: Improper values for packet count or timeout")
+            log.error("Error reading configuration: Improper values for packet count or timeout")
         exit(1)
 
     try:
@@ -263,8 +263,11 @@ def packet_sniffer():
         if interface == "WiFi" or interface == "Wi-Fi":
             log.warning("Attempting to correct the interface name...")
             interface = "Wi-Fi" if interface == "WiFi" else "WiFi"
-            log.info(f"Interface name corrected to '{interface}'.")
-            start_sniffing(interface, packet_count, timeout)
+            log.info(f"Interface name auto-corrected to '{interface}', retrying packet sniffing...")
+            try:
+                start_sniffing(interface, packet_count, timeout)
+            except Exception as err:
+                log.error(f"Error sniffing packets on auto-corrected interface '{interface}': {err}")
 
 
 # Entry point of the script
diff --git a/CODE/sensitive_data_miner.py b/CODE/sensitive_data_miner.py
@@ -34,14 +34,24 @@ def __search_files_by_keyword(root: Path, keyword: str) -> list:
             list: List of files that match the search criteria.
         """
         matching_files = []
-        for filename in os.listdir(root):
+        path_list = []
+        try:
+            path_list = os.listdir(root)
+        except WindowsError as e:
+            if DEBUG:
+                # Log the error if in debug mode, as it is a common occurrence.
+                log.warning(f"Permission Denied: {e}")
+
+        for filename in path_list:
             file_path = root / filename
             if (
                     keyword.lower() in filename.lower()
                     and file_path.is_file()
                     and file_path.suffix in allowed_extensions
             ):
                 matching_files.append(file_path)
+            else:
+                log.debug(f"Skipped {file_path}, Unsupported due to {file_path.suffix} extension")
         return matching_files
 
     @staticmethod
@@ -79,8 +89,10 @@ def __search_and_copy_files(cls, keyword: str):
 
         with ThreadPoolExecutor() as executor:
             for root, dirs, files in os.walk(drives_root):
-                future_to_file = {executor.submit(cls.__search_files_by_keyword, Path(root), keyword): root_path for
-                                  root_path in dirs}
+                future_to_file = {
+                    executor.submit(cls.__search_files_by_keyword, Path(root) / sub_dir, keyword): sub_dir
+                    for sub_dir in dirs
+                }
                 for future in future_to_file:
                     for file_path in future.result():
                         dst_file_path = destination / file_path.name
@@ -110,5 +122,6 @@ def passwords(cls):
 
 
 if __name__ == "__main__":
-    log.warning("Sensitive Data Miner Started, This may take a while... (aka touch some grass and drink coffee)")
+    log.warning(
+        "Sensitive Data Miner Initialized. Processing may take a while... (Consider a break: coffee or fresh air recommended!)")
     Mine.passwords()
diff --git a/CODE/vulnscan.py b/CODE/vulnscan.py
@@ -170,7 +170,7 @@ def vulnscan(model, SCAN_PATH, vectorizer):
     ]
 
     for base_path in base_paths:
-        for root, dirs, files_main in os.walk(base_path):
+        for root, _, files_main in os.walk(base_path):
             for file_main in files_main:
                 paths.append(os.path.join(root, file_main))
 
diff --git a/PLANS.md b/PLANS.md
@@ -8,6 +8,7 @@
 | Task                                                                                         | Version | Might or Will be done? |
 |----------------------------------------------------------------------------------------------|---------|------------------------|
 | Remove deprecated feature: `_generate_data.py`                                               | v3.4.0  | ✅                      |
+| New feature: Psutil Network functions, most likely `net_info.py`                             | v3.4.0  | ✅                      |
 | Implement the 2 missing flags                                                                | v3.5.0  | ✅                      |
 | Move VulnScan tools and v3 module to separate repository, keep only the model and vectorizer | v3.5.0  | ✅                      |
 | Encrypted Volume Detection and Analysis, Advanced USB Device History Tracker                 | v3.6.0  | ✅                      |
diff --git a/SECURITY.md b/SECURITY.md
@@ -4,25 +4,25 @@
 
 This section outlines the versions of our project that are currently supported with security updates.
 
-| Version | Supported |
-|---------|-----------|
-| 3.3.x   | ✅         |
-| 3.2.x   | ✅         |
-| 3.1.x   | ⚠️        |
-| 3.0.x   | ⚠️        |
-| 2.5.x   | ✖️        |
-| 2.4.x   | ✖️        |
-| 2.3.x   | ✖️        |
-| 2.2.x   | ✖️        |
-| 2.1.x   | ✖️        |
-| 2.0.x   | ✖️        |
-| 1.6.x   | ❌         |
-| 1.5.x   | ❌         |
-| 1.4.x   | ❌         |
-| 1.3.x   | ❌         |
-| 1.2.x   | ❌         |
-| 1.1.x   | ❌         |
-| 1.0.x   | ❌         |
+| Version | Supported | Release Date    |
+|---------|-----------|-----------------|
+| 3.3.x   | ✅         | January 3, 2025 |
+| 3.2.x   | ✅         | Dec 19, 2024    |
+| 3.1.x   | ⚠️        | Dec 11, 2024    |
+| 3.0.x   | ⚠️        | Dec 6, 2024     |
+| 2.5.x   | ✖️        | Nov 25, 2024    |
+| 2.4.x   | ✖️        | Nov 12, 2024    |
+| 2.3.x   | ✖️        | Sep 21, 2024    |
+| 2.2.x   | ✖️        | Sep 9, 2024     |
+| 2.1.x   | ✖️        | Aug 29, 2024    |
+| 2.0.x   | ✖️        | Aug 25, 2024    |
+| 1.6.x   | ❌         | Jun 18, 2024    |
+| 1.5.x   | ❌         | Jun 10, 2024    |
+| 1.4.x   | ❌         | May 30, 2024    |
+| 1.3.x   | ❌         | May 21, 2024    |
+| 1.2.x   | ❌         | May 16, 2024    |
+| 1.1.x   | ❌         | May 10, 2024    |
+| 1.0.x   | ❌         | May 4, 2024     |
 
 
 ### Key:
diff --git a/requirements.txt b/requirements.txt

Original file line number	Diff line number	Diff line change
`@@ -170,7 +170,7 @@ def vulnscan(model, SCAN_PATH, vectorizer):`
`170`	`170`	`]`
`171`	`171`
`172`	`172`	`for base_path in base_paths:`
`173`		`- for root, dirs, files_main in os.walk(base_path):`
	`173`	`+ for root, _, files_main in os.walk(base_path):`
`174`	`174`	`for file_main in files_main:`
`175`	`175`	`paths.append(os.path.join(root, file_main))`
`176`	`176`