diff --git a/.github/config.yml b/.github/config.yml
index 738f3c05..3ad48885 100644
--- a/.github/config.yml
+++ b/.github/config.yml
@@ -15,7 +15,7 @@ requestInfoDefaultTitles:
- update
# *OPTIONAL* Label to be added to Issues and Pull Requests with insufficient information given
-requestInfoLabelToAdd: progress:Invalid
+requestInfoLabelToAdd: issue/Invalid
# *OPTIONAL* Require Issues to contain more information than what is provided in the issue templates
# Will fail if the issue's body is equal to a provided template
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
deleted file mode 100644
index 86f165e6..00000000
--- a/.github/workflows/codeql.yml
+++ /dev/null
@@ -1,78 +0,0 @@
-# For most projects, this workflow file will not need changing; you simply need
-# to commit it to your repository.
-#
-# You may wish to alter this file to override the set of languages analyzed,
-# or to provide custom queries or build logic.
-#
-# ******** NOTE ********
-# We have attempted to detect the languages in your repository. Please check
-# the `language` matrix defined below to confirm you have the correct set of
-# supported CodeQL languages.
-#
-name: "CodeQL"
-
-on:
- push:
- branches: ["main"]
- pull_request:
- # The branches below must be a subset of the branches above
- branches: ["main"]
- schedule:
- - cron: "0 0 * * 1"
-
-permissions:
- contents: read
-
-jobs:
- analyze:
- name: Analyze
- runs-on: ubuntu-latest
- permissions:
- actions: read
- contents: read
- security-events: write
-
- strategy:
- fail-fast: false
- matrix:
- language: ["python"]
- # CodeQL supports [ $supported-codeql-languages ]
- # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
-
- steps:
- - name: Harden Runner
- uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
- with:
- egress-policy: audit
-
- - name: Checkout repository
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
- # Initializes the CodeQL tools for scanning.
- - name: Initialize CodeQL
- uses: github/codeql-action/init@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10
- with:
- languages: ${{ matrix.language }}
- # If you wish to specify custom queries, you can do so here or in a config file.
- # By default, queries listed here will override any specified in a config file.
- # Prefix the list here with "+" to use these queries and those in the config file.
-
- # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
- # If this step fails, then you should remove it and run the build manually (see below)
- - name: Autobuild
- uses: github/codeql-action/autobuild@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10
-
- # ℹ️ Command-line programs to run using the OS shell.
- # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
-
- # If the Autobuild fails above, remove it and uncomment the following three lines.
- # modify them (or add more) to build your code; if your project needs custom build steps, please refer to the EXAMPLE below for guidance.
-
- # - run: |
- # echo "Run, Build Application using script"
- # ./location_of_script_within_repo/buildscript.sh
-
- - name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10
- with:
- category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
deleted file mode 100644
index 4a15a1df..00000000
--- a/.github/workflows/dependency-review.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Dependency Review Action
-#
-# This Action will scan dependency manifest files that change as part of a Pull Request,
-# surfacing known-vulnerable versions of the packages declared or updated in the PR.
-# Once installed, if the workflow run is marked as required,
-# PRs introducing known-vulnerable packages will be blocked from merging.
-#
-# Source repository: https://github.com/actions/dependency-review-action
-name: 'Dependency Review'
-on: [pull_request]
-
-permissions:
- contents: read
-
-jobs:
- dependency-review:
- runs-on: ubuntu-latest
- steps:
- - name: Harden Runner
- uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
- with:
- egress-policy: audit
-
- - name: 'Checkout Repository'
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- - name: 'Dependency Review'
- uses: actions/dependency-review-action@3b139cfc5fae8b618d3eae3675e383bb1769c019 # v4.5.0
diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml
index 794f83e4..9d4a3ede 100644
--- a/.github/workflows/greetings.yml
+++ b/.github/workflows/greetings.yml
@@ -7,7 +7,7 @@ permissions:
jobs:
greeting:
- runs-on: ubuntu-latest
+ runs-on: windows-latest
permissions:
issues: write
pull-requests: write
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index d979ed4d..1d56287e 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -15,7 +15,7 @@ permissions:
jobs:
stale:
- runs-on: ubuntu-latest
+ runs-on: windows-latest
permissions:
issues: write
pull-requests: write
diff --git a/.idea/csv-editor.xml b/.idea/csv-editor.xml
new file mode 100644
index 00000000..487ddc99
--- /dev/null
+++ b/.idea/csv-editor.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/CODE/Logicytics.py b/CODE/Logicytics.py
index bbe11631..bd770446 100644
--- a/CODE/Logicytics.py
+++ b/CODE/Logicytics.py
@@ -490,8 +490,6 @@ def handle_sub_action():
subprocess.call("shutdown /s /t 3", shell=False)
elif SUB_ACTION == "reboot":
subprocess.call("shutdown /r /t 3", shell=False)
- # elif sub_action == "webhook":
- # TODO: Implement this in future v3.5
@log.function
diff --git a/CODE/VulnScan/README.md b/CODE/VulnScan/README.md
deleted file mode 100644
index 8c463bf3..00000000
--- a/CODE/VulnScan/README.md
+++ /dev/null
@@ -1,148 +0,0 @@
-# VulnScan Documentation
-
-> [!WARNING]
-> # VulnScan Migration Notice 🚨
->
-> VulnScan is moving to a new home! Here's what you need to know:
-> - This repository section will be archived
-> - A link to the new repository will be added here
-> - The `pkl` and `pth` files will stay here (they'll work with `vulnscan.py`)
-> - VulnScan itself will NOT be moved to the other repository
->
-> This will occur in version `3.2.0` of Logicytics.
-
-## Overview
-
-VulnScan is designed to detect sensitive data across various file formats. It offers a modular framework to train models using diverse algorithms, from traditional ML classifiers to advanced Neural Networks. This document outlines the system's naming conventions, lifecycle, and model configuration.
-
-> The model currently in use is `Model SenseMini .3n3`, with a vectorizer produced by `tools/_vectorizer.py` (trained on the random dataset).
-
----
-
-## Naming Conventions
-
-### Model Naming Format
-`Model {Type of model} .{Version}`
-
-- **Type of Model**: Describes the training data configuration.
- - `Sense`: Sensitive data set with 50k files, each 50KB in size.
- - `SenseNano`: Test set with 5-10 files, each 5KB, used for error-checking.
- - `SenseMacro`: Large dataset with 1M files, each 10KB. This is computationally intensive, so some corners were cut in training.
- - `SenseMini`: Dataset with 10K files, each between 10-200KB. Balanced size for effective training and resource efficiency.
-
-- **Version Format**: `{Version#}{c}{Repeat#}`
- - **Version#**: Increment for major code updates.
- - **c**: Model identifier (e.g., NeuralNetwork, BERT, etc.). See below for codes.
- - **Repeat#**: Number of times the same model was trained without significant code changes, used to improve consistency.
- - **-F**: Denotes a failed model or a corrupted model.
-
-### Model Identifiers
-
-| Code | Model Type |
-|------|---------------------------|
-| `b` | BERT |
-| `dt` | DecisionTree |
-| `et` | ExtraTrees |
-| `g` | GBM |
-| `l` | LSTM |
-| `n` | NeuralNetwork (preferred) |
-| `nb` | NaiveBayes |
-| `r` | RandomForestClassifier |
-| `lr` | Logistic Regression |
-| `v` | SupportVectorMachine |
-| `x` | XGBoost |
-
-### Example
-`Model Sense .1n2`:
-- Dataset: `Sense` (50k files, 50KB each).
-- Version: 1 (first major version).
-- Model: `NeuralNetwork` (`n`).
-- Repeat Count: 2 (second training run with no major code changes).
-
----
-
-## Life Cycle Phases
-
-### Version 1 (Deprecated)
-- **Removed**: Small and weak codebase, replaced by `v3`.
-
-1. Generate data.
-2. Index paths.
-3. Read paths.
-4. Train models and iterate through epochs.
-5. Produce outputs: data, graphs, and `.pkl` files.
-
----
-
-### Version 2 (Deprecated)
-- **Deprecation Reason**: Outdated methods for splitting and vectorizing data.
-
-1. Load Data.
-2. Split Data.
-3. Vectorize Text.
-4. Initialize Model.
-5. Train Model.
-6. Evaluate Model.
-7. Save Model.
-8. Track Progress.
-
----
-
-### Version 3 (Current)
-1. **Read Config**: Load model and training parameters.
-2. **Load Data**: Collect and preprocess sensitive data.
-3. **Split Data**: Separate into training and validation sets.
-4. **Vectorize Text**: Transform textual data using `TfidfVectorizer`.
-5. **Initialize Model**: Define traditional ML or Neural Network models.
-6. **Train Model**: Perform iterative training using epochs.
-7. **Validate Model**: Evaluate with metrics and generate classification reports.
-8. **Save Model**: Persist trained models and vectorizers for reuse.
-9. **Track Progress**: Log and visualize accuracy and loss trends over epochs.
-
----
-
-## Preferred Model
-**NeuralNetwork (`n`)**
-- Proven to be the most effective for detecting sensitive data in the project.
-
----
-
-## Notes
-- **Naming System**: Helps track model versions, datasets, and training iterations for transparency and reproducibility.
-- **Current Focus**: Transition to `v3` for improved accuracy, flexibility, and robust performance.
-
----
-
-## Additional Features
-
-- **Progress Tracking**: Visualizes accuracy and loss per epoch with graphs.
-- **Error Handling**: Logs errors for missing files, attribute issues, or unexpected conditions.
-- **Extensibility**: Supports plug-and-play integration for new algorithms or datasets.
-
-
-## More Files
-
-There is a repository that archives all the data used to train the model,
-as well as previously trained models for you to test out
-(loading scripts and vectorizers are not included).
-
-The repository is located [here](https://github.com/DefinetlyNotAI/VulnScan_TrainingData).
-
-The repository contains the following directories:
-- `Archived Models`: Contains the previously trained models, organized by model type and then version.
-- `NN features`: Contains information about the model `.3n3` and the vectorizer used. The information includes:
- - `Documentation_Study_Network.md`: A markdown file that contains more info.
- - `Neural Network Nodes Graph.gexf`: A Gephi file that contains the model nodes and edges.
- - `Nodes and edges (GEPHI).csv`: A CSV file that contains the model nodes and edges.
- - `Statistics`: Directories made by Gephi, containing the statistics of the model nodes and edges.
- - `Feature_Importance.svg`: An SVG file that contains the feature importance of the model.
- - `Loss_Landscape_3D.html`: An HTML file that contains the 3D loss landscape of the model.
- - `Model Accuracy Over Epochs.png` and `Model Loss Over Epochs.png`: PNG files that contain the model accuracy and loss over epochs.
- - `Model state dictionary.txt`: A text file that contains the model state dictionary.
- - `Model Summary.txt`: A text file that contains the model summary.
- - `Model Visualization.png`: A PNG file that contains the model visualization.
- - `Top_90_Features.svg`: An SVG file that contains the top 90 features of the model.
- - `Vectorizer features.txt`: A text file that contains the vectorizer features.
- - `Visualize Activation.png`: A PNG file that contains the visualization of the model activation.
- - `Visualize t-SNE.png`: A PNG file that contains the visualization of the model t-SNE.
- - `Weight Distribution.png`: A PNG file that contains the weight distribution of the model.
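For quick reference, here is a minimal sketch of loading the shipped model/vectorizer pair and scoring one string, mirroring the `torch.load` / `joblib.load` calls in `_study_network.py` below. The relative paths and the class-index convention (index 1 = sensitive) are assumptions drawn from the training script later in this diff; this is not the `vulnscan.py` implementation.

```python
# Hedged sketch: load the archived SenseMini .3n3 model and its TF-IDF vectorizer,
# then score a single string. Paths and the "index 1 == sensitive" convention are
# assumptions, not the vulnscan.py code path.
import joblib
import torch

vectorizer = joblib.load("Vectorizer .3n3.pkl")                      # fitted TfidfVectorizer
model = torch.load("Model SenseMini .3n3.pth", weights_only=False)   # full nn.Module
model.eval()

text = "Name: John Doe, SSN: 078-05-1120"
features = torch.tensor(vectorizer.transform([text]).toarray(), dtype=torch.float32)
with torch.no_grad():
    probabilities = torch.softmax(model(features), dim=1)
print(f"P(sensitive) = {probabilities[0, 1].item():.3f}")
```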
diff --git a/CODE/VulnScan/tools/_study_network.py b/CODE/VulnScan/tools/_study_network.py
deleted file mode 100644
index 907c8576..00000000
--- a/CODE/VulnScan/tools/_study_network.py
+++ /dev/null
@@ -1,624 +0,0 @@
-from __future__ import annotations
-
-import os
-import os.path
-import random
-from collections import OrderedDict
-from configparser import ConfigParser
-from os import mkdir
-from typing import Any
-
-import joblib
-import matplotlib.pyplot as plt
-import networkx as nx
-import numpy as np
-import plotly.graph_objects as go
-import seaborn as sns
-import torch
-import torch.nn as nn
-from faker import Faker
-from numpy import ndarray, dtype
-from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-from sklearn.manifold import TSNE
-from torch import device
-from torch.utils.data import DataLoader, TensorDataset
-from torchviz import make_dot
-from tqdm import tqdm
-
-
-# Example of DataLoader for loss landscape (dummy dataset for visualization)
-class DummyDataset(torch.utils.data.Dataset):
- """
- A dummy dataset for generating synthetic data for visualization purposes.
-
- Attributes:
- num_samples (int): Number of samples in the dataset.
- input_dim (int): Dimension of the input data.
- data (list): List of generated data samples.
- labels (list): List of labels corresponding to the data samples.
- """
-
- def __init__(self, num_samples: int = 100, input_dim: int = 10000):
- """
- Initializes the DummyDataset with the specified number of samples and input dimension.
-
- Args:
- num_samples (int): Number of samples to generate.
- input_dim (int): Dimension of the input data.
- """
- self.num_samples = num_samples
- self.input_dim = input_dim
- self.data: list[str] = []
- self.labels: list[int] = []
- faker = Faker()
- for _ in range(num_samples):
- if random.random() < 0.05: # 5% chance to include sensitive data
- self.data.append(f"Name: {faker.name()}, SSN: {faker.ssn()}, Address: {faker.address()}")
- self.labels.append(1) # Label as sensitive
- else:
- self.data.append(faker.text(max_nb_chars=100)) # Non-sensitive data
- self.labels.append(0) # Label as non-sensitive
-
- def __len__(self) -> int:
- """
- Returns the number of samples in the dataset.
-
- Returns:
- int: Number of samples in the dataset.
- """
- return self.num_samples
-
- def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
- """
- Retrieves the data and label at the specified index.
-
- Args:
- idx (int): Index of the data and label to retrieve.
-
- Returns:
- tuple: A tuple containing the data tensor and label tensor.
- """
- data = self.data[idx]
- label = self.labels[idx]
- # Convert data to tensor of ASCII values and pad to input_dim
- data_tensor = torch.tensor([ord(c) for c in data], dtype=torch.float32)
- if len(data_tensor) < self.input_dim:
- padding = torch.zeros(self.input_dim - len(data_tensor))
- data_tensor = torch.cat((data_tensor, padding))
- else:
- data_tensor = data_tensor[:self.input_dim]
- label_tensor = torch.tensor(label, dtype=torch.long)
- return data_tensor, label_tensor
-
-
-def load_data(text_data: list[str], vectorizer_to_load: TfidfVectorizer | CountVectorizer) -> DataLoader:
- """
- Vectorizes the text data and creates a DataLoader for it.
-
- Args:
- text_data (list of str): The text data to be vectorized.
- vectorizer_to_load: The vectorizer to use for transforming the text data.
-
- Returns:
- DataLoader: A DataLoader containing the vectorized text data and dummy labels.
- """
- # Vectorize the text data
- X = vectorizer_to_load.transform(text_data)
- # Create a dummy label for visualization (replace with real labels if available)
- y = np.zeros(len(text_data))
- # Convert to torch tensors
- X_tensor = torch.tensor(X.toarray(), dtype=torch.float32)
- y_tensor = torch.tensor(y, dtype=torch.long)
- dataset = TensorDataset(X_tensor, y_tensor)
- return DataLoader(dataset, batch_size=32, shuffle=True)
-
-
-def visualize_weight_distribution(model_to_load: torch.nn.Module):
- # Access weights of the first layer
- weights = model_to_load[0].weight.detach().cpu().numpy() # Move tensor to CPU before conversion to numpy
- plt.hist(weights.flatten(), bins=50)
- plt.title("Weight Distribution - First Layer")
- plt.xlabel("Weight Value")
- plt.ylabel("Frequency")
- plt.savefig("NN features/Weight Distribution.png")
- plt.close()
-
-
-def visualize_activations(model_to_load: torch.nn.Module, input_tensor: torch.Tensor):
- # Check the device of the model
- device_va = next(model_to_load.parameters()).device
-
- # Move the input tensor to the same device as the model
- input_tensor = input_tensor.to(device_va)
-
- activations = []
-
- # noinspection PyUnusedLocal
- def hook_fn(module, inputx, output):
- # Hook function to extract intermediate layer activations
- activations.append(output)
-
- model_to_load[0].register_forward_hook(hook_fn) # Register hook on first layer
-
- # Perform a forward pass
- _ = model_to_load(input_tensor)
- activation = activations[0].detach().cpu().numpy() # Move activations to CPU
-
- # Plot activations as a bar chart
- plt.figure(figsize=(10, 6))
- plt.bar(range(len(activation[0])), activation[0])
- plt.title("Activation Values - First Layer")
- plt.xlabel("Neuron Index")
- plt.ylabel("Activation Value")
- plt.savefig("NN features/Visualize Activation.png")
- plt.close()
-
-
-def visualize_tsne(model_to_load: torch.nn.Module, dataloader: DataLoader):
- # Get the device of the model
- device_va = next(model_to_load.parameters()).device
-
- model_to_load.eval() # Set the model to evaluation mode
-
- features = []
- labels = []
-
- with torch.no_grad():
- for data, target in dataloader:
- # Move data and target to the same device as the model
- data, target = data.to(device_va), target.to(device_va)
-
- # Extract features (output of the model)
- output = model_to_load(data)
- features.append(output.cpu().numpy()) # Move output to CPU for concatenation
- labels.append(target.cpu().numpy()) # Move target to CPU for concatenation
-
- # Stack all batches
- features = np.vstack(features)
- labels = np.hstack(labels)
-
- # Determine suitable perplexity
- num_samples = features.shape[0]
- perplexity = min(30, num_samples - 1) # Ensure perplexity < num_samples
-
- # Apply t-SNE
- tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
- reduced_features = tsne.fit_transform(features)
-
- # Plot the t-SNE results
- plt.figure(figsize=(10, 8))
- scatter = plt.scatter(reduced_features[:, 0], reduced_features[:, 1], c=labels, cmap='viridis', alpha=0.7)
- plt.colorbar(scatter, label="Class")
- plt.title("t-SNE Visualization of Features")
- plt.xlabel("t-SNE Dimension 1")
- plt.ylabel("t-SNE Dimension 2")
- plt.savefig("NN features/Visualize t-SNE.png")
- plt.close()
-
-
-# Main function to run all visualizations
-def plot_many_graphs():
- print("Starting synthetic data generation...")
- # Load data
- faker = Faker()
-
- # Generate sensitive examples
- sensitive_data = [
- f"Name: {faker.name()}, SSN: {faker.ssn()}, Address: {faker.address()}",
- f"Credit Card: {faker.credit_card_number()}, Expiry: {faker.credit_card_expire()}, CVV: {faker.credit_card_security_code()}",
- f"Patient: {faker.name()}, Condition: {faker.text(max_nb_chars=20)}",
- f"Password: {faker.password()}",
- f"Email: {faker.email()}",
- f"Phone: {faker.phone_number()}",
- f"Medical Record: {faker.md5()}",
- f"Username: {faker.user_name()}",
- f"IP: {faker.ipv4()}",
- ]
-
- # Generate non-sensitive examples
- non_sensitive_data = [
- faker.text(max_nb_chars=50) for _ in range(50000)
- ]
-
- data_text = non_sensitive_data + (sensitive_data * 15)
- random.shuffle(data_text)
- print("Loaded data for visualization.")
- dataloader = load_data(data_text, vectorizer)
-
- # Visualizations
- print("Creating visualizations...")
- visualize_weight_distribution(model)
-
- # For activations, use a sample from the dataloader
- print("Creating activation visualizations...")
- sample_input = next(iter(dataloader))[0]
- visualize_activations(model, sample_input)
-
- print("Creating t-SNE visualization - May take a long time...")
- visualize_tsne(model, dataloader)
-
- print("Completed.")
-
-
-# Visualize feature importance (dummy example for visualization) and save as SVG
-def visualize_feature_importance(TOKENS: list[str], FEATURE_IMPORTANCE: float | ndarray[Any, dtype[np.floating]],
- FILENAME: str = "Plot.svg"):
- # Limit the number of tokens to visualize
- TOKENS = TOKENS[:1000]
- FEATURE_IMPORTANCE = FEATURE_IMPORTANCE[:1000]
-
- plt.figure(figsize=(len(TOKENS) * 0.5, 6))
- sns.barplot(x=TOKENS, y=FEATURE_IMPORTANCE, palette="coolwarm", hue=TOKENS, legend=False)
- plt.title("Feature Importance")
- plt.xlabel("Tokens")
- plt.ylabel("Importance")
- plt.xticks(rotation=45)
- plt.savefig(FILENAME, format="svg")
- plt.close() # Close the plot to release memory
-
-
-# Function to visualize the loss landscape as an interactive 3D object
-def plot_loss_landscape_3d(MODEL: torch.nn.Module, DATA_LOADER: DataLoader, CRITERION: torch.nn.Module,
- GRID_SIZE: int = 200, EPSILON: float = 0.01, FILENAME: str = "Plot.html"):
- MODEL.eval() # Set model to evaluation mode
- param = next(MODEL.parameters()) # Use the first parameter for landscape perturbations
- param_flat = param.view(-1)
-
- # Define perturbation directions u and v
- u = torch.randn_like(param_flat).view(param.shape).to(param.device)
- v = torch.randn_like(param_flat).view(param.shape).to(param.device)
-
- # Normalize perturbations
- u = EPSILON * u / torch.norm(u)
- v = EPSILON * v / torch.norm(v)
-
- # Create grid
- x = np.linspace(-1, 1, GRID_SIZE)
- y = np.linspace(-1, 1, GRID_SIZE)
- loss_values = np.zeros((GRID_SIZE, GRID_SIZE))
-
- # Iterate through the grid to compute losses
- for i, dx in enumerate(x):
- print(f"Computing loss for row {i + 1}/{GRID_SIZE}...")
- for j, dy in enumerate(y):
- print(f" Computing loss for column {j + 1}/{GRID_SIZE}...")
- param.data += dx * u + dy * v # Apply perturbation
- loss = 0
-
- # Compute loss for all batches in data loader
- for batch in DATA_LOADER:
- inputs, targets = batch
- inputs = inputs.to(param.device)
- targets = targets.to(param.device)
- outputs = MODEL(inputs)
- loss += CRITERION(outputs, targets).item()
-
- loss_values[i, j] = loss # Store the loss
- param.data -= dx * u + dy * v # Revert perturbation
-
- # Create a meshgrid for plotting
- X, Y = np.meshgrid(x, y)
-
- # Plot the 3D surface using Plotly
- fig = go.Figure(data=[go.Surface(z=loss_values, x=X, y=Y, colorscale="Viridis")])
- fig.update_layout(
- title="Loss Landscape (Interactive 3D)",
- scene=dict(
- xaxis_title="Perturbation in u",
- yaxis_title="Perturbation in v",
- zaxis_title="Loss",
- ),
- )
-
- # Save as an interactive HTML file
- fig.write_html(FILENAME)
- print(f"3D loss landscape saved as {FILENAME}")
-
-
-def main_plot():
- # Instantiate data loader
- print("Creating dummy data loader...")
- dummy_data_loader = DataLoader(DummyDataset(), batch_size=32)
-
- # Define loss criterion
- print("Defining loss criterion...")
- criterion = torch.nn.CrossEntropyLoss()
-
- # Visualizations
- print("Creating visualizations...")
- tokens = vectorizer.get_feature_names_out()
-
- # Feature importance
- # Max number of features to visualize is 3000 due to image constraints
- print(
- f"Visualizing feature importance - This may take a while for {len(tokens[:NUMBER_OF_FEATURES]) + 1} tokens...")
- feature_importance = np.random.rand(len(tokens[:NUMBER_OF_FEATURES])) # Example random importance
- visualize_feature_importance(tokens[:NUMBER_OF_FEATURES], feature_importance,
- FILENAME="NN features/feature_importance.svg")
-
- # Loss landscape
- print("Visualizing loss landscape - This may take a while...")
- plot_loss_landscape_3d(model, dummy_data_loader, criterion, FILENAME="NN features/loss_landscape_3d.html")
-
- # Set model to evaluation mode, and plot many graphs
- print("Setting model to evaluation mode...")
- model.eval() # Set the model to evaluation mode
- plot_many_graphs()
-
-
-def save_data(model_to_use: torch.nn.Module, input_size: tuple[int, Any] | int, batch_size: int = -1,
- device_to_use: str = "cuda"):
- def register_hook(module: torch.nn.Module):
-
- def hook(modules: torch.nn.Module, inputs: (torch.nn.Module, tuple[torch.Tensor]), output: torch.Tensor):
- class_name = str(modules.__class__).split(".")[-1].split("'")[0]
- module_idx = len(summaries)
-
- m_key = "%s-%i" % (class_name, module_idx + 1)
- summaries[m_key] = OrderedDict()
- summaries[m_key]["input_shape"] = list(inputs[0].size())
- summaries[m_key]["input_shape"][0] = batch_size
- if isinstance(output, (list, tuple)):
- summaries[m_key]["output_shape"] = [
- [-1] + list(o.size())[1:] for o in output
- ]
- else:
- summaries[m_key]["output_shape"] = list(output.size())
- summaries[m_key]["output_shape"][0] = batch_size
-
- params = 0
- if hasattr(modules, "weight") and hasattr(modules.weight, "size"):
- params += torch.prod(torch.LongTensor(list(modules.weight.size())))
- summaries[m_key]["trainable"] = modules.weight.requires_grad
- if hasattr(modules, "bias") and hasattr(modules.bias, "size"):
- params += torch.prod(torch.LongTensor(list(modules.bias.size())))
- summaries[m_key]["nb_params"] = params
-
- if (
- not isinstance(module, nn.Sequential)
- and not isinstance(module, nn.ModuleList)
- and not (module == model_to_use)
- ):
- hooks.append(module.register_forward_hook(hook))
-
- device_to_use = device_to_use.lower()
- assert device_to_use in [
- "cuda",
- "cpu",
- ], "Input device is not valid, please specify 'cuda' or 'cpu'"
-
- if device_to_use == "cuda" and torch.cuda.is_available():
- dtype_to_use = torch.cuda.FloatTensor
- else:
- dtype_to_use = torch.FloatTensor
-
- # multiple inputs to the network
- if isinstance(input_size, tuple):
- input_size = [input_size]
-
- # batch_size of 2 for batch norm
- x = [torch.rand(2, *in_size).type(dtype_to_use) for in_size in input_size]
-
- # create properties
- summaries = OrderedDict()
- hooks = []
-
- # register hook
- model_to_use.apply(register_hook)
-
- # make a forward pass
- model_to_use(*x)
-
- # remove these hooks
- for h in hooks:
- h.remove()
-
- # Save the summary
- mode = "a" if os.path.exists("NN features/Model Summary.txt") else "w"
- with open('NN features/Model Summary.txt', mode) as vf_ms:
- vf_ms.write("----------------------------------------------------------------\n")
- line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
- vf_ms.write(f"{line_new}\n")
- vf_ms.write("================================================================\n")
- total_params = 0
- total_output = 0
- trainable_params = 0
- for layer in summaries:
- # input_shape, output_shape, trainable, nb_params
- line_new = "{:>20} {:>25} {:>15}".format(
- layer,
- str(summaries[layer]["output_shape"]),
- "{0:,}".format(summaries[layer]["nb_params"]),
- )
- total_params += summaries[layer]["nb_params"]
- total_output += np.prod(summaries[layer]["output_shape"])
- if "trainable" in summaries[layer]:
- if summaries[layer]["trainable"]:
- trainable_params += summaries[layer]["nb_params"]
- vf_ms.write(f"{line_new}\n")
-
- # assume 4 bytes/number (float on cuda).
- total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
- total_output_size = abs(2. * total_output * 4. / (1024 ** 2.)) # x2 for gradients
- total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
- total_size = total_params_size + total_output_size + total_input_size
-
- vf_ms.write("\n================================================================")
- vf_ms.write("\nTotal params: {0:,}".format(total_params))
- vf_ms.write("\nTrainable params: {0:,}".format(trainable_params))
- vf_ms.write("\nNon-trainable params: {0:,}".format(total_params - trainable_params))
- vf_ms.write("\n----------------------------------------------------------------")
- vf_ms.write("\nInput size (MB): %0.2f" % total_input_size)
- vf_ms.write("\nForward/backward pass size (MB): %0.2f" % total_output_size)
- vf_ms.write("\nParams size (MB): %0.2f" % total_params_size)
- vf_ms.write("\nEstimated Total Size (MB): %0.2f" % total_size)
- vf_ms.write("\n----------------------------------------------------------------\n")
-
-
-def save_graph():
- # Create a directed graph
- G = nx.DiGraph()
-
- def add_edges_bulk(layer_names: str, weight_matrices: np.ndarray[np.float32]):
- """Efficiently add edges to the graph with progress tracking."""
- threshold = 0.1 # Adjust this threshold as needed
- significant_weights = np.abs(weight_matrices) > threshold
- rows, cols = np.where(significant_weights)
- weights = weight_matrices[rows, cols]
-
- # Use tqdm for progress tracking
- edge_count = len(rows)
- with tqdm(total=edge_count, desc=f"Processing {layer_names}", unit="edges") as pbar:
- for row, col, weight in zip(rows, cols, weights):
- in_node = f"{layer_names}_in_{col}"
- out_node = f"{layer_names}_out_{row}"
- G.add_edge(in_node, out_node, weight=weight)
- pbar.update(1)
-
- # Process parameters
- for name, param in model.named_parameters():
- if 'weight' in name:
- layer_name = name.split('.')[0]
- weight_matrix = param.data.cpu().numpy()
-
- # Add edges with progress bar
- add_edges_bulk(layer_name, weight_matrix)
-
- # Draw the graph
- print("Writing the graph to a file...")
- nx.write_gexf(G, "NN features/Neural Network Nodes Graph.gexf")
-
-
-def setup_environment():
- print("Visualizing the model and vectorizer features...")
- print("This may take a while, please wait.")
-
- if not os.path.exists('NN features'):
- mkdir('NN features')
-
-
-def load_vectorizer():
- vectorizer_load = joblib.load(vectorizer_path)
- feature_names = vectorizer_load.get_feature_names_out()
- with open('NN features/Vectorizer features.txt', 'w') as file:
- file.write(f"Number of features: {len(feature_names)}\n\n")
- file.write('\n'.join(feature_names))
- return vectorizer_load
-
-
-def visualize_top_features(top_n: int = 90):
- feature_names = vectorizer.get_feature_names_out()
- sorted_indices = vectorizer.idf_.argsort()[:top_n]
- top_features = [feature_names[i] for i in sorted_indices]
- top_idf_scores = vectorizer.idf_[sorted_indices]
-
- plt.figure(figsize=(20, 12)) # Increase the figure size
- sns.barplot(x=top_idf_scores, y=top_features)
- plt.title('Top 90 Features by IDF Score')
- plt.xlabel('IDF Score')
- plt.ylabel('Feature')
-
- # Save the plot as a vector graphic
- plt.savefig('NN features/Top_90_Features.svg', format='svg')
- plt.close()
-
-
-def load_model() -> tuple[Any, device]:
- device_load = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- model_load = torch.load(model_path, weights_only=False)
- model_load.to(device_load)
- return model_load, device_load
-
-
-def save_model_state_dict():
- with open('NN features/Model state dictionary.txt', 'w') as file:
- file.write("Model's state dictionary:\n\n")
- for param_tensor in model.state_dict():
- file.write(f"\n{param_tensor}\t{model.state_dict()[param_tensor].size()}")
-
-
-def generate_model_visualization():
- dummy_input = torch.randn(1, vectorizer.vocabulary_.__len__()).to(device)
- model_viz = make_dot(model(dummy_input), params=dict(model.named_parameters()), show_attrs=True, show_saved=True)
- model_viz.format = 'png'
- model_viz.render(filename='NN features/Model Visualization', format='png')
-
-
-def cleanup_temp_files():
- if os.path.exists("NN features/Model Visualization"):
- os.remove("NN features/Model Visualization")
-
-
-def model_summary():
- mode = "a" if os.path.exists("NN features/Model Summary.txt") else "w"
- with open("NN features/Model Summary.txt", mode) as file:
- file.write(str(model))
-
-
-if __name__ == '__main__':
- # Print the welcome message
- print("===========================================================================================")
- print("= This script will visualize the features of the model and vectorizer. =")
- print("= Please ensure that the model and vectorizer files are present in the specified paths. =")
- print("= The visualization will be saved in the 'NN features' directory. =")
- print("= This script will take a while to run, please be patient. =")
- print("===========================================================================================")
-
- # Read the config file
- print("\n\nReading config file and setting up...")
- config = ConfigParser()
- config.read('../../config.ini')
-
- setup_environment()
-
- # Load the paths from the config file
- vectorizer_path = config.get('VulnScan.study Settings', 'vectorizer_path')
- model_path = config.get('VulnScan.study Settings', 'model_path')
- NUMBER_OF_FEATURES = int(config.get('VulnScan.study Settings', 'number_of_features'))
-
- # Check if the paths exist
- if not os.path.exists(vectorizer_path):
- print(f"Vectorizer file not found. Please double check the path {vectorizer_path}.")
- exit(1)
- if not os.path.exists(model_path):
- print(f"Model file not found. Please double check the path {model_path}.")
- exit(1)
-
- # Load the vectorizer and model
- vectorizer = load_vectorizer()
- visualize_top_features()
- model, device = load_model()
- # Save the model summary, state dictionary, and visualization
- save_data(model, input_size=(1, vectorizer.vocabulary_.__len__()))
- save_model_state_dict()
- generate_model_visualization()
- cleanup_temp_files()
- save_graph()
- print("Model visualization and summary have been saved to the 'NN features' directory.")
-
- # Ensure the 'NN features' output directory exists
- if not os.path.exists('NN features'):
- os.mkdir('NN features')
-
- # Check if GPU is available
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- print(f"Using device: {device}")
-
- # Load vectorizer (change the path to your vectorizer .pkl file)
- vectorizer_path = "../Vectorizer .3n3.pkl"
- model_path = "../Model SenseMini .3n3.pth"
-
- # Load vectorizer
- print(f"Reloading vectorizer from: {vectorizer_path}")
- with open(vectorizer_path, "rb") as f:
- vectorizer = joblib.load(f)
-
- # Load model and move to the appropriate device (GPU/CPU)
- print(f"Reloading model from: {model_path}")
- model = torch.load(model_path, weights_only=False)
- model.to(device) # Move model to GPU or CPU
-
- model_summary()
- main_plot()
-else:
- raise ImportError("This training script is meant to be run directly "
- "and cannot be imported. Please execute it as a standalone script.")
diff --git a/CODE/VulnScan/tools/_test_gpu_acceleration.py b/CODE/VulnScan/tools/_test_gpu_acceleration.py
deleted file mode 100644
index 3b6b8b1c..00000000
--- a/CODE/VulnScan/tools/_test_gpu_acceleration.py
+++ /dev/null
@@ -1,25 +0,0 @@
-try:
- # noinspection PyUnresolvedReferences
- import torch
-except ImportError as e:
- print(f"Error: Failed to import torch. Please ensure PyTorch is installed correctly: {e}")
- exit(1)
-
-
-def check_gpu() -> str:
- """Check if CUDA is available and print the device information.
-
- This function attempts to detect CUDA capability and prints whether
- GPU acceleration is available, along with the device name if applicable.
- """
- try:
- if torch.cuda.is_available():
- return f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}"
- else:
- return "CUDA is not available. Using CPU."
- except RuntimeError as err:
- return f"Error initializing CUDA: {err}"
-
-
-if __name__ == '__main__':
- print(check_gpu())
diff --git a/CODE/VulnScan/tools/_vectorizer.py b/CODE/VulnScan/tools/_vectorizer.py
deleted file mode 100644
index 25e57272..00000000
--- a/CODE/VulnScan/tools/_vectorizer.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from __future__ import annotations
-
-from configparser import ConfigParser
-
-import joblib
-from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-
-import os
-
-
-def load_data(data_paths: str | os.PathLike) -> list[str]:
- """
- Load data from the specified path(s).
-
- Args:
- data_paths (str | os.PathLike): Path to a directory or a file containing data.
-
- Returns:
- list[str]: List of strings, each representing the content of a file.
- """
- data = []
- if os.path.isdir(data_paths):
- for root, _, files in os.walk(data_paths):
- for file in files:
- print("Loading File: ", file)
- file_path = os.path.join(root, file)
- with open(file_path, 'r', encoding='utf-8') as f:
- data.append(f.read())
- else:
- with open(data_paths, 'r', encoding='utf-8') as f:
- data.append(f.read())
- return data
-
-
-def choose_vectorizer(vectorizer_types: str) -> TfidfVectorizer | CountVectorizer:
- """
- Choose and return a vectorizer based on the specified type.
-
- Args:
- vectorizer_types (str): Type of vectorizer to use ('tfidf' or 'count').
-
- Returns:
- TfidfVectorizer | CountVectorizer: The chosen vectorizer.
-
- Raises:
- ValueError: If an unsupported vectorizer type is specified.
- """
- print("Vectorizer Type: ", vectorizer_types)
- print("Vectorizing Data...")
- if vectorizer_types == 'tfidf':
- return TfidfVectorizer(max_features=10000)
- if vectorizer_types == 'count':
- return CountVectorizer(max_features=10000)
- raise ValueError("Unsupported vectorizer type. Choose 'tfidf' or 'count'.")
-
-
-def main(data_paths: str, vectorizer_types: str, output_paths: str):
- """
- Main function to load data, choose a vectorizer, fit the vectorizer to the data, and save the vectorizer.
-
- Args:
- data_paths (str): Path to the data.
- vectorizer_types (str): Type of vectorizer to use ('tfidf' or 'count').
- output_paths (str): Path to save the fitted vectorizer.
- """
- data = load_data(data_paths)
- vectorizer = choose_vectorizer(vectorizer_types)
- vectorizer.fit(data)
- joblib.dump(vectorizer, os.path.join(output_paths, "Vectorizer.pkl"))
- print(f"Vectorizer saved to {output_paths}")
-
-
-if __name__ == "__main__":
- print("Reading config file")
- config = ConfigParser()
- config.read('../../config.ini')
- data_path = config.get('VulnScan.vectorizer Settings', 'data_path')
- vectorizer_type = config.get('VulnScan.vectorizer Settings', 'vectorizer_type')
- output_path = config.get('VulnScan.vectorizer Settings', 'output_path')
- if not os.path.exists(output_path):
- os.makedirs(output_path)
- main(data_path, vectorizer_type, output_path)
-else:
- raise ImportError("This training script is meant to be run directly "
- "and cannot be imported. Please execute it as a standalone script.")
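As a usage note, the `Vectorizer.pkl` written by `main()` above can be reloaded with `joblib.load` and applied to new documents. A minimal sketch (the sample strings are placeholders):

```python
import joblib

# Reload the vectorizer fitted and dumped by _vectorizer.py (path relative to output_path).
vectorizer = joblib.load("Vectorizer.pkl")
matrix = vectorizer.transform(["password: hunter2", "meeting notes for Tuesday"])
print(matrix.shape)  # (2, n_features), with n_features capped at 10000 by max_features
```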
diff --git a/CODE/VulnScan/v3/_generate_data.py b/CODE/VulnScan/v3/_generate_data.py
deleted file mode 100644
index 7a5b55f4..00000000
--- a/CODE/VulnScan/v3/_generate_data.py
+++ /dev/null
@@ -1,227 +0,0 @@
-from __future__ import annotations
-
-import configparser
-import os
-import random
-import string
-
-from faker import Faker
-
-from Logicytics import Log, DEBUG
-
-logger = Log(
- {"log_level": DEBUG,
- "filename": "../../../ACCESS/LOGS/VulnScan_Train.log",
- "colorlog_fmt_parameters":
- "%(log_color)s%(levelname)-8s%(reset)s %(yellow)s%(asctime)s %(blue)s%(message)s",
- }
-)
-
-
-def generate_random_filename(extensions: str, suffix_x: str = '') -> str:
- """
- Generate a random filename with the given extension and optional suffix.
-
- Args:
- extensions (str): The file extension.
- suffix_x (str, optional): An optional suffix to add to the filename.
-
- Returns:
- str: The generated random filename.
- """
- return ''.join(random.choices(string.ascii_letters + string.digits, k=10)) + suffix_x + extensions
-
-
-def generate_content_for_extension(extensions: str, size: int | float) -> tuple[str, str]:
- """
- Generate content based on the file extension and size.
-
- Args:
- extensions (str): The file extension.
- size (int | float): The size of the content to generate.
-
- Returns:
- tuple[str, str]: The generated content and a suffix indicating the sensitivity level.
- """
- full_sensitive_chance = float(config.get('full_sensitive_chance', '0.1'))
- partial_sensitive_chance = float(config.get('partial_sensitive_chance', '0.3'))
-
- def generate_sensitive_data() -> str:
- """
- Generate sensitive data based on the file extension.
-
- Returns:
- str: The generated sensitive data.
- """
- sensitive_data_generators = {
- '.txt': lambda: random.choice([
- fake.credit_card_number(),
- fake.ssn(),
- fake.password(),
- fake.email(),
- fake.phone_number(),
- fake.iban(),
- ]),
- '.json': lambda: {
- 'credit_card': fake.credit_card_number(),
- 'email': fake.email(),
- 'phone': fake.phone_number(),
- 'password': fake.password(),
- 'iban': fake.iban(),
- },
- '.csv': lambda: ",".join([
- fake.credit_card_number(),
- fake.email(),
- fake.phone_number(),
- ]),
- '.xml': lambda: f"{random.choice([fake.credit_card_number(), fake.iban(), fake.password()])}",
- '.log': lambda: f"{fake.date_time()} - Sensitive Data: {random.choice([fake.email(), fake.password(), fake.ipv4_private()])}",
- 'default': lambda: fake.text(max_nb_chars=50)
- }
-
- return sensitive_data_generators.get(extensions, sensitive_data_generators['default'])()
-
- def generate_regular_content(extension_grc: str, sizes: int | float) -> str:
- """
- Generate regular content based on the file extension and size.
-
- Args:
- extension_grc (str): The file extension.
- sizes (int | float): The size of the content to generate.
-
- Returns:
- str: The generated regular content.
- """
- if extension_grc == '.txt':
- content_grc = fake.text(max_nb_chars=sizes)
- elif extension_grc == '.json':
- # noinspection PyTypeChecker
- content_grc = fake.json(data_columns={
- 'name': 'name',
- 'email': 'email',
- 'phone': 'phone_number'
- }, num_rows=sizes // 50)
- elif extension_grc == '.csv':
- content_grc = "\n".join(
- ",".join([fake.name(), fake.email(), fake.phone_number()]) for _ in range(sizes // 50)
- )
- elif extension_grc == '.xml':
- content_grc = f"{''.join([f'{fake.text(50)}' for _ in range(sizes // 100)])}"
- elif extension_grc == '.log':
- content_grc = "\n".join([f"{fake.date_time()} - {fake.text(50)}" for _ in range(sizes // 100)])
- else:
- content_grc = fake.text(max_nb_chars=sizes)
- return content_grc
-
- if random.random() < full_sensitive_chance:
- if extensions == '.json':
- contents = str([generate_sensitive_data() for _ in range(size // 500)])
- elif extensions in ['.txt', '.log', '.xml']:
- contents = "\n".join(generate_sensitive_data() for _ in range(size // 500))
- elif extensions == '.csv':
- contents = "\n".join([generate_sensitive_data() for _ in range(size // 500)])
- else:
- contents = "\n".join([generate_sensitive_data() for _ in range(size // 500)])
- return contents, '-sensitive'
- else:
- regular_content = generate_regular_content(extensions, size)
- if random.random() < partial_sensitive_chance:
- sensitive_data_count = max(1, size // 500)
- sensitive_data = [generate_sensitive_data() for _ in range(sensitive_data_count)]
- regular_content_lines = regular_content.split("\n")
- for _ in range(sensitive_data_count):
- insert_position = random.randint(0, len(regular_content_lines) - 1)
- regular_content_lines.insert(insert_position, str(random.choice(sensitive_data)))
- contents = "\n".join(regular_content_lines)
- return contents, '-mix'
- else:
- contents = regular_content
- return contents, '-none'
-
-
-def generate_file_content(extensions: str) -> tuple[str, str]:
- """
- Generate file content based on the file extension.
-
- Args:
- extensions (str): The file extension.
-
- Returns:
- tuple[str, str]: The generated content and a suffix indicating the sensitivity level.
- """
- size = random.randint(MIN_FILE_SIZE, MAX_FILE_SIZE)
- if SIZE_VARIATION != 0:
- variation_choice = random.choice([1, 2, 3, 4])
- if variation_choice == 1:
- size = abs(int(size + (size * SIZE_VARIATION)))
- elif variation_choice == 2:
- size = abs(int(size - (size * SIZE_VARIATION)))
- elif variation_choice == 3:
- size = abs(int(size + (size / SIZE_VARIATION)))
- elif variation_choice == 4:
- size = abs(int(size - (size / SIZE_VARIATION)))
- logger.debug(f"Generating {extensions} content of size {size} bytes")
- return generate_content_for_extension(extensions, size)
-
-
-if __name__ == "__main__":
- """
- Main function to generate files based on the configuration.
- """
- fake = Faker()
-
- config = configparser.ConfigParser()
- config.read('../../config.ini')
-
- config = config['VulnScan.generate Settings']
- EXTENSIONS_ALLOWED = config.get('extensions', '.txt').split(',')
- SAVE_PATH = config.get('save_path', '.')
- CODE_NAME = config.get('code_name', 'Sense')
- SIZE_VARIATION = float(config.get('size_variation', '0.1'))
-
- os.makedirs(SAVE_PATH, exist_ok=True)
-
- DEFAULT_FILE_NUM = 10000
- DEFAULT_MIN_FILE_SIZE = 10 * 1024
- DEFAULT_MAX_FILE_SIZE = 10 * 1024
-
- if CODE_NAME == 'SenseMacro':
- print(
- "\033[91mDeprecationWarning: SenseMacro has been removed due to instability issues. "
- "Please use 'Sense' instead for better stability and performance. "
- "Defaulting to 'Sense' settings for now.\033[0m"
- )
- CODE_NAME = 'Sense'
-
- if CODE_NAME == 'Sense':
- FILE_NUM = DEFAULT_FILE_NUM * 5
- MIN_FILE_SIZE = DEFAULT_MIN_FILE_SIZE * 5
- MAX_FILE_SIZE = DEFAULT_MAX_FILE_SIZE * 5
- elif CODE_NAME == 'SenseNano':
- FILE_NUM = 5
- MIN_FILE_SIZE = int(DEFAULT_MIN_FILE_SIZE * 0.5)
- MAX_FILE_SIZE = int(DEFAULT_MAX_FILE_SIZE * 0.5)
- elif CODE_NAME == 'SenseMini':
- FILE_NUM = DEFAULT_FILE_NUM
- MIN_FILE_SIZE = DEFAULT_MIN_FILE_SIZE
- MAX_FILE_SIZE = DEFAULT_MAX_FILE_SIZE
- else:
- MIN_FILE_SIZE = int(config['min_file_size'].replace('KB', '')) * 1024
- MAX_FILE_SIZE = int(config['max_file_size'].replace('KB', '')) * 1024
- FILE_NUM = DEFAULT_FILE_NUM
-
- logger.info(f"Generating {FILE_NUM} files with sizes between {MIN_FILE_SIZE} and {MAX_FILE_SIZE} bytes")
-
- for i in range(FILE_NUM):
- logger.debug(f"Generating file {i + 1}/{FILE_NUM}")
- extension = random.choice(EXTENSIONS_ALLOWED).strip()
- content, suffix = generate_file_content(extension)
- filename = generate_random_filename(extension, suffix)
- filepath = os.path.join(SAVE_PATH, filename)
- with open(filepath, 'w', encoding='utf-8') as f:
- f.write(content)
-
- logger.info(f"Generated {FILE_NUM} files in {SAVE_PATH}")
-else:
- raise ImportError("This training script is meant to be run directly "
- "and cannot be imported. Please execute it as a standalone script.")
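The generator above encodes ground truth in the filename suffix (`-sensitive`, `-mix`, `-none`). Below is a hedged sketch of turning such a directory into `(text, label)` pairs; the actual loader in `_train.py` is truncated at the end of this diff, so treating `-none` as the only non-sensitive class is an assumption.

```python
import os

# Hypothetical helper, not the _train.py loader: derive binary labels from the
# suffixes produced by generate_file_content().
def load_generated_dataset(folder: str) -> tuple[list[str], list[int]]:
    texts, labels = [], []
    for name in os.listdir(folder):
        stem = os.path.splitext(name)[0]
        labels.append(0 if stem.endswith("-none") else 1)  # '-sensitive' / '-mix' -> 1
        with open(os.path.join(folder, name), "r", encoding="utf-8") as fh:
            texts.append(fh.read())
    return texts, labels
```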
diff --git a/CODE/VulnScan/v3/_train.py b/CODE/VulnScan/v3/_train.py
deleted file mode 100644
index ffd645bc..00000000
--- a/CODE/VulnScan/v3/_train.py
+++ /dev/null
@@ -1,444 +0,0 @@
-from __future__ import annotations
-
-import os
-from configparser import ConfigParser
-from typing import Any, Optional
-
-import joblib
-import matplotlib.pyplot as plt
-import torch
-import torch.nn as nn
-import torch.optim as optim
-import xgboost as xgb
-from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import accuracy_score, classification_report
-from sklearn.model_selection import train_test_split
-from sklearn.naive_bayes import MultinomialNB
-from sklearn.tree import DecisionTreeClassifier
-from torch.utils.data import Dataset, DataLoader
-
-# Set up logging
-from logicytics import Log, DEBUG
-
-# NN seems to be the best choice for this task
-
-logger = Log(
- {"log_level": DEBUG,
- "filename": "../../../ACCESS/LOGS/VulnScan_Train.log",
- "colorlog_fmt_parameters":
- "%(log_color)s%(levelname)-8s%(reset)s %(yellow)s%(asctime)s %(blue)s%(message)s",
- }
-)
-vectorizer = None
-
-
-# Dataset Class for PyTorch models
-class SensitiveDataDataset(Dataset):
- """
- A custom Dataset class for handling sensitive data for PyTorch models.
-
- Attributes:
- texts (list[str]): List of text data.
- labels (list[int]): List of labels corresponding to the text data.
- tokenizer (callable, optional): A function to tokenize the text data.
- """
-
- def __init__(self,
- texts_init: list[str],
- labels_init: list[int],
- tokenizer: Optional[callable] = None):
- """
- Initializes the SensitiveDataDataset with texts, labels, and an optional tokenizer.
-
- Args:
- texts_init (list[str]): List of text data.
- labels_init (list[int]): List of labels corresponding to the text data.
- tokenizer (callable, optional): A function to tokenize the text data.
- """
- self.texts = texts_init
- self.labels = labels_init
- self.tokenizer = tokenizer
-
- def __len__(self) -> int:
- """
- Returns the number of samples in the dataset.
-
- Returns:
- int: Number of samples.
- """
- return len(self.texts)
-
- def __getitem__(self, idx: int) -> tuple:
- """
- Retrieves a sample and its label from the dataset at the specified index.
-
- Args:
- idx (int): Index of the sample to retrieve.
-
- Returns:
- tuple: A tuple containing the tokenized text tensor and the label tensor.
- """
- text = self.texts[idx]
- label = self.labels[idx]
- if self.tokenizer:
- text = self.tokenizer(text)
- return torch.tensor(text, dtype=torch.float32), torch.tensor(label, dtype=torch.long)
-
-
-def vectorize_text_data(X_trains: list[str], X_vals: list[str], save_model_path: str):
- """
- Vectorizes the text data using TfidfVectorizer and saves the vectorizer model.
-
- Args:
- X_trains (list[str]): List of training text data.
- X_vals (list[str]): List of validation text data.
- save_model_path (str): Path to save the vectorizer model.
-
- Returns:
- tuple: Transformed training and validation data as arrays.
- """
- vectorizers = TfidfVectorizer(max_features=10000, ngram_range=(1, 2))
- transformed_train = vectorizers.fit_transform(X_trains).toarray()
- transformed_val = vectorizers.transform(X_vals).toarray()
- # Save the fitted vectorizer so it can be reloaded for inference
- joblib.dump(vectorizers, os.path.join(os.path.dirname(save_model_path), 'Vectorizer.pkl'))
- return transformed_train, transformed_val
-
-
-def save_and_plot_model(model: nn.Module,
- save_model_path: str,
- accuracy_list: list[float],
- loss_list: list[float],
- epochs: int,
- model_name: str):
- """
- Saves the trained model and plots the accuracy and loss over epochs.
-
- Args:
- model (nn.Module): The trained PyTorch model.
- save_model_path (str): The path to save the model.
- accuracy_list (list[float]): List of accuracy values over epochs.
- loss_list (list[float]): List of loss values over epochs.
- epochs (int): The number of epochs.
- model_name (str): The name of the model.
- """
- logger.info(f"Saving {model_name} model")
- if save_model_path:
- logger.info(f"Saving model to {save_model_path}.pth")
- torch.save(model, save_model_path + ".pth")
-
- logger.info(f"Plotting {model_name} model - Accuracy Over Epochs")
- plt.figure(figsize=(12, 6))
- plt.plot(list(range(1, epochs + 1)), accuracy_list, label="Accuracy")
- plt.title(f'{model_name} - Validation Accuracy Over Epochs')
- plt.xlabel('Epoch')
- plt.ylabel('Accuracy')
- plt.legend()
- plt.grid(True)
- plt.savefig(os.path.join(os.path.dirname(save_model_path), f"Model Accuracy Over Epochs - {model_name}.png"))
- plt.show()
-
- logger.info(f"Plotting {model_name} model - Loss Over Epochs")
- plt.plot(list(range(1, epochs + 1)), loss_list, label="Loss")
- plt.title(f'{model_name} - Validation Loss Over Epochs')
- plt.xlabel('Epochs')
- plt.ylabel('Loss')
- plt.legend()
- plt.savefig(os.path.join(os.path.dirname(save_model_path), f"Model Loss Over Epochs - {model_name}.png"))
- plt.show()
-
-
-def select_model_from_traditional(model_name: str,
- epochs: int) -> LogisticRegression | RandomForestClassifier | ExtraTreesClassifier | GradientBoostingClassifier | DecisionTreeClassifier | MultinomialNB | Any:
- """
- Selects and returns a machine learning model based on the provided model name.
-
- Args:
- model_name (str): The name of the model to select.
- epochs (int): The number of epochs for training (used for LogisticRegression).
-
- Returns:
- A machine learning model instance corresponding to the model name.
- """
- logger.info(f"Selecting {model_name} model")
- if model_name == 'LogisticRegression':
- return LogisticRegression(max_iter=epochs)
- if model_name == 'RandomForest':
- return RandomForestClassifier(n_estimators=100)
- if model_name == 'ExtraTrees':
- return ExtraTreesClassifier(n_estimators=100)
- if model_name == 'GBM':
- return GradientBoostingClassifier(n_estimators=100)
- if model_name == 'XGBoost':
- return xgb.XGBClassifier(eval_metric='logloss')
- if model_name == 'DecisionTree':
- return DecisionTreeClassifier()
- if model_name == 'NaiveBayes':
- return MultinomialNB()
- if model_name == 'LogReg':
- return LogisticRegression(max_iter=epochs)
- logger.error(f"Invalid model name: {model_name}")
- exit(1)
-
-
-def train_traditional_model(model_name: str,
- epochs: int,
- save_model_path: str):
- """
- Trains a traditional machine learning model.
-
- Args:
- model_name (str): The name of the model to train.
- epochs (int): The number of epochs for training.
- save_model_path (str): The path to save the trained model.
- """
- global vectorizer, X_val, X_train
- logger.info(f"Using Vectorizer TfidfVectorizer for {model_name} model")
- # Ensure X_train and X_val are lists of strings
- X_train = [str(text) for text in X_train]
- X_val = [str(text) for text in X_val]
-
- # Call the vectorize_text_data function
- X_train, X_val = vectorize_text_data(X_train, X_val, save_model_path)
-
- logger.info(f"Training {model_name} model")
- model = select_model_from_traditional(model_name, epochs)
- model.fit(X_train, y_train)
- predictions = model.predict(X_val)
- accuracy_list = accuracy_score(y_val, predictions)
- logger.info(f"Validation Accuracy: {accuracy_list:.4f}")
- logger.info(classification_report(y_val, predictions))
-
- loss_list, acc_plot = [], []
-
- logger.info(f"Training {model_name} model for {epochs} epochs")
- for epoch in range(epochs):
- model.fit(X_train, y_train)
- predictions = model.predict(X_val)
- accuracy_list = accuracy_score(y_val, predictions)
- acc_plot.append(accuracy_list)
- logger.info(f"Epoch {epoch + 1}/{epochs} - Validation Accuracy: {accuracy_list:.4f}")
- logger.info(classification_report(y_val, predictions, zero_division=0))
-
- if hasattr(model, 'predict_proba'):
- loss = model.score(X_val, y_val)
- logger.debug(f"Epoch {epoch + 1}: Model loss: {loss}")
- else:
- loss = 1 - accuracy_list
- logger.debug(f"Epoch {epoch + 1}: Model loss: {loss}")
- loss_list.append(loss)
-
- save_and_plot_model(model, save_model_path, acc_plot, loss_list, epochs, model_name)
-
-
-def train_neural_network(epochs: int,
- batch_size: int,
- learning_rate: float,
- save_model_path: str,
- use_cuda: Optional[bool] = False):
- """
- Trains a neural network model.
-
- Args:
- epochs (int): The number of epochs to train the model.
- batch_size (int): The size of the batches for training.
- learning_rate (float): The learning rate for the optimizer.
- save_model_path (str): The path to save the trained model.
- use_cuda (bool, optional): Whether to use CUDA for training. Defaults to False.
- """
- if use_cuda is None:
- use_cuda = False
- global vectorizer, X_val, X_train, labels
- logger.info("Vectorizing text data for Neural Network")
- # Ensure X_train and X_val are lists of strings
- X_train = [str(text) for text in X_train]
- X_val = [str(text) for text in X_val]
-
- # Call the vectorize_text_data function
- X_train, X_val = vectorize_text_data(X_train, X_val, save_model_path)
-
- logger.info("Training Neural Network model")
- model = nn.Sequential(nn.Linear(X_train.shape[1], 128), nn.ReLU(), nn.Linear(128, 2))
- criterion = nn.CrossEntropyLoss()
- optimizer = optim.Adam(model.parameters(), lr=learning_rate)
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=250, gamma=0.01)
- device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")
- logger.info(f"Training on hardware: {device}")
- model.to(device)
-
- logger.info("Creating DataLoaders for Neural Network")
- train_dataset = SensitiveDataDataset(X_train, y_train)
- val_dataset = SensitiveDataDataset(X_val, y_val)
- train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
- val_loader = DataLoader(val_dataset, batch_size=batch_size)
-
- accuracy_list = []
- loss_list = []
-
- for epoch in range(epochs):
- model.train()
- epoch_loss, correct, total = 0, 0, 0
- for inputs, labels in train_loader:
- inputs, labels = inputs.to(device), labels.to(device)
- optimizer.zero_grad()
- outputs = model(inputs)
- loss = criterion(outputs, labels)
- loss.backward()
- optimizer.step()
- epoch_loss += loss.item()
- _, predictions = torch.max(outputs, 1)
- correct += (predictions == labels).sum().item()
- total += labels.size(0)
- logger.debug(f"Epoch {epoch + 1}: Correct: {correct}, Total: {total}")
-
- scheduler.step()
-
- accuracy_list.append(correct / total)
- loss_list.append(epoch_loss)
- current_lr = scheduler.get_last_lr()[0]
- logger.info(f"Epoch {epoch + 1}/{epochs}, Learning Rate: {current_lr}")
- logger.info(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {(correct / total):.4f}")
-
- logger.info("Validating Neural Network model")
- val_loss, val_correct, val_total = 0, 0, 0
- with torch.no_grad():
- model.eval()
- for inputs, labels in val_loader:
- inputs, labels = inputs.to(device), labels.to(device)
- outputs = model(inputs)
- loss = criterion(outputs, labels)
- val_loss += loss.item()
- _, predictions = torch.max(outputs, 1)
- val_correct += (predictions == labels).sum().item()
- val_total += labels.size(0)
- logger.debug(f"Validation: Correct: {val_correct}, Total: {val_total}")
-
- val_acc = val_correct / val_total
- logger.info(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")
-
- save_and_plot_model(model, save_model_path, accuracy_list, loss_list, epochs, 'NeuralNetwork')
-
-
-def train_model(
- model_name: str,
- epochs: int,
- batch_size: int,
- learning_rate: float,
- save_model_path: str,
- use_cuda: Optional[bool] = False,
-):
- """
- Trains a machine learning model based on the specified parameters.
-
- Args:
- model_name (str): The name of the model to train.
- epochs (int): The number of epochs to train the model.
- batch_size (int): The size of the batches for training.
- learning_rate (float): The learning rate for the optimizer.
- save_model_path (str): The path to save the trained model.
- use_cuda (bool, optional): Whether to use CUDA for training. Defaults to False.
- """
- if use_cuda is None:
- use_cuda = False
- if model_name == 'NeuralNetwork':
- train_neural_network(epochs, batch_size, learning_rate, save_model_path, use_cuda)
- else:
- train_traditional_model(model_name, epochs, save_model_path)
-
-
-def validate_data():
- """
- Validates the data by checking if the variables are of the correct type.
- """
- if not isinstance(EPOCHS, int) or EPOCHS <= 0:
- logger.error("EPOCHS must be a positive integer")
- exit(1)
- if not isinstance(BATCH_SIZE, int) or BATCH_SIZE <= 0:
- logger.error("BATCH_SIZE must be a positive integer")
- exit(1)
- if not isinstance(LEARN_RATE, float) or not (0 < LEARN_RATE < 1):
- logger.error("LEARN_RATE must be a float between 0 and 1")
- exit(1)
- if not isinstance(CUDA, bool):
- logger.error("CUDA must be a boolean")
- exit(1)
-
- allowed_models = ["NeuralNetwork", "LogReg", "RandomForest", "ExtraTrees", "GBM", "XGBoost", "DecisionTree",
- "NaiveBayes"]
- if MODEL_NAME not in allowed_models:
- logger.error(f"MODEL_NAME must be one of: {', '.join(allowed_models)}")
- exit(1)
- if not os.path.exists(TRAINING_PATH):
- logger.error(f"Training data path {TRAINING_PATH} does not exist")
- exit(1)
- if not os.path.exists(os.path.dirname(SAVE_PATH)):
- logger.error(f"Save model path {SAVE_PATH} does not exist")
- exit(1)
-
-
-if __name__ == "__main__":
- # Config file reading and setting constants
- logger.info("Reading config file")
- config = ConfigParser()
- config.read('../../config.ini')
-
- MODEL_NAME = config.get('VulnScan.train Settings', 'model_name')
- TRAINING_PATH = config.get('VulnScan.train Settings', 'train_data_path')
- EPOCHS = int(config.get('VulnScan.train Settings', 'epochs'))
- BATCH_SIZE = int(config.get('VulnScan.train Settings', 'batch_size'))
- LEARN_RATE = float(config.get('VulnScan.train Settings', 'learning_rate'))
- CUDA = config.getboolean('VulnScan.train Settings', 'use_cuda')
- SAVE_PATH = config.get('VulnScan.train Settings', 'save_model_path')
-
- validate_data()
-
- # Load Data
- logger.info(f"Loading data from {TRAINING_PATH}")
- texts, labels = [], []
- for filename in os.listdir(TRAINING_PATH):
- with open(os.path.join(config.get('VulnScan.train Settings', 'train_data_path'), filename), 'r',
- encoding='utf-8') as file:
- texts.append(file.read())
- labels.append(1 if '-sensitive' in filename else 0)
- logger.debug(f"Loaded data from {filename} with label {labels[-1]}")
-
- # Split Data
- logger.info("Splitting data into training and validation sets")
- X_train, X_val, y_train, y_val = train_test_split(texts,
- labels,
- test_size=0.2,
- random_state=42)
-
- # Train Model
- try:
- train_model(model_name=MODEL_NAME,
- epochs=EPOCHS,
- batch_size=BATCH_SIZE,
- learning_rate=LEARN_RATE,
- save_model_path=SAVE_PATH,
- use_cuda=CUDA)
- except RuntimeError as e:
- if "CUDA" in str(e):
- logger.error(f"GPU error: {e}. Falling back to CPU...")
- train_model(model_name=MODEL_NAME,
- epochs=EPOCHS,
- batch_size=BATCH_SIZE,
- learning_rate=LEARN_RATE,
- save_model_path=SAVE_PATH,
- use_cuda=False)
- else:
- logger.error(f"Runtime Error in training model: {e}")
- exit(1)
- except FileNotFoundError as e:
- logger.error(f"Training data or model files not found: {e}."
- f" Please check if all required files exist.")
- exit(1)
- except AttributeError as e:
- logger.error(f"Invalid model configuration or missing attributes: {e}."
- f" Please verify model settings.")
- exit(1)
- except Exception as e:
- logger.error(f"Error in training model: {e}")
- exit(1)
-else:
- raise ImportError("This training script is meant to be run directly "
- "and cannot be imported. Please execute it as a standalone script.")
diff --git a/CODE/_dev.py b/CODE/_dev.py
index 1b9cd3e6..b2b0e5e0 100644
--- a/CODE/_dev.py
+++ b/CODE/_dev.py
@@ -99,11 +99,11 @@ def _perform_checks() -> bool:
bool: True if all checks are confirmed by the user, False otherwise.
"""
checks = [
- ("[-] Have you read the required contributing guidelines?", "..\\CONTRIBUTING.md"),
- ("[-] Have you made files you don't want to be run start with '_'?", "."),
- ("[-] Have you added the file to CODE dir?", "."),
- ("[-] Have you added docstrings and comments?", "..\\CONTRIBUTING.md"),
- ("[-] Is each file containing around 1 main feature?", "..\\CONTRIBUTING.md"),
+ ("Have you read the required contributing guidelines?", "..\\CONTRIBUTING.md"),
+ ("Have you made files you don't want to be run start with '_'?", "."),
+ ("Have you added the file to CODE dir?", "."),
+ ("Have you added docstrings and comments?", "..\\CONTRIBUTING.md"),
+ ("Is each file containing around 1 main feature?", "..\\CONTRIBUTING.md"),
]
for question, file_to_open in checks:
@@ -139,7 +139,7 @@ def _handle_file_operations() -> None:
print("\n".join([f"\033[91m- {file}\033[0m" for file in removed_files])) # Red -
print("\n".join([f"* {file}" for file in normal_files]))
- if not _prompt_user("[-] Does the list above include your added files?"):
+ if not _prompt_user("Does the list above include your added files?"):
color_print("[x] Something went wrong! Please contact support.", "red")
return
diff --git a/CODE/config.ini b/CODE/config.ini
index 2f92e51a..fca15d55 100644
--- a/CODE/config.ini
+++ b/CODE/config.ini
@@ -26,8 +26,8 @@ save_preferences = true
[System Settings]
# Do not play with these settings unless you know what you are doing
# Dev Mode allows a safe way to modify these settings!!
-version = 3.4.2
-files = "bluetooth_details.py, bluetooth_logger.py, browser_miner.ps1, cmd_commands.py, config.ini, dir_list.py, dump_memory.py, event_log.py, Logicytics.py, log_miner.py, media_backup.py, netadapter.ps1, network_psutil.py, packet_sniffer.py, property_scraper.ps1, registry.py, sensitive_data_miner.py, ssh_miner.py, sys_internal.py, tasklist.py, tree.ps1, vulnscan.py, wifi_stealer.py, window_feature_miner.ps1, wmic.py, logicytics\Checks.py, logicytics\Config.py, logicytics\Execute.py, logicytics\FileManagement.py, logicytics\Flag.py, logicytics\Get.py, logicytics\Logger.py, logicytics\User_History.json.gz, VulnScan\Model SenseMini .3n3.pth, VulnScan\README.md, VulnScan\Vectorizer .3n3.pkl"
+version = 3.5.0
+files = "bluetooth_details.py, bluetooth_logger.py, browser_miner.ps1, cmd_commands.py, config.ini, dir_list.py, dump_memory.py, event_log.py, Logicytics.py, log_miner.py, media_backup.py, netadapter.ps1, network_psutil.py, packet_sniffer.py, property_scraper.ps1, registry.py, sensitive_data_miner.py, ssh_miner.py, sys_internal.py, tasklist.py, tree.ps1, vulnscan.py, wifi_stealer.py, window_feature_miner.ps1, wmic.py, logicytics\Checks.py, logicytics\Config.py, logicytics\Execute.py, logicytics\FileManagement.py, logicytics\Flag.py, logicytics\Get.py, logicytics\Logger.py, logicytics\User_History.json.gz, vulnscan\Model SenseMini .3n3.pth, vulnscan\Vectorizer .3n3.pkl"
# If you forked the project, change the USERNAME to your own to use your own fork as update material,
# I don't advise doing this, however
config_url = https://raw.githubusercontent.com/DefinetlyNotAI/Logicytics/main/CODE/config.ini
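+# For example (illustration only - YOUR_USERNAME is a placeholder for your GitHub username, not a real account):
+# config_url = https://raw.githubusercontent.com/YOUR_USERNAME/Logicytics/main/CODE/config.ini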
diff --git a/CODE/dump_memory.py b/CODE/dump_memory.py
index f4dbaa3f..360a27d8 100644
--- a/CODE/dump_memory.py
+++ b/CODE/dump_memory.py
@@ -98,7 +98,7 @@ def memory_dump():
try:
process = psutil.Process(pid)
dump_path = os.path.join(DUMP_DIR, "Ram_Dump.txt")
- with open(dump_path, "wb", encoding="utf-8") as dump_file:
+ with open(dump_path, "wb") as dump_file:
total_size = 0
# Disk space safety check
diff --git a/CODE/logicytics/FileManagement.py b/CODE/logicytics/FileManagement.py
index 39fe3215..3305c53d 100644
--- a/CODE/logicytics/FileManagement.py
+++ b/CODE/logicytics/FileManagement.py
@@ -30,6 +30,7 @@ def open_file(file: str, use_full_path: bool = False) -> str | None:
subprocess.run(["start", file_path], shell=False)
except Exception as e:
return f"Error opening file: {e}"
+ return None
@staticmethod
def mkdir():
@@ -145,6 +146,7 @@ def __remove_files(path: str, files: list) -> str | None:
os.remove(os.path.join(path, file))
except Exception as e:
return f"Error: {e}"
+ return None
@staticmethod
def __generate_sha256_hash(filename: str) -> str:
diff --git a/CODE/logicytics/Flag.py b/CODE/logicytics/Flag.py
index 149a885e..9e362160 100644
--- a/CODE/logicytics/Flag.py
+++ b/CODE/logicytics/Flag.py
@@ -53,7 +53,6 @@ def __get_sim(user_input: str, all_descriptions: list[str]) -> list[float]:
"""
# Encode the current user input and historical inputs
from sentence_transformers import SentenceTransformer, util
-
import logging # Suppress logging messages from Sentence Transformer due to verbosity
# Set the logging level based on the debug mode, either DEBUG or ERROR (aka only important messages)
if DEBUG_MODE:
@@ -211,14 +210,14 @@ def _generate_summary_and_graph(cls):
log.info("\nFlag Usage Summary Graph saved in current working directory as 'Flag_usage_summary.png'")
@staticmethod
- def load_history() -> dict[str, any]:
+ def load_history() -> dict:
"""
Load user interaction history from a gzipped JSON file.
This method attempts to read and parse historical interaction data from a compressed JSON file. If the file is not found, it returns an empty history structure with an empty interactions dictionary and a zero-initialized flags usage counter.
Returns:
- dict[str, any]: A dictionary containing:
+ dict: A dictionary containing:
- 'interactions': A dictionary of past user interactions
- 'flags_usage': A Counter object tracking flag usage frequencies
@@ -233,7 +232,7 @@ def load_history() -> dict[str, any]:
return {'interactions': {}, 'flags_usage': Counter()}
@staticmethod
- def save_history(history_data: dict[str, any]):
+ def save_history(history_data: dict):
"""
Save user interaction history to a gzipped JSON file.
@@ -518,22 +517,6 @@ def __available_arguments(cls) -> tuple[argparse.Namespace, argparse.ArgumentPar
help="Execute Flag that will shutdown the device afterward",
)
- # Not yet Implemented
- parser.add_argument(
- "--webhook",
- action="store_true",
- help="Execute Flag that will send zip File via webhook "
- f"{cls.__colorify('- Not yet Implemented -', 'r')}",
- )
-
- parser.add_argument(
- "--restore",
- action="store_true",
- help="Restore Logicytics files from the ACCESS/BACKUPS directory "
- f"{cls.__colorify('- Use on your own device only -', 'y')} "
- f"{cls.__colorify('- Not yet Implemented -', 'r')}",
- )
-
# Parse the arguments
args, unknown = parser.parse_known_args()
valid_flags = [action.dest for action in parser._actions if action.dest != 'help']
@@ -564,7 +547,6 @@ def __exclusivity_logic(args: argparse.Namespace) -> bool:
special_flags = {
args.reboot,
args.shutdown,
- args.webhook
}
action_flags = {
args.default,
diff --git a/CODE/packet_sniffer.py b/CODE/packet_sniffer.py
index 69804a14..95d85627 100644
--- a/CODE/packet_sniffer.py
+++ b/CODE/packet_sniffer.py
@@ -112,8 +112,8 @@ def sniff_packets(self, iface: str, count: int, timeout: int, retry_max: int):
)
log.info("Sniff complete.")
break
- except Exception as e:
- log.warning(f"Sniff failed on {iface}: {e}")
+ except Exception as err:
+ log.warning(f"Sniff failed on {iface}: {err}")
iface = self._correct_interface(iface)
else:
log.error("Max retry time exceeded.")
diff --git a/CODE/vulnscan.py b/CODE/vulnscan.py
index cd755a9b..b7c86124 100644
--- a/CODE/vulnscan.py
+++ b/CODE/vulnscan.py
@@ -4,12 +4,12 @@
import os
import threading
import warnings
-from pathlib import Path
import aiofiles
import joblib
import numpy as np
import torch
+from pathlib import Path
from safetensors import safe_open
from tqdm import tqdm
@@ -184,7 +184,7 @@ async def scan_worker(scan_file):
"C:\\Program Files",
"C:\\Program Files (x86)"
]
- vulnscan = VulnScan("VulnScan/Model SenseMini .3n3.pth", "VulnScan/Vectorizer .3n3.pkl")
+ vulnscan = VulnScan("vulnscan/Model SenseMini .3n3.pth", "vulnscan/Vectorizer .3n3.pkl")
vulnscan.scan_directory(base_paths)
except KeyboardInterrupt:
log.warning("User interrupted. Exiting gracefully.")
diff --git a/PLANS.md b/PLANS.md
index 661ea33d..4c7e31f6 100644
--- a/PLANS.md
+++ b/PLANS.md
@@ -7,8 +7,6 @@
| Task | Version | Might or Will be done? |
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|------------------------|
-| Implement the 2 missing flags | v3.5.0 | ✅ |
-| Move VulnScan tools and v3 module to separate repository, keep only the model and vectorizer | v3.5.0 | ✅ |
 | Get any BETA features out of BETA | v3.6.0 | ✅ |
 | Replace Logger.py with Util that contains (tprint), also implement the ExceptionHandler and UpdateManager from Util | v3.6.0 | ✅ |
 | Remake VulnScan .pkl and .pth to be more accurate | v3.6.0 | ✅ |
diff --git a/README.md b/README.md
index b4a18205..f0345e5f 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ This comprehensive guide is here to equip you with everything you need to use Lo
-
+
@@ -36,13 +36,13 @@ To install and setup Logicytics, follow these steps:
> [!IMPORTANT]
> We recommend Python Version `3.11` or higher, as the project is developed and tested on this version.
->
-> You must also install `pytorch` if you want to use the vulnscan feature, To install run the command `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124`
-> If the device has CUDA available (NVidea GPUs),
>
-> Otherwise, run `pip3 install torch torchvision torchaudio` to use the CPU, ofcourse this is optional for normal
-> usage's,
-> until you require `vulnscan`
+> To use vulnscan, you will need `torch`. Installation instructions can be
+> found [here](https://pytorch.org/#fws_68845ae25b0fb).
+> If you have a supported NVIDIA GPU, it is recommended to install the CUDA build of PyTorch for better performance.
+>
+> On the PyTorch selector, the settings should be `Stable -> Windows -> Pip -> Python`; if you have a supported CUDA
+> version, select it, otherwise select CPU.
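+>
+> For example, with a CUDA 12.4 setup the selector yields a command like the one below (this is the command the
+> previous revision of this note suggested); the CPU-only install is the same command without the `--index-url` part:
+>
+> ```
+> pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
+> ```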
### Prerequisites
diff --git a/SECURITY.md b/SECURITY.md
index 9b5c6b67..b9fb057b 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -4,26 +4,27 @@
This section outlines the versions of our project that are currently supported with security updates.
-| Version | Supported | Release Date |
-|---------|-----------|-----------------|
-| 3.4.x | ✅ | January 3, 2025 |
-| 3.3.x | ✅ | January 3, 2025 |
-| 3.2.x | ⚠️ | Dec 19, 2024 |
-| 3.1.x | ⚠️ | Dec 11, 2024 |
-| 3.0.x | ⚠️ | Dec 6, 2024 |
-| 2.5.x | ❌ | Nov 25, 2024 |
-| 2.4.x | ❌ | Nov 12, 2024 |
-| 2.3.x | ❌ | Sep 21, 2024 |
-| 2.2.x | ❌ | Sep 9, 2024 |
-| 2.1.x | ❌ | Aug 29, 2024 |
-| 2.0.x | ❌ | Aug 25, 2024 |
-| 1.6.x | ❌ | Jun 18, 2024 |
-| 1.5.x | ❌ | Jun 10, 2024 |
-| 1.4.x | ❌ | May 30, 2024 |
-| 1.3.x | ❌ | May 21, 2024 |
-| 1.2.x | ❌ | May 16, 2024 |
-| 1.1.x | ❌ | May 10, 2024 |
-| 1.0.x | ❌ | May 4, 2024 |
+| Version | Supported | Major Release Date |
+|---------|-----------|--------------------|
+| 3.5.x | ✅ | July 26, 2025 |
+| 3.4.x | ⚠️ | January 3, 2025 |
+| 3.3.x | ⚠️ | January 3, 2025 |
+| 3.2.x | ⚠️ | Dec 19, 2024 |
+| 3.1.x | ⚠️ | Dec 11, 2024 |
+| 3.0.x | ❌ | Dec 6, 2024 |
+| 2.5.x | ❌ | Nov 25, 2024 |
+| 2.4.x | ❌ | Nov 12, 2024 |
+| 2.3.x | ❌ | Sep 21, 2024 |
+| 2.2.x | ❌ | Sep 9, 2024 |
+| 2.1.x | ❌ | Aug 29, 2024 |
+| 2.0.x | ❌ | Aug 25, 2024 |
+| 1.6.x | ❌ | Jun 18, 2024 |
+| 1.5.x | ❌ | Jun 10, 2024 |
+| 1.4.x | ❌ | May 30, 2024 |
+| 1.3.x | ❌ | May 21, 2024 |
+| 1.2.x | ❌ | May 16, 2024 |
+| 1.1.x | ❌ | May 10, 2024 |
+| 1.0.x | ❌ | May 4, 2024 |
### Key:
diff --git a/requirements.txt b/requirements.txt
index da6e4703..2754b8d1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,26 +1,20 @@
configobj~=5.0.9
-pathlib~=1.0.1
-joblib~=1.3.2
-matplotlib~=3.10.1
-xgboost~=2.1.4
-scikit-learn~=1.6.1
-Faker~=36.1.1
-networkx~=3.2.1
-numpy~=2.2.3
-plotly~=6.0.0
-seaborn~=0.13.2
-torchviz~=0.0.3
-tqdm~=4.66.6
+configparser~=7.1.0
+psutil~=6.1.1
requests~=2.32.3
DateTime~=5.5
-sentence-transformers~=3.4.1
colorlog~=6.9.0
+aiofiles~=24.1.0
+joblib~=1.3.2
+numpy~=2.2.3
+pathlib~=1.0.1
safetensors~=0.5.3
+tqdm~=4.66.6
WMI~=1.5.1
prettytable~=3.15.1
-pandas~=2.2.2
+matplotlib~=3.10.1
+networkx~=3.2.1
+pandas~=2.2.3
+cryptography~=44.0.2
scapy~=2.5.0
-psutil~=7.0.0
-configparser~=7.1.0
-aiofiles~=24.1.0
-cryptography~=44.0.2
\ No newline at end of file
+sentence-transformers~=5.0.0
\ No newline at end of file