diff --git a/.github/config.yml b/.github/config.yml index 738f3c05..3ad48885 100644 --- a/.github/config.yml +++ b/.github/config.yml @@ -15,7 +15,7 @@ requestInfoDefaultTitles: - update # *OPTIONAL* Label to be added to Issues and Pull Requests with insufficient information given -requestInfoLabelToAdd: progress:Invalid +requestInfoLabelToAdd: issue/Invalid # *OPTIONAL* Require Issues to contain more information than what is provided in the issue templates # Will fail if the issue's body is equal to a provided template diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml deleted file mode 100644 index 86f165e6..00000000 --- a/.github/workflows/codeql.yml +++ /dev/null @@ -1,78 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# -name: "CodeQL" - -on: - push: - branches: ["main"] - pull_request: - # The branches below must be a subset of the branches above - branches: ["main"] - schedule: - - cron: "0 0 * * 1" - -permissions: - contents: read - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: ["python"] - # CodeQL supports [ $supported-codeql-languages ] - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support - - steps: - - name: Harden Runner - uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 - with: - egress-policy: audit - - - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10 - - # â„šī¸ Command-line programs to run using the OS shell. - # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 
- - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@b56ba49b26e50535fa1e7f7db0f4f7b4bf65d80d # v3.28.10 - with: - category: "/language:${{matrix.language}}" diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml deleted file mode 100644 index 4a15a1df..00000000 --- a/.github/workflows/dependency-review.yml +++ /dev/null @@ -1,27 +0,0 @@ -# Dependency Review Action -# -# This Action will scan dependency manifest files that change as part of a Pull Request, -# surfacing known-vulnerable versions of the packages declared or updated in the PR. -# Once installed, if the workflow run is marked as required, -# PRs introducing known-vulnerable packages will be blocked from merging. -# -# Source repository: https://github.com/actions/dependency-review-action -name: 'Dependency Review' -on: [pull_request] - -permissions: - contents: read - -jobs: - dependency-review: - runs-on: ubuntu-latest - steps: - - name: Harden Runner - uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 - with: - egress-policy: audit - - - name: 'Checkout Repository' - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: 'Dependency Review' - uses: actions/dependency-review-action@3b139cfc5fae8b618d3eae3675e383bb1769c019 # v4.5.0 diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml index 794f83e4..9d4a3ede 100644 --- a/.github/workflows/greetings.yml +++ b/.github/workflows/greetings.yml @@ -7,7 +7,7 @@ permissions: jobs: greeting: - runs-on: ubuntu-latest + runs-on: windows-latest permissions: issues: write pull-requests: write diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index d979ed4d..1d56287e 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -15,7 +15,7 @@ permissions: jobs: stale: - runs-on: ubuntu-latest + runs-on: windows-latest permissions: issues: write pull-requests: write diff --git a/.idea/csv-editor.xml b/.idea/csv-editor.xml new file mode 100644 index 00000000..487ddc99 --- /dev/null +++ b/.idea/csv-editor.xml @@ -0,0 +1,16 @@ + + + + + + \ No newline at end of file diff --git a/CODE/Logicytics.py b/CODE/Logicytics.py index bbe11631..bd770446 100644 --- a/CODE/Logicytics.py +++ b/CODE/Logicytics.py @@ -490,8 +490,6 @@ def handle_sub_action(): subprocess.call("shutdown /s /t 3", shell=False) elif SUB_ACTION == "reboot": subprocess.call("shutdown /r /t 3", shell=False) - # elif sub_action == "webhook": - # TODO: Implement this in future v3.5 @log.function diff --git a/CODE/VulnScan/README.md b/CODE/VulnScan/README.md deleted file mode 100644 index 8c463bf3..00000000 --- a/CODE/VulnScan/README.md +++ /dev/null @@ -1,148 +0,0 @@ -# VulnScan Documentation - -> [!WARNING] -> # VulnScan Migration Notice 🚨 -> -> VulnScan is moving to a new home! Here's what you need to know: -> - This repository section will be archived -> - A link to the new repository will be added here -> - The `pkl` and `pth` files will stay here (they'll work with `vulnscan.py`) -> - VulnScan itself will NOT be moved to the other repository -> -> This will occur in version `3.2.0` of Logicytics. - -## Overview - -VulnScan is designed to detect sensitive data across various file formats. It offers a modular framework to train models using diverse algorithms, from traditional ML classifiers to advanced Neural Networks. 
This document outlines the system's naming conventions, lifecycle, and model configuration. - -> The model that is being used is `Model SenseMini 3n3` with a vectorizer from `tools/_vectorizer.py` (Used the random dataset) - ---- - -## Naming Conventions - -### Model Naming Format -`Model {Type of model} .{Version}` - -- **Type of Model**: Describes the training data configuration. - - `Sense`: Sensitive data set with 50k files, each 50KB in size. - - `SenseNano`: Test set with 5-10 files, each 5KB, used for error-checking. - - `SenseMacro`: Large dataset with 1M files, each 10KB. This is computationally intensive, so some corners were cut in training. - - `SenseMini`: Dataset with 10K files, each between 10-200KB. Balanced size for effective training and resource efficiency. - -- **Version Format**: `{Version#}{c}{Repeat#}` - - **Version#**: Increment for major code updates. - - **c**: Model identifier (e.g., NeuralNetwork, BERT, etc.). See below for codes. - - **Repeat#**: Number of times the same model was trained without significant code changes, used to improve consistency. - - **-F**: Denotes a failed model or a corrupted model. - -### Model Identifiers - -| Code | Model Type | -|------|---------------------------| -| `b` | BERT | -| `dt` | DecisionTree | -| `et` | ExtraTrees | -| `g` | GBM | -| `l` | LSTM | -| `n` | NeuralNetwork (preferred) | -| `nb` | NaiveBayes | -| `r` | RandomForestClassifier | -| `lr` | Logistic Regression | -| `v` | SupportVectorMachine | -| `x` | XGBoost | - -### Example -`Model Sense .1n2`: -- Dataset: `Sense` (50k files, 50KB each). -- Version: 1 (first major version). -- Model: `NeuralNetwork` (`n`). -- Repeat Count: 2 (second training run with no major code changes). - ---- - -## Life Cycle Phases - -### Version 1 (Deprecated) -- **Removed**: Small and weak codebase, replaced by `v3`. - -1. Generate data. -2. Index paths. -3. Read paths. -4. Train models and iterate through epochs. -5. Produce outputs: data, graphs, and `.pkl` files. - ---- - -### Version 2 (Deprecated) -- **Deprecation Reason**: Outdated methods for splitting and vectorizing data. - -1. Load Data. -2. Split Data. -3. Vectorize Text. -4. Initialize Model. -5. Train Model. -6. Evaluate Model. -7. Save Model. -8. Track Progress. - ---- - -### Version 3 (Current) -1. **Read Config**: Load model and training parameters. -2. **Load Data**: Collect and preprocess sensitive data. -3. **Split Data**: Separate into training and validation sets. -4. **Vectorize Text**: Transform textual data using `TfidfVectorizer`. -5. **Initialize Model**: Define traditional ML or Neural Network models. -6. **Train Model**: Perform iterative training using epochs. -7. **Validate Model**: Evaluate with metrics and generate classification reports. -8. **Save Model**: Persist trained models and vectorizers for reuse. -9. **Track Progress**: Log and visualize accuracy and loss trends over epochs. - ---- - -## Preferred Model -**NeuralNetwork (`n`)** -- Proven to be the most effective for detecting sensitive data in the project. - ---- - -## Notes -- **Naming System**: Helps track model versions, datasets, and training iterations for transparency and reproducibility. -- **Current Focus**: Transition to `v3` for improved accuracy, flexibility, and robust performance. - ---- - -## Additional Features - -- **Progress Tracking**: Visualizes accuracy and loss per epoch with graphs. -- **Error Handling**: Logs errors for missing files, attribute issues, or unexpected conditions. 
-- **Extensibility**: Supports plug-and-play integration for new algorithms or datasets. - - -# More files - -There is a repository that archived all the data used to make the model, -as well as previously trained models for you to test out -(loading scripts and vectorizers are not included). - -The repository is located [here](https://github.com/DefinetlyNotAI/VulnScan_TrainingData). - -The repository contains the following directories: -- `Archived Models`: Contains the previously trained models. Is organized by the model type then version. -- `NN features`: Contains information about the model `.3n3` and the vectorizer used. Information include: - - `Documentation_Study_Network.md`: A markdown file that contains more info. - - `Neural Network Nodes Graph.gexf`: A Gephi file that contains the model nodes and edges. - - `Nodes and edges (GEPHI).csv`: A CSV file that contains the model nodes and edges. - - `Statistics`: Directories made by Gephi, containing the statistics of the model nodes and edges. - - `Feature_Importance.svg`: A SVG file that contains the feature importance of the model. - - `Loss_Landscape_3D.html`: A HTML file that contains the 3D loss landscape of the model. - - `Model Accuracy Over Epochs.png` and `Model Loss Over Epochs.png`: PNG files that contain the model accuracy and loss over epochs. - - `Model state dictionary.txt`: A text file that contains the model state dictionary. - - `Model Summary.txt`: A text file that contains the model summary. - - `Model Visualization.png`: A PNG file that contains the model visualization. - - `Top_90_Features.svg`: A SVG file that contains the top 90 features of the model. - - `Vectorizer features.txt`: A text file that contains the vectorizer features. - - `Visualize Activation.png`: A PNG file that contains the visualization of the model activation. - - `Visualize t-SNE.png`: A PNG file that contains the visualization of the model t-SNE. - - `Weight Distribution.png`: A PNG file that contains the weight distribution of the model. diff --git a/CODE/VulnScan/tools/_study_network.py b/CODE/VulnScan/tools/_study_network.py deleted file mode 100644 index 907c8576..00000000 --- a/CODE/VulnScan/tools/_study_network.py +++ /dev/null @@ -1,624 +0,0 @@ -from __future__ import annotations - -import os -import os.path -import random -from collections import OrderedDict -from configparser import ConfigParser -from os import mkdir -from typing import Any - -import joblib -import matplotlib.pyplot as plt -import networkx as nx -import numpy as np -import plotly.graph_objects as go -import seaborn as sns -import torch -import torch.nn as nn -from faker import Faker -from numpy import ndarray, dtype -from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer -from sklearn.manifold import TSNE -from torch import device -from torch.utils.data import DataLoader, TensorDataset -from torchviz import make_dot -from tqdm import tqdm - - -# Example of DataLoader for loss landscape (dummy dataset for visualization) -class DummyDataset(torch.utils.data.Dataset): - """ - A dummy dataset for generating synthetic data for visualization purposes. - - Attributes: - num_samples (int): Number of samples in the dataset. - input_dim (int): Dimension of the input data. - data (list): List of generated data samples. - labels (list): List of labels corresponding to the data samples. 
- """ - - def __init__(self, num_samples: int = 100, input_dim: int = 10000): - """ - Initializes the DummyDataset with the specified number of samples and input dimension. - - Args: - num_samples (int): Number of samples to generate. - input_dim (int): Dimension of the input data. - """ - self.num_samples = num_samples - self.input_dim = input_dim - self.data: list[str] = [] - self.labels: list[int] = [] - faker = Faker() - for _ in range(num_samples): - if random.random() < 0.05: # 5% chance to include sensitive data - self.data.append(f"Name: {faker.name()}, SSN: {faker.ssn()}, Address: {faker.address()}") - self.labels.append(1) # Label as sensitive - else: - self.data.append(faker.text(max_nb_chars=100)) # Non-sensitive data - self.labels.append(0) # Label as non-sensitive - - def __len__(self) -> int: - """ - Returns the number of samples in the dataset. - - Returns: - int: Number of samples in the dataset. - """ - return self.num_samples - - def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]: - """ - Retrieves the data and label at the specified index. - - Args: - idx (int): Index of the data and label to retrieve. - - Returns: - tuple: A tuple containing the data tensor and label tensor. - """ - data = self.data[idx] - label = self.labels[idx] - # Convert data to tensor of ASCII values and pad to input_dim - data_tensor = torch.tensor([ord(c) for c in data], dtype=torch.float32) - if len(data_tensor) < self.input_dim: - padding = torch.zeros(self.input_dim - len(data_tensor)) - data_tensor = torch.cat((data_tensor, padding)) - else: - data_tensor = data_tensor[:self.input_dim] - label_tensor = torch.tensor(label, dtype=torch.long) - return data_tensor, label_tensor - - -def load_data(text_data: list[str], vectorizer_to_load: TfidfVectorizer | CountVectorizer) -> DataLoader: - """ - Vectorizes the text data and creates a DataLoader for it. - - Args: - text_data (list of str): The text data to be vectorized. - vectorizer_to_load: The vectorizer to use for transforming the text data. - - Returns: - DataLoader: A DataLoader containing the vectorized text data and dummy labels. 
- """ - # Vectorize the text data - X = vectorizer_to_load.transform(text_data) - # Create a dummy label for visualization (replace with real labels if available) - y = np.zeros(len(text_data)) - # Convert to torch tensors - X_tensor = torch.tensor(X.toarray(), dtype=torch.float32) - y_tensor = torch.tensor(y, dtype=torch.long) - dataset = TensorDataset(X_tensor, y_tensor) - return DataLoader(dataset, batch_size=32, shuffle=True) - - -def visualize_weight_distribution(model_to_load: torch.nn.Module): - # Access weights of the first layer - weights = model_to_load[0].weight.detach().cpu().numpy() # Move tensor to CPU before conversion to numpy - plt.hist(weights.flatten(), bins=50) - plt.title("Weight Distribution - First Layer") - plt.xlabel("Weight Value") - plt.ylabel("Frequency") - plt.savefig("NN features/Weight Distribution.png") - plt.close() - - -def visualize_activations(model_to_load: torch.nn.Module, input_tensor: torch.Tensor): - # Check the device of the model - device_va = next(model_to_load.parameters()).device - - # Move the input tensor to the same device as the model - input_tensor = input_tensor.to(device_va) - - activations = [] - - # noinspection PyUnusedLocal - def hook_fn(module, inputx, output): - # Hook function to extract intermediate layer activations - activations.append(output) - - model_to_load[0].register_forward_hook(hook_fn) # Register hook on first layer - - # Perform a forward pass - _ = model_to_load(input_tensor) - activation = activations[0].detach().cpu().numpy() # Move activations to CPU - - # Plot activations as a bar chart - plt.figure(figsize=(10, 6)) - plt.bar(range(len(activation[0])), activation[0]) - plt.title("Activation Values - First Layer") - plt.xlabel("Neuron Index") - plt.ylabel("Activation Value") - plt.savefig("NN features/Visualize Activation.png") - plt.close() - - -def visualize_tsne(model_to_load: torch.nn.Module, dataloader: DataLoader): - # Get the device of the model - device_va = next(model_to_load.parameters()).device - - model_to_load.eval() # Set the model to evaluation mode - - features = [] - labels = [] - - with torch.no_grad(): - for data, target in dataloader: - # Move data and target to the same device as the model - data, target = data.to(device_va), target.to(device_va) - - # Extract features (output of the model) - output = model_to_load(data) - features.append(output.cpu().numpy()) # Move output to CPU for concatenation - labels.append(target.cpu().numpy()) # Move target to CPU for concatenation - - # Stack all batches - features = np.vstack(features) - labels = np.hstack(labels) - - # Determine suitable perplexity - num_samples = features.shape[0] - perplexity = min(30, num_samples - 1) # Ensure perplexity < num_samples - - # Apply t-SNE - tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity) - reduced_features = tsne.fit_transform(features) - - # Plot the t-SNE results - plt.figure(figsize=(10, 8)) - scatter = plt.scatter(reduced_features[:, 0], reduced_features[:, 1], c=labels, cmap='viridis', alpha=0.7) - plt.colorbar(scatter, label="Class") - plt.title("t-SNE Visualization of Features") - plt.xlabel("t-SNE Dimension 1") - plt.ylabel("t-SNE Dimension 2") - plt.savefig("NN features/Visualize t-SNE.png") - plt.close() - - -# Main function to run all visualizations -def plot_many_graphs(): - print("Starting synthetic data generation...") - # Load data - faker = Faker() - - # Generate sensitive examples - sensitive_data = [ - f"Name: {faker.name()}, SSN: {faker.ssn()}, Address: {faker.address()}", - 
f"Credit Card: {faker.credit_card_number()}, Expiry: {faker.credit_card_expire()}, CVV: {faker.credit_card_security_code()}", - f"Patient: {faker.name()}, Condition: {faker.text(max_nb_chars=20)}", - f"Password: {faker.password()}", - f"Email: {faker.email()}", - f"Phone: {faker.phone_number()}", - f"Medical Record: {faker.md5()}", - f"Username: {faker.user_name()}", - f"IP: {faker.ipv4()}", - ] - - # Generate non-sensitive examples - non_sensitive_data = [ - faker.text(max_nb_chars=50) for _ in range(50000) - ] - - data_text = non_sensitive_data + (sensitive_data * 15) - random.shuffle(data_text) - print("Loaded data for visualization.") - dataloader = load_data(data_text, vectorizer) - - # Visualizations - print("Creating visualizations...") - visualize_weight_distribution(model) - - # For activations, use a sample from the dataloader - print("Creating activation visualizations...") - sample_input = next(iter(dataloader))[0] - visualize_activations(model, sample_input) - - print("Creating t-SNE visualization - May take a long time...") - visualize_tsne(model, dataloader) - - print("Completed.") - - -# Visualize feature importance (dummy example for visualization) and save as SVG -def visualize_feature_importance(TOKENS: list[str], FEATURE_IMPORTANCE: float | ndarray[Any, dtype[np.floating]], - FILENAME: str = "Plot.svg"): - # Limit the number of tokens to visualize - TOKENS = TOKENS[:1000] - FEATURE_IMPORTANCE = FEATURE_IMPORTANCE[:1000] - - plt.figure(figsize=(len(TOKENS) * 0.5, 6)) - sns.barplot(x=TOKENS, y=FEATURE_IMPORTANCE, palette="coolwarm", hue=TOKENS, legend=False) - plt.title("Feature Importance") - plt.xlabel("Tokens") - plt.ylabel("Importance") - plt.xticks(rotation=45) - plt.savefig(FILENAME, format="svg") - plt.close() # Close the plot to release memory - - -# Function to visualize the loss landscape as an interactive 3D object -def plot_loss_landscape_3d(MODEL: torch.nn.Module, DATA_LOADER: DataLoader, CRITERION: torch.nn.Module, - GRID_SIZE: int = 200, EPSILON: float = 0.01, FILENAME: str = "Plot.html"): - MODEL.eval() # Set model to evaluation mode - param = next(MODEL.parameters()) # Use the first parameter for landscape perturbations - param_flat = param.view(-1) - - # Define perturbation directions u and v - u = torch.randn_like(param_flat).view(param.shape).to(param.device) - v = torch.randn_like(param_flat).view(param.shape).to(param.device) - - # Normalize perturbations - u = EPSILON * u / torch.norm(u) - v = EPSILON * v / torch.norm(v) - - # Create grid - x = np.linspace(-1, 1, GRID_SIZE) - y = np.linspace(-1, 1, GRID_SIZE) - loss_values = np.zeros((GRID_SIZE, GRID_SIZE)) - - # Iterate through the grid to compute losses - for i, dx in enumerate(x): - print(f"Computing loss for row {i + 1}/{GRID_SIZE}...") - for j, dy in enumerate(y): - print(f" Computing loss for column {j + 1}/{GRID_SIZE}...") - param.data += dx * u + dy * v # Apply perturbation - loss = 0 - - # Compute loss for all batches in data loader - for batch in DATA_LOADER: - inputs, targets = batch - inputs = inputs.to(param.device) - targets = targets.to(param.device) - outputs = MODEL(inputs) - loss += CRITERION(outputs, targets).item() - - loss_values[i, j] = loss # Store the loss - param.data -= dx * u + dy * v # Revert perturbation - - # Create a meshgrid for plotting - X, Y = np.meshgrid(x, y) - - # Plot the 3D surface using Plotly - fig = go.Figure(data=[go.Surface(z=loss_values, x=X, y=Y, colorscale="Viridis")]) - fig.update_layout( - title="Loss Landscape (Interactive 3D)", - scene=dict( - 
xaxis_title="Perturbation in u", - yaxis_title="Perturbation in v", - zaxis_title="Loss", - ), - ) - - # Save as an interactive HTML file - fig.write_html(FILENAME) - print(f"3D loss landscape saved as {FILENAME}") - - -def main_plot(): - # Instantiate data loader - print("Creating dummy data loader...") - dummy_data_loader = DataLoader(DummyDataset(), batch_size=32) - - # Define loss criterion - print("Defining loss criterion...") - criterion = torch.nn.CrossEntropyLoss() - - # Visualizations - print("Creating visualizations...") - tokens = vectorizer.get_feature_names_out() - - # Feature importance - # Max number of features to visualize is 3000 due to image constraints - print( - f"Visualizing feature importance - This may take a while for {len(tokens[:NUMBER_OF_FEATURES]) + 1} tokens...") - feature_importance = np.random.rand(len(tokens[:NUMBER_OF_FEATURES])) # Example random importance - visualize_feature_importance(tokens[:NUMBER_OF_FEATURES], feature_importance, - FILENAME="NN features/feature_importance.svg") - - # Loss landscape - print("Visualizing loss landscape - This may take a while...") - plot_loss_landscape_3d(model, dummy_data_loader, criterion, FILENAME="NN features/loss_landscape_3d.html") - - # Set model to evaluation mode, and plot many graphs - print("Setting model to evaluation mode...") - model.eval() # Set the model to evaluation mode - plot_many_graphs() - - -def save_data(model_to_use: torch.nn.Module, input_size: tuple[int, Any] | int, batch_size: int = -1, - device_to_use: str = "cuda"): - def register_hook(module: torch.nn.Module): - - def hook(modules: torch.nn.Module, inputs: (torch.nn.Module, tuple[torch.Tensor]), output: torch.Tensor): - class_name = str(modules.__class__).split(".")[-1].split("'")[0] - module_idx = len(summaries) - - m_key = "%s-%i" % (class_name, module_idx + 1) - summaries[m_key] = OrderedDict() - summaries[m_key]["input_shape"] = list(inputs[0].size()) - summaries[m_key]["input_shape"][0] = batch_size - if isinstance(output, (list, tuple)): - summaries[m_key]["output_shape"] = [ - [-1] + list(o.size())[1:] for o in output - ] - else: - summaries[m_key]["output_shape"] = list(output.size()) - summaries[m_key]["output_shape"][0] = batch_size - - params = 0 - if hasattr(modules, "weight") and hasattr(modules.weight, "size"): - params += torch.prod(torch.LongTensor(list(modules.weight.size()))) - summaries[m_key]["trainable"] = modules.weight.requires_grad - if hasattr(modules, "bias") and hasattr(modules.bias, "size"): - params += torch.prod(torch.LongTensor(list(modules.bias.size()))) - summaries[m_key]["nb_params"] = params - - if ( - not isinstance(module, nn.Sequential) - and not isinstance(module, nn.ModuleList) - and not (module == model_to_use) - ): - hooks.append(module.register_forward_hook(hook)) - - device_to_use = device_to_use.lower() - assert device_to_use in [ - "cuda", - "cpu", - ], "Input device is not valid, please specify 'cuda' or 'cpu'" - - if device_to_use == "cuda" and torch.cuda.is_available(): - dtype_to_use = torch.cuda.FloatTensor - else: - dtype_to_use = torch.FloatTensor - - # multiple inputs to the network - if isinstance(input_size, tuple): - input_size = [input_size] - - # batch_size of 2 for batch norm - x = [torch.rand(2, *in_size).type(dtype_to_use) for in_size in input_size] - - # create properties - summaries = OrderedDict() - hooks = [] - - # register hook - model_to_use.apply(register_hook) - - # make a forward pass - model_to_use(*x) - - # remove these hooks - for h in hooks: - h.remove() - - # Save 
the summary - mode = "a" if os.path.exists("NN features/Model Summary.txt") else "w" - with open('NN features/Model Summary.txt', mode) as vf_ms: - vf_ms.write("----------------------------------------------------------------\n") - line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #") - vf_ms.write(f"{line_new}\n") - vf_ms.write("================================================================\n") - total_params = 0 - total_output = 0 - trainable_params = 0 - for layer in summaries: - # input_shape, output_shape, trainable, nb_params - line_new = "{:>20} {:>25} {:>15}".format( - layer, - str(summaries[layer]["output_shape"]), - "{0:,}".format(summaries[layer]["nb_params"]), - ) - total_params += summaries[layer]["nb_params"] - total_output += np.prod(summaries[layer]["output_shape"]) - if "trainable" in summaries[layer]: - if summaries[layer]["trainable"]: - trainable_params += summaries[layer]["nb_params"] - vf_ms.write(f"{line_new}\n") - - # assume 4 bytes/number (float on cuda). - total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.)) - total_output_size = abs(2. * total_output * 4. / (1024 ** 2.)) # x2 for gradients - total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.)) - total_size = total_params_size + total_output_size + total_input_size - - vf_ms.write("\n================================================================") - vf_ms.write("\nTotal params: {0:,}".format(total_params)) - vf_ms.write("\nTrainable params: {0:,}".format(trainable_params)) - vf_ms.write("\nNon-trainable params: {0:,}".format(total_params - trainable_params)) - vf_ms.write("\n----------------------------------------------------------------") - vf_ms.write("\nInput size (MB): %0.2f" % total_input_size) - vf_ms.write("\nForward/backward pass size (MB): %0.2f" % total_output_size) - vf_ms.write("\nParams size (MB): %0.2f" % total_params_size) - vf_ms.write("\nEstimated Total Size (MB): %0.2f" % total_size) - vf_ms.write("\n----------------------------------------------------------------\n") - - -def save_graph(): - # Create a directed graph - G = nx.DiGraph() - - def add_edges_bulk(layer_names: str, weight_matrices: np.ndarray[np.float32]): - """Efficiently add edges to the graph with progress tracking.""" - threshold = 0.1 # Adjust this threshold as needed - significant_weights = np.abs(weight_matrices) > threshold - rows, cols = np.where(significant_weights) - weights = weight_matrices[rows, cols] - - # Use tqdm for progress tracking - edge_count = len(rows) - with tqdm(total=edge_count, desc=f"Processing {layer_names}", unit="edges") as pbar: - for row, col, weight in zip(rows, cols, weights): - in_node = f"{layer_names}_in_{col}" - out_node = f"{layer_names}_out_{row}" - G.add_edge(in_node, out_node, weight=weight) - pbar.update(1) - - # Process parameters - for name, param in model.named_parameters(): - if 'weight' in name: - layer_name = name.split('.')[0] - weight_matrix = param.data.cpu().numpy() - - # Add edges with progress bar - add_edges_bulk(layer_name, weight_matrix) - - # Draw the graph - print("Writing the graph to a file...") - nx.write_gexf(G, "NN features/Neural Network Nodes Graph.gexf") - - -def setup_environment(): - print("Visualizing the model and vectorizer features...") - print("This may take a while, please wait.") - - if not os.path.exists('NN features'): - mkdir('NN features') - - -def load_vectorizer(): - vectorizer_load = joblib.load(vectorizer_path) - feature_names = vectorizer_load.get_feature_names_out() - with 
open('NN features/Vectorizer features.txt', 'w') as file: - file.write(f"Number of features: {len(feature_names)}\n\n") - file.write('\n'.join(feature_names)) - return vectorizer_load - - -def visualize_top_features(top_n: int = 90): - feature_names = vectorizer.get_feature_names_out() - sorted_indices = vectorizer.idf_.argsort()[:top_n] - top_features = [feature_names[i] for i in sorted_indices] - top_idf_scores = vectorizer.idf_[sorted_indices] - - plt.figure(figsize=(20, 12)) # Increase the figure size - sns.barplot(x=top_idf_scores, y=top_features) - plt.title('Top 90 Features by IDF Score') - plt.xlabel('IDF Score') - plt.ylabel('Feature') - - # Save the plot as a vector graphic - plt.savefig('NN features/Top_90_Features.svg', format='svg') - plt.close() - - -def load_model() -> tuple[Any, device]: - device_load = torch.device("cuda" if torch.cuda.is_available() else "cpu") - model_load = torch.load(model_path, weights_only=False) - model_load.to(device_load) - return model_load, device_load - - -def save_model_state_dict(): - with open('NN features/Model state dictionary.txt', 'w') as file: - file.write("Model's state dictionary:\n\n") - for param_tensor in model.state_dict(): - file.write(f"\n{param_tensor}\t{model.state_dict()[param_tensor].size()}") - - -def generate_model_visualization(): - dummy_input = torch.randn(1, vectorizer.vocabulary_.__len__()).to(device) - model_viz = make_dot(model(dummy_input), params=dict(model.named_parameters()), show_attrs=True, show_saved=True) - model_viz.format = 'png' - model_viz.render(filename='NN features/Model Visualization', format='png') - - -def cleanup_temp_files(): - if os.path.exists("NN features/Model Visualization"): - os.remove("NN features/Model Visualization") - - -def model_summary(): - mode = "a" if os.path.exists("NN features/Model Summary.txt") else "w" - with open("NN features/Model Summary.txt", mode) as file: - file.write(str(model)) - - -if __name__ == '__main__': - # Print the welcome message - print("===========================================================================================") - print("= This script will visualize the features of the model and vectorizer. =") - print("= Please ensure that the model and vectorizer files are present in the specified paths. =") - print("= The visualization will be saved in the 'NN features' directory. =") - print("= This script will take a while to run, please be patient. =") - print("===========================================================================================") - - # Read the config file - print("\n\nReading config file and setting up...") - config = ConfigParser() - config.read('../../config.ini') - - setup_environment() - - # Load the paths from the config file - vectorizer_path = config.get('VulnScan.study Settings', 'vectorizer_path') - model_path = config.get('VulnScan.study Settings', 'model_path') - NUMBER_OF_FEATURES = int(config.get('VulnScan.study Settings', 'number_of_features')) - - # Check if the paths exist - if not os.path.exists(vectorizer_path): - print(f"Vectorizer file not found. Please double check the path {vectorizer_path}.") - exit(1) - if not os.path.exists(model_path): - print(f"Model file not found. 
Please double check the path {model_path}.") - exit(1) - - # Load the vectorizer and model - vectorizer = load_vectorizer() - visualize_top_features() - model, device = load_model() - # Save the model summary, state dictionary, and visualization - save_data(model, input_size=(1, vectorizer.vocabulary_.__len__())) - save_model_state_dict() - generate_model_visualization() - cleanup_temp_files() - save_graph() - print("Model visualization and summary have been saved to the 'NN features' directory.") - - # Check if GPU is available - if not os.path.exists('NN features'): - os.mkdir('NN features') - - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - print(f"Using device: {device}") - - # Load vectorizer (change the path to your vectorizer .pkl file) - vectorizer_path = "../Vectorizer .3n3.pkl" - model_path = "../Model SenseMini .3n3.pth" - - # Load vectorizer - print(f"Reloading vectorizer from: {vectorizer_path}") - with open(vectorizer_path, "rb") as f: - vectorizer = joblib.load(f) - - # Load model and move to the appropriate device (GPU/CPU) - print(f"Reloading model from: {model_path}") - model = torch.load(model_path, weights_only=False) - model.to(device) # Move model to GPU or CPU - - model_summary() - main_plot() -else: - raise ImportError("This training script is meant to be run directly " - "and cannot be imported. Please execute it as a standalone script.") diff --git a/CODE/VulnScan/tools/_test_gpu_acceleration.py b/CODE/VulnScan/tools/_test_gpu_acceleration.py deleted file mode 100644 index 3b6b8b1c..00000000 --- a/CODE/VulnScan/tools/_test_gpu_acceleration.py +++ /dev/null @@ -1,25 +0,0 @@ -try: - # noinspection PyUnresolvedReferences - import torch -except ImportError as e: - print(f"Error: Failed to import torch. Please ensure PyTorch is installed correctly: {e}") - exit(1) - - -def check_gpu() -> str: - """Check if CUDA is available and print the device information. - - This function attempts to detect CUDA capability and prints whether - GPU acceleration is available, along with the device name if applicable. - """ - try: - if torch.cuda.is_available(): - return f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}" - else: - return "CUDA is not available. Using CPU." - except RuntimeError as err: - return f"Error initializing CUDA: {err}" - - -if __name__ == '__main__': - print(check_gpu()) diff --git a/CODE/VulnScan/tools/_vectorizer.py b/CODE/VulnScan/tools/_vectorizer.py deleted file mode 100644 index 25e57272..00000000 --- a/CODE/VulnScan/tools/_vectorizer.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import annotations - -from configparser import ConfigParser - -import joblib -from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer - -import os - - -def load_data(data_paths: str | os.PathLike) -> list[str]: - """ - Load data from the specified path(s). - - Args: - data_paths (str | os.PathLike): Path to a directory or a file containing data. - - Returns: - list[str]: List of strings, each representing the content of a file. 
- """ - data = [] - if os.path.isdir(data_paths): - for root, _, files in os.walk(data_paths): - for file in files: - print("Loading File: ", file) - file_path = os.path.join(root, file) - with open(file_path, 'r', encoding='utf-8') as f: - data.append(f.read()) - else: - with open(data_paths, 'r', encoding='utf-8') as f: - data.append(f.read()) - return data - - -def choose_vectorizer(vectorizer_types: str) -> TfidfVectorizer | CountVectorizer: - """ - Choose and return a vectorizer based on the specified type. - - Args: - vectorizer_types (str): Type of vectorizer to use ('tfidf' or 'count'). - - Returns: - TfidfVectorizer | CountVectorizer: The chosen vectorizer. - - Raises: - ValueError: If an unsupported vectorizer type is specified. - """ - print("Vectorizer Type: ", vectorizer_types) - print("Vectorizing Data...") - if vectorizer_types == 'tfidf': - return TfidfVectorizer(max_features=10000) - if vectorizer_types == 'count': - return CountVectorizer(max_features=10000) - raise ValueError("Unsupported vectorizer type. Choose 'tfidf' or 'count'.") - - -def main(data_paths: str, vectorizer_types: str, output_paths: str): - """ - Main function to load data, choose a vectorizer, fit the vectorizer to the data, and save the vectorizer. - - Args: - data_paths (str): Path to the data. - vectorizer_types (str): Type of vectorizer to use ('tfidf' or 'count'). - output_paths (str): Path to save the fitted vectorizer. - """ - data = load_data(data_paths) - vectorizer = choose_vectorizer(vectorizer_types) - vectorizer.fit(data) - joblib.dump(vectorizer, os.path.join(output_paths, "Vectorizer.pkl")) - print(f"Vectorizer saved to {output_paths}") - - -if __name__ == "__main__": - print("Reading config file") - config = ConfigParser() - config.read('../../config.ini') - data_path = config.get('VulnScan.vectorizer Settings', 'data_path') - vectorizer_type = config.get('VulnScan.vectorizer Settings', 'vectorizer_type') - output_path = config.get('VulnScan.vectorizer Settings', 'output_path') - if not os.path.exists(output_path): - os.makedirs(output_path) - main(data_path, vectorizer_type, output_path) -else: - raise ImportError("This training script is meant to be run directly " - "and cannot be imported. Please execute it as a standalone script.") diff --git a/CODE/VulnScan/v3/_generate_data.py b/CODE/VulnScan/v3/_generate_data.py deleted file mode 100644 index 7a5b55f4..00000000 --- a/CODE/VulnScan/v3/_generate_data.py +++ /dev/null @@ -1,227 +0,0 @@ -from __future__ import annotations - -import configparser -import os -import random -import string - -from faker import Faker - -from Logicytics import Log, DEBUG - -logger = Log( - {"log_level": DEBUG, - "filename": "../../../ACCESS/LOGS/VulnScan_Train.log", - "colorlog_fmt_parameters": - "%(log_color)s%(levelname)-8s%(reset)s %(yellow)s%(asctime)s %(blue)s%(message)s", - } -) - - -def generate_random_filename(extensions: str, suffix_x: str = '') -> str: - """ - Generate a random filename with the given extension and optional suffix. - - Args: - extensions (str): The file extension. - suffix_x (str, optional): An optional suffix to add to the filename. - - Returns: - str: The generated random filename. - """ - return ''.join(random.choices(string.ascii_letters + string.digits, k=10)) + suffix_x + extensions - - -def generate_content_for_extension(extensions: str, size: int | float) -> tuple[str, str]: - """ - Generate content based on the file extension and size. - - Args: - extensions (str): The file extension. 
- size (int | float): The size of the content to generate. - - Returns: - tuple[str, str]: The generated content and a suffix indicating the sensitivity level. - """ - full_sensitive_chance = float(config.get('full_sensitive_chance', '0.1')) - partial_sensitive_chance = float(config.get('partial_sensitive_chance', '0.3')) - - def generate_sensitive_data() -> str: - """ - Generate sensitive data based on the file extension. - - Returns: - str: The generated sensitive data. - """ - sensitive_data_generators = { - '.txt': lambda: random.choice([ - fake.credit_card_number(), - fake.ssn(), - fake.password(), - fake.email(), - fake.phone_number(), - fake.iban(), - ]), - '.json': lambda: { - 'credit_card': fake.credit_card_number(), - 'email': fake.email(), - 'phone': fake.phone_number(), - 'password': fake.password(), - 'iban': fake.iban(), - }, - '.csv': lambda: ",".join([ - fake.credit_card_number(), - fake.email(), - fake.phone_number(), - ]), - '.xml': lambda: f"{random.choice([fake.credit_card_number(), fake.iban(), fake.password()])}", - '.log': lambda: f"{fake.date_time()} - Sensitive Data: {random.choice([fake.email(), fake.password(), fake.ipv4_private()])}", - 'default': lambda: fake.text(max_nb_chars=50) - } - - return sensitive_data_generators.get(extensions, sensitive_data_generators['default'])() - - def generate_regular_content(extension_grc: str, sizes: int | float) -> str: - """ - Generate regular content based on the file extension and size. - - Args: - extension_grc (str): The file extension. - sizes (int | float): The size of the content to generate. - - Returns: - str: The generated regular content. - """ - if extension_grc == '.txt': - content_grc = fake.text(max_nb_chars=sizes) - elif extension_grc == '.json': - # noinspection PyTypeChecker - content_grc = fake.json(data_columns={ - 'name': 'name', - 'email': 'email', - 'phone': 'phone_number' - }, num_rows=sizes // 50) - elif extension_grc == '.csv': - content_grc = "\n".join( - ",".join([fake.name(), fake.email(), fake.phone_number()]) for _ in range(sizes // 50) - ) - elif extension_grc == '.xml': - content_grc = f"{''.join([f'{fake.text(50)}' for _ in range(sizes // 100)])}" - elif extension_grc == '.log': - content_grc = "\n".join([f"{fake.date_time()} - {fake.text(50)}" for _ in range(sizes // 100)]) - else: - content_grc = fake.text(max_nb_chars=sizes) - return content_grc - - if random.random() < full_sensitive_chance: - if extensions == '.json': - contents = str([generate_sensitive_data() for _ in range(size // 500)]) - elif extensions in ['.txt', '.log', '.xml']: - contents = "\n".join(generate_sensitive_data() for _ in range(size // 500)) - elif extensions == '.csv': - contents = "\n".join([generate_sensitive_data() for _ in range(size // 500)]) - else: - contents = "\n".join([generate_sensitive_data() for _ in range(size // 500)]) - return contents, '-sensitive' - else: - regular_content = generate_regular_content(extensions, size) - if random.random() < partial_sensitive_chance: - sensitive_data_count = max(1, size // 500) - sensitive_data = [generate_sensitive_data() for _ in range(sensitive_data_count)] - regular_content_lines = regular_content.split("\n") - for _ in range(sensitive_data_count): - insert_position = random.randint(0, len(regular_content_lines) - 1) - regular_content_lines.insert(insert_position, str(random.choice(sensitive_data))) - contents = "\n".join(regular_content_lines) - return contents, '-mix' - else: - contents = regular_content - return contents, '-none' - - -def 
generate_file_content(extensions: str) -> tuple[str, str]: - """ - Generate file content based on the file extension. - - Args: - extensions (str): The file extension. - - Returns: - tuple[str, str]: The generated content and a suffix indicating the sensitivity level. - """ - size = random.randint(MIN_FILE_SIZE, MAX_FILE_SIZE) - if SIZE_VARIATION != 0: - variation_choice = random.choice([1, 2, 3, 4]) - if variation_choice == 1: - size = abs(int(size + (size * SIZE_VARIATION))) - elif variation_choice == 2: - size = abs(int(size - (size * SIZE_VARIATION))) - elif variation_choice == 3: - size = abs(int(size + (size / SIZE_VARIATION))) - elif variation_choice == 4: - size = abs(int(size - (size / SIZE_VARIATION))) - logger.debug(f"Generating {extensions} content of size {size} bytes") - return generate_content_for_extension(extensions, size) - - -if __name__ == "__main__": - """ - Main function to generate files based on the configuration. - """ - fake = Faker() - - config = configparser.ConfigParser() - config.read('../../config.ini') - - config = config['VulnScan.generate Settings'] - EXTENSIONS_ALLOWED = config.get('extensions', '.txt').split(',') - SAVE_PATH = config.get('save_path', '.') - CODE_NAME = config.get('code_name', 'Sense') - SIZE_VARIATION = float(config.get('size_variation', '0.1')) - - os.makedirs(SAVE_PATH, exist_ok=True) - - DEFAULT_FILE_NUM = 10000 - DEFAULT_MIN_FILE_SIZE = 10 * 1024 - DEFAULT_MAX_FILE_SIZE = 10 * 1024 - - if CODE_NAME == 'SenseMacro': - print( - "\033[91mDeprecationWarning: SenseMacro has been removed due to instability issues. " - "Please use 'Sense' instead for better stability and performance. " - "Defaulting to 'Sense' settings for now.\033[0m" - ) - CODE_NAME = 'Sense' - - if CODE_NAME == 'Sense': - FILE_NUM = DEFAULT_FILE_NUM * 5 - MIN_FILE_SIZE = DEFAULT_MIN_FILE_SIZE * 5 - MAX_FILE_SIZE = DEFAULT_MAX_FILE_SIZE * 5 - elif CODE_NAME == 'SenseNano': - FILE_NUM = 5 - MIN_FILE_SIZE = int(DEFAULT_MIN_FILE_SIZE * 0.5) - MAX_FILE_SIZE = int(DEFAULT_MAX_FILE_SIZE * 0.5) - elif CODE_NAME == 'SenseMini': - FILE_NUM = DEFAULT_FILE_NUM - MIN_FILE_SIZE = DEFAULT_MIN_FILE_SIZE - MAX_FILE_SIZE = DEFAULT_MAX_FILE_SIZE - else: - MIN_FILE_SIZE = int(config['min_file_size'].replace('KB', '')) * 1024 - MAX_FILE_SIZE = int(config['max_file_size'].replace('KB', '')) * 1024 - FILE_NUM = DEFAULT_FILE_NUM - - logger.info(f"Generating {FILE_NUM} files with sizes between {MIN_FILE_SIZE} and {MAX_FILE_SIZE} bytes") - - for i in range(FILE_NUM): - logger.debug(f"Generating file {i + 1}/{FILE_NUM}") - extension = random.choice(EXTENSIONS_ALLOWED).strip() - content, suffix = generate_file_content(extension) - filename = generate_random_filename(extension, suffix) - filepath = os.path.join(SAVE_PATH, filename) - with open(filepath, 'w', encoding='utf-8') as f: - f.write(content) - - logger.info(f"Generated {FILE_NUM} files in {SAVE_PATH}") -else: - raise ImportError("This training script is meant to be run directly " - "and cannot be imported. 
Please execute it as a standalone script.") diff --git a/CODE/VulnScan/v3/_train.py b/CODE/VulnScan/v3/_train.py deleted file mode 100644 index ffd645bc..00000000 --- a/CODE/VulnScan/v3/_train.py +++ /dev/null @@ -1,444 +0,0 @@ -from __future__ import annotations - -import os -from configparser import ConfigParser -from typing import Any, Optional - -import joblib -import matplotlib.pyplot as plt -import torch -import torch.nn as nn -import torch.optim as optim -import xgboost as xgb -from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import accuracy_score, classification_report -from sklearn.model_selection import train_test_split -from sklearn.naive_bayes import MultinomialNB -from sklearn.tree import DecisionTreeClassifier -from torch.utils.data import Dataset, DataLoader - -# Set up logging -from logicytics import Log, DEBUG - -# NN seems to be the best choice for this task - -logger = Log( - {"log_level": DEBUG, - "filename": "../../../ACCESS/LOGS/VulnScan_Train.log", - "colorlog_fmt_parameters": - "%(log_color)s%(levelname)-8s%(reset)s %(yellow)s%(asctime)s %(blue)s%(message)s", - } -) -vectorizer = None - - -# Dataset Class for PyTorch models -class SensitiveDataDataset(Dataset): - """ - A custom Dataset class for handling sensitive data for PyTorch models. - - Attributes: - texts (list[str]): List of text data. - labels (list[int]): List of labels corresponding to the text data. - tokenizer (callable, optional): A function to tokenize the text data. - """ - - def __init__(self, - texts_init: list[str], - labels_init: list[int], - tokenizer: Optional[callable] = None): - """ - Initializes the SensitiveDataDataset with texts, labels, and an optional tokenizer. - - Args: - texts_init (list[str]): List of text data. - labels_init (list[int]): List of labels corresponding to the text data. - tokenizer (callable, optional): A function to tokenize the text data. - """ - self.texts = texts_init - self.labels = labels_init - self.tokenizer = tokenizer - - def __len__(self) -> int: - """ - Returns the number of samples in the dataset. - - Returns: - int: Number of samples. - """ - return len(self.texts) - - def __getitem__(self, idx: int) -> tuple: - """ - Retrieves a sample and its label from the dataset at the specified index. - - Args: - idx (int): Index of the sample to retrieve. - - Returns: - tuple: A tuple containing the tokenized text tensor and the label tensor. - """ - text = self.texts[idx] - label = self.labels[idx] - if self.tokenizer: - text = self.tokenizer(text) - return torch.tensor(text, dtype=torch.float32), torch.tensor(label, dtype=torch.long) - - -def vectorize_text_data(X_trains: list[str], X_vals: list[str], save_model_path: str): - """ - Vectorizes the text data using TfidfVectorizer and saves the vectorizer model. - - Args: - X_trains (list[str]): List of training text data. - X_vals (list[str]): List of validation text data. - save_model_path (str): Path to save the vectorizer model. - - Returns: - tuple: Transformed training and validation data as arrays. 
- """ - vectorizers = TfidfVectorizer(max_features=10000, ngram_range=(1, 2)) - joblib.dump(vectorizers, os.path.join(os.path.dirname(save_model_path), 'Vectorizer.pkl')) - return vectorizers.fit_transform(X_trains).toarray(), vectorizers.transform(X_vals).toarray() - - -def save_and_plot_model(model: nn.Module, - save_model_path: str, - accuracy_list: list[float], - loss_list: list[float], - epochs: int, - model_name: str): - """ - Saves the trained model and plots the accuracy and loss over epochs. - - Args: - model (nn.Module): The trained PyTorch model. - save_model_path (str): The path to save the model. - accuracy_list (list[float]): List of accuracy values over epochs. - loss_list (list[float]): List of loss values over epochs. - epochs (int): The number of epochs. - model_name (str): The name of the model. - """ - logger.info(f"Saving {model_name} model") - if save_model_path: - logger.info(f"Saving model to {save_model_path}.pth") - torch.save(model, save_model_path + ".pth") - - logger.info(f"Plotting {model_name} model - Accuracy Over Epochs") - plt.figure(figsize=(12, 6)) - plt.plot(list(range(1, epochs + 1)), accuracy_list, label="Accuracy") - plt.title(f'{model_name} - Validation Accuracy Over Epochs') - plt.xlabel('Epoch') - plt.ylabel('Accuracy') - plt.legend() - plt.grid(True) - plt.savefig(os.path.join(os.path.dirname(save_model_path), f"Model Accuracy Over Epochs - {model_name}.png")) - plt.show() - - logger.info(f"Plotting {model_name} model - Loss Over Epochs") - plt.plot(list(range(1, epochs + 1)), loss_list, label="Loss") - plt.title(f'{model_name} - Validation Loss Over Epochs') - plt.xlabel('Epochs') - plt.ylabel('Loss') - plt.legend() - plt.savefig(os.path.join(os.path.dirname(save_model_path), f"Model Loss Over Epochs - {model_name}.png")) - plt.show() - - -def select_model_from_traditional(model_name: str, - epochs: int) -> LogisticRegression | RandomForestClassifier | ExtraTreesClassifier | GradientBoostingClassifier | DecisionTreeClassifier | MultinomialNB | Any: - """ - Selects and returns a machine learning model based on the provided model name. - - Args: - model_name (str): The name of the model to select. - epochs (int): The number of epochs for training (used for LogisticRegression). - - Returns: - A machine learning model instance corresponding to the model name. - """ - logger.info(f"Selecting {model_name} model") - if model_name == 'LogisticRegression': - return LogisticRegression(max_iter=epochs) - if model_name == 'RandomForest': - return RandomForestClassifier(n_estimators=100) - if model_name == 'ExtraTrees': - return ExtraTreesClassifier(n_estimators=100) - if model_name == 'GBM': - return GradientBoostingClassifier(n_estimators=100) - if model_name == 'XGBoost': - return xgb.XGBClassifier(eval_metric='logloss') - if model_name == 'DecisionTree': - return DecisionTreeClassifier() - if model_name == 'NaiveBayes': - return MultinomialNB() - if model_name == 'LogReg': - return LogisticRegression(max_iter=epochs) - logger.error(f"Invalid model name: {model_name}") - exit(1) - - -def train_traditional_model(model_name: str, - epochs: int, - save_model_path: str): - """ - Trains a traditional machine learning model. - - Args: - model_name (str): The name of the model to train. - epochs (int): The number of epochs for training. - save_model_path (str): The path to save the trained model. 
- """ - global vectorizer, X_val, X_train - logger.info(f"Using Vectorizer TfidfVectorizer for {model_name} model") - # Ensure X_train and X_val are lists of strings - X_train = [str(text) for text in X_train] - X_val = [str(text) for text in X_val] - - # Call the vectorize_text_data function - X_train, X_val = vectorize_text_data(X_train, X_val, save_model_path) - - logger.info(f"Training {model_name} model") - model = select_model_from_traditional(model_name, epochs) - model.fit(X_train, y_train) - predictions = model.predict(X_val) - accuracy_list = accuracy_score(y_val, predictions) - logger.info(f"Validation Accuracy: {accuracy_list:.4f}") - logger.info(classification_report(y_val, predictions)) - - loss_list, acc_plot = [], [] - - logger.info(f"Training {model_name} model for {epochs} epochs") - for epoch in range(epochs): - model.fit(X_train, y_train) - predictions = model.predict(X_val) - accuracy_list = accuracy_score(y_val, predictions) - acc_plot.append(accuracy_list) - logger.info(f"Epoch {epoch + 1}/{epochs} - Validation Accuracy: {accuracy_list:.4f}") - logger.info(classification_report(y_val, predictions, zero_division=0)) - - if hasattr(model, 'predict_proba'): - loss = model.score(X_val, y_val) - logger.debug(f"Epoch {epoch + 1}: Model loss: {loss}") - else: - loss = 1 - accuracy_list - logger.debug(f"Epoch {epoch + 1}: Model loss: {loss}") - loss_list.append(loss) - - save_and_plot_model(model, save_model_path, acc_plot, loss_list, epochs, model_name) - - -def train_neural_network(epochs: int, - batch_size: int, - learning_rate: float, - save_model_path: str, - use_cuda: Optional[bool] = False): - """ - Trains a neural network model. - - Args: - epochs (int): The number of epochs to train the model. - batch_size (int): The size of the batches for training. - learning_rate (float): The learning rate for the optimizer. - save_model_path (str): The path to save the trained model. - use_cuda (bool, optional): Whether to use CUDA for training. Defaults to False. 
- """ - if use_cuda is None: - use_cuda = False - global vectorizer, X_val, X_train, labels - logger.info("Vectorizing text data for Neural Network") - # Ensure X_train and X_val are lists of strings - X_train = [str(text) for text in X_train] - X_val = [str(text) for text in X_val] - - # Call the vectorize_text_data function - X_train, X_val = vectorize_text_data(X_train, X_val, save_model_path) - - logger.info("Training Neural Network model") - model = nn.Sequential(nn.Linear(X_train.shape[1], 128), nn.ReLU(), nn.Linear(128, 2)) - criterion = nn.CrossEntropyLoss() - optimizer = optim.Adam(model.parameters(), lr=learning_rate) - scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=250, gamma=0.01) - device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu") - logger.info(f"Training on hardware: {device}") - model.to(device) - - logger.info("Creating DataLoaders for Neural Network") - train_dataset = SensitiveDataDataset(X_train, y_train) - val_dataset = SensitiveDataDataset(X_val, y_val) - train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) - val_loader = DataLoader(val_dataset, batch_size=batch_size) - - accuracy_list = [] - loss_list = [] - - for epoch in range(epochs): - model.train() - epoch_loss, correct, total = 0, 0, 0 - for inputs, labels in train_loader: - inputs, labels = inputs.to(device), labels.to(device) - optimizer.zero_grad() - outputs = model(inputs) - loss = criterion(outputs, labels) - loss.backward() - optimizer.step() - epoch_loss += loss.item() - _, predictions = torch.max(outputs, 1) - correct += (predictions == labels).sum().item() - total += labels.size(0) - logger.debug(f"Epoch {epoch + 1}: Correct: {correct}, Total: {total}") - - scheduler.step() - - accuracy_list.append(correct / total) - loss_list.append(epoch_loss) - current_lr = scheduler.get_last_lr()[0] - logger.info(f"Epoch {epoch + 1}/{epochs}, Learning Rate: {current_lr}") - logger.info(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {(correct / total):.4f}") - - logger.info("Validating Neural Network model") - val_loss, val_correct, val_total = 0, 0, 0 - with torch.no_grad(): - model.eval() - for inputs, labels in val_loader: - inputs, labels = inputs.to(device), labels.to(device) - outputs = model(inputs) - loss = criterion(outputs, labels) - val_loss += loss.item() - _, predictions = torch.max(outputs, 1) - val_correct += (predictions == labels).sum().item() - val_total += labels.size(0) - logger.debug(f"Validation: Correct: {val_correct}, Total: {val_total}") - - val_acc = val_correct / val_total - logger.info(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}") - - save_and_plot_model(model, save_model_path, accuracy_list, loss_list, epochs, 'NeuralNetwork') - - -def train_model( - model_name: str, - epochs: int, - batch_size: int, - learning_rate: float, - save_model_path: str, - use_cuda: Optional[bool] = False, -): - """ - Trains a machine learning model based on the specified parameters. - - Args: - model_name (str): The name of the model to train. - epochs (int): The number of epochs to train the model. - batch_size (int): The size of the batches for training. - learning_rate (float): The learning rate for the optimizer. - save_model_path (str): The path to save the trained model. - use_cuda (bool, optional): Whether to use CUDA for training. Defaults to False. 
- """ - if use_cuda is None: - use_cuda = False - if model_name == 'NeuralNetwork': - train_neural_network(epochs, batch_size, learning_rate, save_model_path, use_cuda) - else: - train_traditional_model(model_name, epochs, save_model_path) - - -def validate_data(): - """ - Validates the data by checking if the variables are of the correct type. - """ - if not isinstance(EPOCHS, int) or EPOCHS <= 0: - logger.error("EPOCHS must be a positive integer") - exit(1) - if not isinstance(BATCH_SIZE, int) or BATCH_SIZE <= 0: - logger.error("BATCH_SIZE must be a positive integer") - exit(1) - if not isinstance(LEARN_RATE, float) or not (0 < LEARN_RATE < 1): - logger.error("LEARN_RATE must be a float between 0 and 1") - exit(1) - if not isinstance(CUDA, bool): - logger.error("CUDA must be a boolean") - exit(1) - - allowed_models = ["NeuralNetwork", "LogReg", "RandomForest", "ExtraTrees", "GBM", "XGBoost", "DecisionTree", - "NaiveBayes"] - if MODEL_NAME not in allowed_models: - logger.error(f"MODEL_NAME must be one of: {', '.join(allowed_models)}") - exit(1) - if not os.path.exists(TRAINING_PATH): - logger.error(f"Training data path {TRAINING_PATH} does not exist") - exit(1) - if not os.path.exists(os.path.dirname(SAVE_PATH)): - logger.error(f"Save model path {SAVE_PATH} does not exist") - exit(1) - - -if __name__ == "__main__": - # Config file reading and setting constants - logger.info("Reading config file") - config = ConfigParser() - config.read('../../config.ini') - - MODEL_NAME = config.get('VulnScan.train Settings', 'model_name') - TRAINING_PATH = config.get('VulnScan.train Settings', 'train_data_path') - EPOCHS = int(config.get('VulnScan.train Settings', 'epochs')) - BATCH_SIZE = int(config.get('VulnScan.train Settings', 'batch_size')) - LEARN_RATE = float(config.get('VulnScan.train Settings', 'learning_rate')) - CUDA = config.getboolean('VulnScan.train Settings', 'use_cuda') - SAVE_PATH = config.get('VulnScan.train Settings', 'save_model_path') - - validate_data() - - # Load Data - logger.info(f"Loading data from {TRAINING_PATH}") - texts, labels = [], [] - for filename in os.listdir(TRAINING_PATH): - with open(os.path.join(config.get('VulnScan.train Settings', 'train_data_path'), filename), 'r', - encoding='utf-8') as file: - texts.append(file.read()) - labels.append(1 if '-sensitive' in filename else 0) - logger.debug(f"Loaded data from {filename} with label {labels[-1]}") - - # Split Data - logger.info("Splitting data into training and validation sets") - X_train, X_val, y_train, y_val = train_test_split(texts, - labels, - test_size=0.2, - random_state=42) - - # Train Model - try: - train_model(model_name=MODEL_NAME, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LEARN_RATE, - save_model_path=SAVE_PATH, - use_cuda=CUDA) - except RuntimeError as e: - if "CUDA" in str(e): - logger.error(f"GPU error: {e}. Falling back to CPU...") - train_model(model_name=MODEL_NAME, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LEARN_RATE, - save_model_path=SAVE_PATH, - use_cuda=False) - else: - logger.error(f"Runtime Error in training model: {e}") - exit(1) - except FileNotFoundError as e: - logger.error(f"Training data or model files not found: {e}." - f" Please check if all required files exist.") - exit(1) - except AttributeError as e: - logger.error(f"Invalid model configuration or missing attributes: {e}." 
- f" Please verify model settings.") - exit(1) - except Exception as e: - logger.error(f"Error in training model: {e}") - exit(1) -else: - raise ImportError("This training script is meant to be run directly " - "and cannot be imported. Please execute it as a standalone script.") diff --git a/CODE/_dev.py b/CODE/_dev.py index 1b9cd3e6..b2b0e5e0 100644 --- a/CODE/_dev.py +++ b/CODE/_dev.py @@ -99,11 +99,11 @@ def _perform_checks() -> bool: bool: True if all checks are confirmed by the user, False otherwise. """ checks = [ - ("[-] Have you read the required contributing guidelines?", "..\\CONTRIBUTING.md"), - ("[-] Have you made files you don't want to be run start with '_'?", "."), - ("[-] Have you added the file to CODE dir?", "."), - ("[-] Have you added docstrings and comments?", "..\\CONTRIBUTING.md"), - ("[-] Is each file containing around 1 main feature?", "..\\CONTRIBUTING.md"), + ("Have you read the required contributing guidelines?", "..\\CONTRIBUTING.md"), + ("Have you made files you don't want to be run start with '_'?", "."), + ("Have you added the file to CODE dir?", "."), + ("Have you added docstrings and comments?", "..\\CONTRIBUTING.md"), + ("Is each file containing around 1 main feature?", "..\\CONTRIBUTING.md"), ] for question, file_to_open in checks: @@ -139,7 +139,7 @@ def _handle_file_operations() -> None: print("\n".join([f"\033[91m- {file}\033[0m" for file in removed_files])) # Red - print("\n".join([f"* {file}" for file in normal_files])) - if not _prompt_user("[-] Does the list above include your added files?"): + if not _prompt_user("Does the list above include your added files?"): color_print("[x] Something went wrong! Please contact support.", "red") return diff --git a/CODE/config.ini b/CODE/config.ini index 2f92e51a..fca15d55 100644 --- a/CODE/config.ini +++ b/CODE/config.ini @@ -26,8 +26,8 @@ save_preferences = true [System Settings] # Do not play with these settings unless you know what you are doing # Dev Mode allows a safe way to modify these settings!! 
-version = 3.4.2 -files = "bluetooth_details.py, bluetooth_logger.py, browser_miner.ps1, cmd_commands.py, config.ini, dir_list.py, dump_memory.py, event_log.py, Logicytics.py, log_miner.py, media_backup.py, netadapter.ps1, network_psutil.py, packet_sniffer.py, property_scraper.ps1, registry.py, sensitive_data_miner.py, ssh_miner.py, sys_internal.py, tasklist.py, tree.ps1, vulnscan.py, wifi_stealer.py, window_feature_miner.ps1, wmic.py, logicytics\Checks.py, logicytics\Config.py, logicytics\Execute.py, logicytics\FileManagement.py, logicytics\Flag.py, logicytics\Get.py, logicytics\Logger.py, logicytics\User_History.json.gz, VulnScan\Model SenseMini .3n3.pth, VulnScan\README.md, VulnScan\Vectorizer .3n3.pkl" +version = 3.5.0 +files = "bluetooth_details.py, bluetooth_logger.py, browser_miner.ps1, cmd_commands.py, config.ini, dir_list.py, dump_memory.py, event_log.py, Logicytics.py, log_miner.py, media_backup.py, netadapter.ps1, network_psutil.py, packet_sniffer.py, property_scraper.ps1, registry.py, sensitive_data_miner.py, ssh_miner.py, sys_internal.py, tasklist.py, tree.ps1, vulnscan.py, wifi_stealer.py, window_feature_miner.ps1, wmic.py, logicytics\Checks.py, logicytics\Config.py, logicytics\Execute.py, logicytics\FileManagement.py, logicytics\Flag.py, logicytics\Get.py, logicytics\Logger.py, logicytics\User_History.json.gz, vulnscan\Model SenseMini .3n3.pth, vulnscan\Vectorizer .3n3.pkl" # If you forked the project, change the USERNAME to your own to use your own fork as update material, # I dont advise doing this however config_url = https://raw.githubusercontent.com/DefinetlyNotAI/Logicytics/main/CODE/config.ini diff --git a/CODE/dump_memory.py b/CODE/dump_memory.py index f4dbaa3f..360a27d8 100644 --- a/CODE/dump_memory.py +++ b/CODE/dump_memory.py @@ -98,7 +98,7 @@ def memory_dump(): try: process = psutil.Process(pid) dump_path = os.path.join(DUMP_DIR, "Ram_Dump.txt") - with open(dump_path, "wb", encoding="utf-8") as dump_file: + with open(dump_path, "wb") as dump_file: total_size = 0 # Disk space safety check diff --git a/CODE/logicytics/FileManagement.py b/CODE/logicytics/FileManagement.py index 39fe3215..3305c53d 100644 --- a/CODE/logicytics/FileManagement.py +++ b/CODE/logicytics/FileManagement.py @@ -30,6 +30,7 @@ def open_file(file: str, use_full_path: bool = False) -> str | None: subprocess.run(["start", file_path], shell=False) except Exception as e: return f"Error opening file: {e}" + return None @staticmethod def mkdir(): @@ -145,6 +146,7 @@ def __remove_files(path: str, files: list) -> str | None: os.remove(os.path.join(path, file)) except Exception as e: return f"Error: {e}" + return None @staticmethod def __generate_sha256_hash(filename: str) -> str: diff --git a/CODE/logicytics/Flag.py b/CODE/logicytics/Flag.py index 149a885e..9e362160 100644 --- a/CODE/logicytics/Flag.py +++ b/CODE/logicytics/Flag.py @@ -53,7 +53,6 @@ def __get_sim(user_input: str, all_descriptions: list[str]) -> list[float]: """ # Encode the current user input and historical inputs from sentence_transformers import SentenceTransformer, util - import logging # Suppress logging messages from Sentence Transformer due to verbosity # Set the logging level based on the debug mode, either DEBUG or ERROR (aka only important messages) if DEBUG_MODE: @@ -211,14 +210,14 @@ def _generate_summary_and_graph(cls): log.info("\nFlag Usage Summary Graph saved in current working directory as 'Flag_usage_summary.png'") @staticmethod - def load_history() -> dict[str, any]: + def load_history() -> dict: """ Load user 
interaction history from a gzipped JSON file. This method attempts to read and parse historical interaction data from a compressed JSON file. If the file is not found, it returns an empty history structure with an empty interactions dictionary and a zero-initialized flags usage counter. Returns: - dict[str, any]: A dictionary containing: + dict: A dictionary containing: - 'interactions': A dictionary of past user interactions - 'flags_usage': A Counter object tracking flag usage frequencies @@ -233,7 +232,7 @@ def load_history() -> dict[str, any]: return {'interactions': {}, 'flags_usage': Counter()} @staticmethod - def save_history(history_data: dict[str, any]): + def save_history(history_data: dict): """ Save user interaction history to a gzipped JSON file. @@ -518,22 +517,6 @@ def __available_arguments(cls) -> tuple[argparse.Namespace, argparse.ArgumentPar help="Execute Flag that will shutdown the device afterward", ) - # Not yet Implemented - parser.add_argument( - "--webhook", - action="store_true", - help="Execute Flag that will send zip File via webhook " - f"{cls.__colorify('- Not yet Implemented -', 'r')}", - ) - - parser.add_argument( - "--restore", - action="store_true", - help="Restore Logicytics files from the ACCESS/BACKUPS directory " - f"{cls.__colorify('- Use on your own device only -', 'y')} " - f"{cls.__colorify('- Not yet Implemented -', 'r')}", - ) - # Parse the arguments args, unknown = parser.parse_known_args() valid_flags = [action.dest for action in parser._actions if action.dest != 'help'] @@ -564,7 +547,6 @@ def __exclusivity_logic(args: argparse.Namespace) -> bool: special_flags = { args.reboot, args.shutdown, - args.webhook } action_flags = { args.default, diff --git a/CODE/packet_sniffer.py b/CODE/packet_sniffer.py index 69804a14..95d85627 100644 --- a/CODE/packet_sniffer.py +++ b/CODE/packet_sniffer.py @@ -112,8 +112,8 @@ def sniff_packets(self, iface: str, count: int, timeout: int, retry_max: int): ) log.info("Sniff complete.") break - except Exception as e: - log.warning(f"Sniff failed on {iface}: {e}") + except Exception as err: + log.warning(f"Sniff failed on {iface}: {err}") iface = self._correct_interface(iface) else: log.error("Max retry time exceeded.") diff --git a/CODE/vulnscan.py b/CODE/vulnscan.py index cd755a9b..b7c86124 100644 --- a/CODE/vulnscan.py +++ b/CODE/vulnscan.py @@ -4,12 +4,12 @@ import os import threading import warnings -from pathlib import Path import aiofiles import joblib import numpy as np import torch +from pathlib import Path from safetensors import safe_open from tqdm import tqdm @@ -184,7 +184,7 @@ async def scan_worker(scan_file): "C:\\Program Files", "C:\\Program Files (x86)" ] - vulnscan = VulnScan("VulnScan/Model SenseMini .3n3.pth", "VulnScan/Vectorizer .3n3.pkl") + vulnscan = VulnScan("vulnscan/Model SenseMini .3n3.pth", "vulnscan/Vectorizer .3n3.pkl") vulnscan.scan_directory(base_paths) except KeyboardInterrupt: log.warning("User interrupted. Exiting gracefully.") diff --git a/PLANS.md b/PLANS.md index 661ea33d..4c7e31f6 100644 --- a/PLANS.md +++ b/PLANS.md @@ -7,8 +7,6 @@ | Task | Version | Might or Will be done? 
| |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|------------------------| -| Implement the 2 missing flags | v3.5.0 | ✅ | -| Move VulnScan tools and v3 module to separate repository, keep only the model and vectorizer | v3.5.0 | ✅ | | Get any BETA features out of BETA | v3.6.0 | ✅ | | Replace Logger.py with Util that contains (tprint), also implement the ExceptionHandler and UpdateManager from Util | v3.6.0 | ✅ | | Remake VulnScan .pkl and .pth to be more accurate | v3.6.0 | ❌ | diff --git a/README.md b/README.md index b4a18205..f0345e5f 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ This comprehensive guide is here to equip you with everything you need to use Lo
[badge image: GitHub Repo CodeFactor Rating]
- [badge image: GitHub Repo CodeClimate Rating]
+ [badge image: Maintainability]
[badge image: OpenSSF Best Practices Score]
[badge image: OpenSSF Best Practices Badge]
@@ -36,13 +36,13 @@ To install and setup Logicytics, follow these steps:
> [!IMPORTANT]
> We recommend Python Version `3.11` or higher, as the project is developed and tested on this version.
->
-> You must also install `pytorch` if you want to use the vulnscan feature, To install run the command `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124`
-> If the device has CUDA available (NVidea GPUs),
>
-> Otherwise, run `pip3 install torch torchvision torchaudio` to use the CPU, ofcourse this is optional for normal
-> usage's,
-> until you require `vulnscan`
+> To use vulnscan, you will need `torch`; installation instructions can be
+> found [here](https://pytorch.org/#fws_68845ae25b0fb).
+> If you have a supported GPU, it is recommended to install the NVIDIA GPU version of PyTorch for better performance.
+>
+> Settings should be: `Stable -> Windows -> Pip -> Python`; if you have a supported CUDA version, select it as well,
+> otherwise choose CPU.

### Prerequisites

diff --git a/SECURITY.md b/SECURITY.md
index 9b5c6b67..b9fb057b 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -4,26 +4,27 @@

This section outlines the versions of our project that are currently supported with security updates.

-| Version | Supported | Release Date |
-|---------|-----------|-----------------|
-| 3.4.x | ✅ | January 3, 2025 |
-| 3.3.x | ✅ | January 3, 2025 |
-| 3.2.x | ✖ī¸ | Dec 19, 2024 |
-| 3.1.x | ✖ī¸ | Dec 11, 2024 |
-| 3.0.x | ✖ī¸ | Dec 6, 2024 |
-| 2.5.x | ❌ | Nov 25, 2024 |
-| 2.4.x | ❌ | Nov 12, 2024 |
-| 2.3.x | ❌ | Sep 21, 2024 |
-| 2.2.x | ❌ | Sep 9, 2024 |
-| 2.1.x | ❌ | Aug 29, 2024 |
-| 2.0.x | ❌ | Aug 25, 2024 |
-| 1.6.x | ❌ | Jun 18, 2024 |
-| 1.5.x | ❌ | Jun 10, 2024 |
-| 1.4.x | ❌ | May 30, 2024 |
-| 1.3.x | ❌ | May 21, 2024 |
-| 1.2.x | ❌ | May 16, 2024 |
-| 1.1.x | ❌ | May 10, 2024 |
-| 1.0.x | ❌ | May 4, 2024 |
+| Version | Supported | Major Release Date |
+|---------|-----------|--------------------|
+| 3.5.x | ✅ | July 26, 2025 |
+| 3.4.x | ✖ī¸ | January 3, 2025 |
+| 3.3.x | ✖ī¸ | January 3, 2025 |
+| 3.2.x | ✖ī¸ | Dec 19, 2024 |
+| 3.1.x | ✖ī¸ | Dec 11, 2024 |
+| 3.0.x | ❌ | Dec 6, 2024 |
+| 2.5.x | ❌ | Nov 25, 2024 |
+| 2.4.x | ❌ | Nov 12, 2024 |
+| 2.3.x | ❌ | Sep 21, 2024 |
+| 2.2.x | ❌ | Sep 9, 2024 |
+| 2.1.x | ❌ | Aug 29, 2024 |
+| 2.0.x | ❌ | Aug 25, 2024 |
+| 1.6.x | ❌ | Jun 18, 2024 |
+| 1.5.x | ❌ | Jun 10, 2024 |
+| 1.4.x | ❌ | May 30, 2024 |
+| 1.3.x | ❌ | May 21, 2024 |
+| 1.2.x | ❌ | May 16, 2024 |
+| 1.1.x | ❌ | May 10, 2024 |
+| 1.0.x | ❌ | May 4, 2024 |

### Key:

diff --git a/requirements.txt b/requirements.txt
index da6e4703..2754b8d1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,26 +1,20 @@
configobj~=5.0.9
-pathlib~=1.0.1
-joblib~=1.3.2
-matplotlib~=3.10.1
-xgboost~=2.1.4
-scikit-learn~=1.6.1
-Faker~=36.1.1
-networkx~=3.2.1
-numpy~=2.2.3
-plotly~=6.0.0
-seaborn~=0.13.2
-torchviz~=0.0.3
-tqdm~=4.66.6
+configparser~=7.1.0
+psutil~=6.1.1
requests~=2.32.3
DateTime~=5.5
-sentence-transformers~=3.4.1
colorlog~=6.9.0
+aiofiles~=24.1.0
+joblib~=1.3.2
+numpy~=2.2.3
+pathlib~=1.0.1
safetensors~=0.5.3
+tqdm~=4.66.6
WMI~=1.5.1
prettytable~=3.15.1
-pandas~=2.2.2
+matplotlib~=3.10.1
+networkx~=3.2.1
+pandas~=2.2.3
+cryptography~=44.0.2
scapy~=2.5.0
-psutil~=7.0.0
-configparser~=7.1.0
-aiofiles~=24.1.0
-cryptography~=44.0.2
\ No newline at end of file
+sentence-transformers~=5.0.0
\ No newline at end of file
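A note on the README hunk above: the new install guidance points at the PyTorch selector (`Stable -> Windows -> Pip -> Python`, plus your CUDA version if you have one) instead of hard-coding pip commands, and the removed VulnScan trainer only used CUDA when it was actually present. The following is a minimal sketch of that device-selection pattern, assuming only that `torch` is installed; the function name `build_model` and the `input_dim` parameter are illustrative and not taken from the repository.

# Sketch: request CUDA but fall back to CPU when it is unavailable, mirroring
# the `use_cuda` flag in config.ini and the removed trainer's device logic.
import torch
import torch.nn as nn


def build_model(input_dim: int, use_cuda: bool = False) -> tuple[nn.Module, torch.device]:
    # Never assume a GPU exists; pick CUDA only when requested *and* available.
    device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")
    # Same small classifier shape the removed script trained: Linear -> ReLU -> Linear.
    model = nn.Sequential(nn.Linear(input_dim, 128), nn.ReLU(), nn.Linear(128, 2))
    return model.to(device), device


if __name__ == "__main__":
    model, device = build_model(input_dim=512, use_cuda=True)
    print(f"Training on hardware: {device}")

On a CPU-only machine this prints `Training on hardware: cpu` instead of raising, which is the same outcome the trainer's `__main__` block enforced by catching CUDA `RuntimeError`s and retrying with `use_cuda=False`.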