diff --git a/README.md b/README.md index 036fed9..0775ae6 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,8 @@ you can see the sample [fil read in csv](./samples/charts/sbk-file-read.csv) and ### Setup Instructions +#### Setup with Python virtual environment + ``` #create the env python3 -m venv venv-sbk-charts @@ -123,6 +125,31 @@ to deactivate from the venv deactivate ``` +#### Setup with conda + +``` +# Create a new conda environment with Python 3.14 or higher +conda create -n conda-sbk-charts python=3.14 -y + +# Activate the environment +conda activate conda-sbk-charts + +# Install pip if not already installed +conda install pip -y + +# Install the project in editable mode using pip +pip install -e . + +# Build the sbk-charts package +python -m build +``` + +To deactivate + +``` +conda deactivate +``` + ## Generative AI-Powered Analysis SBK Charts includes AI-powered analysis descriptions to provide deeper insights into your storage benchmark results. diff --git a/requirements.txt b/requirements.txt index 6c9ea7e..6f601b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,8 +5,11 @@ ordered-set~=4.1.0 jproperties~=2.1.1 pillow~=12.0.0 openpyxl-image-loader~=1.0.5 -huggingface_hub~=1.2.3 +huggingface_hub~=0.34.0 build~=1.3.0 lmstudio~=1.5.0 openai~=2.14.0 -requests~=2.32.5 \ No newline at end of file +requests~=2.32.5 +torch~=2.9.1 +transformers~=4.57.3 +accelerate>=0.20.0 # Required for model parallelism and memory optimization diff --git a/samples/charts/.DS_Store b/samples/charts/.DS_Store index 9403e16..08b7c71 100644 Binary files a/samples/charts/.DS_Store and b/samples/charts/.DS_Store differ diff --git a/samples/charts/sbk-file-rocksdb-read-pytorch-llm.xlsx b/samples/charts/sbk-file-rocksdb-read-pytorch-llm.xlsx new file mode 100644 index 0000000..494aa6e Binary files /dev/null and b/samples/charts/sbk-file-rocksdb-read-pytorch-llm.xlsx differ diff --git a/src/ai/sbk_ai.py b/src/ai/sbk_ai.py index ad758ca..eca28e7 100644 --- 
a/src/ai/sbk_ai.py +++ b/src/ai/sbk_ai.py @@ -46,11 +46,26 @@ # Log the full exception for debugging import traceback +# Excel formatting constants +class ExcelColors: + RED_BOLD = "FFFF0000" + GREEN = "00CF00" + PURPLE = "EE00FF" + DARK_RED = "FF800000" + GREEN_HEADER = "00AA00" + DARK_BLUE = "FF000080" + BLUE = "0000FF" + DARK_GREEN = "008000" + DARK_MAGENTA = "8B008B" + SADDLE_BROWN = "8B4513" + TITLE_RED = "FF0000" + # Warning message about AI-generated content reliability warning_msg = ("The AI may hallucinate !. " "The summary generated by generative AI models may not be complete and accurate. " "It's recommended to analyze the graphs along with the generated summary.") +DEFAULT_TIMEOUT_SECONDS = 120 def get_t_num_sheet_name(r_num_name): """ @@ -134,7 +149,7 @@ def __init__(self): self.file = None self.ai_instance = None self.web = None - self.timeout_seconds = 120 + self.timeout_seconds = DEFAULT_TIMEOUT_SECONDS self.no_threads = False def add_args(self, parser): @@ -155,7 +170,7 @@ def add_args(self, parser): default=self.timeout_seconds) parser.add_argument("-nothreads", "--nothreads", help=f"No parallel threads, default : {self.no_threads}", default=self.no_threads) - self.subparsers = parser.add_subparsers(dest="ai_class", help="Available sub-commands", required=False) + self.subparsers = parser.add_subparsers(dest="ai_class", help="Available GenAI commands", required=False) parser.set_defaults(ai_class=None) for name, cls in self.classes.items(): try: @@ -177,19 +192,27 @@ def parse_args(self, args): Args: args (argparse.Namespace): Parsed command-line arguments - + Side Effects: - Sets the output file path - - Configures timeout and threading settings + - Configures timeout (converted to int) and threading settings - Activates the selected AI backend instance if specified """ + self.timeout_seconds = int(args.seconds) if hasattr(args, 'seconds') and args.seconds is not None else DEFAULT_TIMEOUT_SECONDS self.file = args.ofile - self.timeout_seconds = 
args.seconds self.no_threads = args.nothreads if args.ai_class: self.ai_instance = self.ai_instance_map[args.ai_class.lower()] self.ai_instance.parse_args(args) + def open(self, args): + if self.ai_instance: + self.ai_instance.open(args) + + def close(self, args): + if self.ai_instance: + self.ai_instance.close(args) + def load_workbook(self): """ Load the Excel workbook from the specified file path. @@ -399,12 +422,11 @@ def run_analysis(function_name): print(f"Analysis completed in {time.time() - start_time:.2f} seconds", flush=True) - - # Format and add AI analysis to the worksheet + # Format and add AI analysis to the worksheet try: sheet = self.wb["Summary"] - # Configure column H width to accommodate 120 characters + # Configure column H width to accommodate text sheet.column_dimensions['H'].width = 120 * 0.90 # Add AI warning section with proper formatting @@ -413,7 +435,7 @@ def run_analysis(function_name): # Format and add the warning message warn_cell = sheet.cell(row=max_row, column=8) warn_cell.value = warning_msg - warn_cell.font = Font(size=16, bold=True, color="FFFF0000") # Red bold text + warn_cell.font = Font(size=16, bold=True, color=ExcelColors.RED_BOLD) warn_cell.alignment = Alignment(wrap_text=True, vertical='top') # Calculate optimal row height for the warning message @@ -429,24 +451,24 @@ def run_analysis(function_name): max_row = sheet.max_row + 2 title_cell = sheet.cell(row=max_row, column=7) title_cell.value = "AI Performance Analysis" - title_cell.font = Font(size=18, bold=True, color="FF0000") + title_cell.font = Font(size=18, bold=True, color=ExcelColors.TITLE_RED) # Add model description dec_cell = sheet.cell(row=max_row, column=8) dec_cell.value = self.ai_instance.get_model_description()[1] - dec_cell.font = Font(size=16, color="00CF00") # Green text for model info + dec_cell.font = Font(size=16, color=ExcelColors.GREEN) # Add Throughput Analysis section throughput_header_row = max_row + 2 cell = sheet.cell(row=throughput_header_row, 
column=7) cell.value = "Throughput Analysis" - cell.font = Font(size=16, bold=True, color="EE00FF") # Purple header + cell.font = Font(size=16, bold=True, color=ExcelColors.PURPLE) # Add throughput analysis content with formatting cell = sheet.cell(row=throughput_header_row, column=8) throughput_analysis = results['get_throughput_analysis'][1] cell.value = throughput_analysis - cell.font = Font(size=14, color="FF800000") # Dark red text + cell.font = Font(size=14, color=ExcelColors.DARK_RED) cell.border = Border( left=Side(style='thin'), right=Side(style='thin'), @@ -469,13 +491,13 @@ def run_analysis(function_name): # Format and add latency analysis header cell = sheet.cell(row=latency_row, column=7) cell.value = "Latency Analysis" - cell.font = Font(size=16, bold=True, color="00AA00") # Green header + cell.font = Font(size=16, bold=True, color=ExcelColors.GREEN_HEADER) # Add latency analysis content with formatting cell = sheet.cell(row=latency_row, column=8) latency_analysis = results['get_latency_analysis'][1] cell.value = latency_analysis - cell.font = Font(size=14, color="FF000080") # Dark blue text + cell.font = Font(size=14, color=ExcelColors.DARK_BLUE) cell.border = Border( left=Side(style='thin'), right=Side(style='thin'), @@ -498,13 +520,13 @@ def run_analysis(function_name): # Format and add total MB analysis header cell = sheet.cell(row=mb_row, column=7) cell.value = "Total MB Analysis" - cell.font = Font(size=16, bold=True, color="0000FF") # Blue header + cell.font = Font(size=16, bold=True, color=ExcelColors.BLUE) # Add total MB analysis content with formatting cell = sheet.cell(row=mb_row, column=8) mb_analysis = results['get_total_mb_analysis'][1] cell.value = mb_analysis - cell.font = Font(size=14, color="008000") # Green text + cell.font = Font(size=14, color=ExcelColors.DARK_GREEN) cell.border = Border( left=Side(style='thin'), right=Side(style='thin'), @@ -527,13 +549,13 @@ def run_analysis(function_name): # Format and add percentile histogram analysis
header cell = sheet.cell(row=percentile_row, column=7) cell.value = "Percentile Histogram Analysis" - cell.font = Font(size=16, bold=True, color="8B008B") # Dark magenta header + cell.font = Font(size=16, bold=True, color=ExcelColors.DARK_MAGENTA) # Add percentile histogram analysis content with formatting cell = sheet.cell(row=percentile_row, column=8) percentile_analysis = results['get_percentile_histogram_analysis'][1] cell.value = percentile_analysis - cell.font = Font(size=14, color="8B4513") # Saddle brown text + cell.font = Font(size=14, color=ExcelColors.SADDLE_BROWN) cell.border = Border( left=Side(style='thin'), right=Side(style='thin'), @@ -553,7 +575,7 @@ def run_analysis(function_name): except Exception as e: print(f"Error adding analysis to summary sheet: {str(e)}") traceback.print_exc() - + return True def add_performance_details(self): diff --git a/src/custom_ai/pytorch_llm/README.md b/src/custom_ai/pytorch_llm/README.md new file mode 100644 index 0000000..af44464 --- /dev/null +++ b/src/custom_ai/pytorch_llm/README.md @@ -0,0 +1,187 @@ + +# PyTorch LLM Implementation for SBK Charts + +This document describes the PyTorch LLM implementation for SBK Charts, which enables local AI-powered analysis of storage benchmark results using PyTorch and Hugging Face models. + +## Overview + +The PyTorch LLM implementation allows SBK Charts to leverage local language models through PyTorch's inference capabilities. +This implementation is particularly useful for users who want to run AI analysis locally without relying on external APIs. +This implementation is based on the Hugging Face Transformers library and PyTorch. +you can train the model by providing '--pt-train' option, but the default model : 'openai/gpt-oss-20b' consumes more than 200GB RAM. + +## Implementation Details + +The PyTorch LLM implementation is located in the `src/custom_ai/pytorch_llm` directory and extends the base AI interface defined in `src/genai/genai.py`. + +### Key Features + +1. 
**Local Model Inference**: Runs entirely on your hardware using PyTorch +2. **Hugging Face Integration**: Supports any Causal Language Model from the Hugging Face Hub +3. **Hardware Acceleration**: Automatically utilizes CUDA, MPS, or CPU based on availability +4. **Memory Efficient**: Uses 16-bit or 32-bit precision based on hardware support +5. **Configurable Parameters**: Adjust model parameters like temperature and top-p sampling + +## Prerequisites + +### Python Dependencies + +- PyTorch (with CUDA support recommended for GPU acceleration) +- Transformers library from Hugging Face +- A compatible pre-trained language model (e.g., from Hugging Face Hub) + +### Hardware Requirements + +- CPU: Modern x86-64 or ARM processor +- RAM: At least 32GB (128GB+ recommended for larger models) +- GPU: NVIDIA GPU with CUDA support recommended for better performance +- Disk Space: 10GB+ for model storage (varies by model size) + +## Installation + +1. Install PyTorch (with CUDA if available): + ```bash + # For CUDA 11.8 + pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 + + # For CPU-only + # pip3 install torch torchvision torchaudio + ``` + +2. Install the Transformers library: + ```bash + pip install transformers + ``` + +3. 
(Optional) Install additional dependencies for specific models: + ```bash + pip install accelerate bitsandbytes + ``` + +## Configuration + +The implementation supports the following configuration options: + +- **Model**: Any Hugging Face model ID or local path (default: `openai/gpt-oss-20b`) +- **Device**: Automatically detects CUDA/MPS/CPU (can be overridden) +- **Max Length**: Maximum sequence length for generation (default: 2048) +- **Temperature**: Controls randomness (default: 0.4) +- **Top-p**: Nucleus sampling parameter (default: 0.9) + +## Usage + +### Basic Usage + +```bash +# Process a single CSV file with default settings +sbk-charts -i input.csv -o output.xlsx pytorch_llm + +# Process multiple CSV files +sbk-charts -i file1.csv,file2.csv -o output.xlsx pytorch_llm +``` + +### Advanced Options + +```bash +# Specify a different model +sbk-charts -i input.csv -o output.xlsx pytorch_llm --pt-model mistralai/Mistral-7B-v0.1 + +# Adjust generation parameters +sbk-charts -i input.csv -o output.xlsx pytorch_llm \ + --pt-temperature 0.7 \ + --pt-top-p 0.95 \ + --pt-max-length 1024 + +# Force CPU usage +sbk-charts -i input.csv -o output.xlsx pytorch_llm --pt-device cpu +``` + +## Example Commands + +```bash +# Process file with default settings +sbk-charts -i ./samples/charts/sbk-file-read.csv -o ./samples/charts/sbk-file-read-pytorch.xlsx pytorch_llm + +# Use a smaller model with custom parameters +sbk-charts -i ./samples/charts/sbk-file-read.csv -o ./samples/charts/sbk-file-read-mistral.xlsx pytorch_llm \ + --pt-model mistralai/Mistral-7B-v0.1 \ + --pt-temperature 0.5 \ + --pt-max-length 1024 + +# Process multiple files +sbk-charts -i ./samples/charts/sbk-file-read.csv,./samples/charts/sbk-rocksdb-read.csv \ + -o ./samples/charts/sbk-combined-pytorch.xlsx pytorch_llm +``` + +## Model Management + +### Using Local Models + +1. 
Download a model from Hugging Face Hub: + ```python + from transformers import AutoModelForCausalLM, AutoTokenizer + + model_name = "mistralai/Mistral-7B-v0.1" + model = AutoModelForCausalLM.from_pretrained(model_name) + tokenizer = AutoTokenizer.from_pretrained(model_name) + + # Save locally + save_path = "./saved_models/mistral-7b" + model.save_pretrained(save_path) + tokenizer.save_pretrained(save_path) + ``` + +2. Use the local model: + ```bash + sbk-charts -i input.csv -o output.xlsx pytorch_llm --pt-model ./saved_models/mistral-7b + ``` + +## Performance Tips + +1. **Use GPU Acceleration**: Ensure CUDA is properly installed for best performance +2. **Quantization**: For large models, consider 4-bit or 8-bit quantization +3. **Batch Processing**: Process multiple files in a single command when possible +4. **Model Size**: Choose an appropriately sized model for your hardware +5. **Execution time**: Allow an execution timeout of at least 30 minutes; you can set it with the '-secs' parameter + +## Troubleshooting + +### Common Issues + +1. **Out of Memory (OOM) Errors**: + - Reduce `--pt-max-length` + - Use a smaller model + - Enable gradient checkpointing + - Use quantization + +2. **Model Loading Issues**: + - Ensure the model name is correct + - Check internet connection if downloading + - Verify disk space is available + +3.
**CUDA Errors**: + - Check CUDA installation: `nvidia-smi` + - Ensure PyTorch was installed with CUDA support + - Try reducing batch size or model size + +## Directory Structure + +``` +src/ +└── custom_ai/ + └── pytorch_llm/ + ├── __init__.py + ├── pytorch_llm.py # Main implementation + └── README.md # This document +``` +## License + +Apache License 2.0 diff --git a/src/custom_ai/pytorch_llm/__init__.py b/src/custom_ai/pytorch_llm/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/custom_ai/pytorch_llm/pytorch_llm.py b/src/custom_ai/pytorch_llm/pytorch_llm.py new file mode 100644 index 0000000..f98c597 --- /dev/null +++ b/src/custom_ai/pytorch_llm/pytorch_llm.py @@ -0,0 +1,631 @@ +#!/usr/local/bin/python3 +# Copyright (c) KMG. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +## +"""PyTorch LLM Integration Module + +This module provides integration with local PyTorch-based language models for +generating AI-powered analysis of storage benchmark results. It allows running +AI analysis locally using PyTorch's native inference capabilities. 
+ +Key Features: +- Local model inference using PyTorch +- Support for Hugging Face models through PyTorch +- Configurable model parameters (temperature, max tokens, device) +- Automatic model loading and management + +Requirements: +- PyTorch +- Transformers library (for Hugging Face models) +- A compatible pre-trained language model (e.g., from Hugging Face) +""" + +import torch +from typing import Tuple, Optional, Dict, Any +from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig +from transformers.utils.hub import cached_file, TRANSFORMERS_CACHE +from src.genai.genai import SbkGenAI +import traceback +import re +import os +import glob +import logging + +# Default model configuration +DEFAULT_MODEL = "openai/gpt-oss-20b" +DEFAULT_DEVICE = ( + "cuda" if torch.cuda.is_available() + else "mps" if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() + else "cpu" +) +DEFAULT_MAX_LENGTH = 2048 +DEFAULT_TEMPERATURE = 0.4 +DEFAULT_TOP_P = 0.9 + +# Configure logging +logger = logging.getLogger(__name__) + + +class PyTorchLLM(SbkGenAI): + """PyTorch LLM Analysis Backend + + This class implements the SbkGenAI interface to provide AI-powered analysis + using locally loaded PyTorch models. It supports any causal language model + from the Hugging Face model hub that's compatible with PyTorch. 
+ + Configuration: + - Model: Any Hugging Face model ID or local path (default: openai/gpt-oss-20b) + - Device: 'cuda', 'mps', or 'cpu' (auto-detects CUDA by default) + - Max Length: Maximum sequence length for generation (default: 2048) + - Temperature: Controls randomness (default: 0.4) + - Top-p: Nucleus sampling parameter (default: 0.9) + + Attributes: + model_name (str): Name or path of the loaded model + device (str): Device type for model execution + max_length (int): Maximum sequence length + temperature (float): Sampling temperature + top_p (float): Top-p sampling parameter + model: The loaded PyTorch model + tokenizer: The loaded tokenizer + _is_initialized (bool): Whether the model is initialized + output_list (list): List of generated outputs for training + """ + + def __init__(self) -> None: + super().__init__() + self.model_name = DEFAULT_MODEL + self.device = DEFAULT_DEVICE + self.max_length = DEFAULT_MAX_LENGTH + self.temperature = DEFAULT_TEMPERATURE + self.top_p = DEFAULT_TOP_P + self.model = None + self.tokenizer = None + self._is_initialized = False + self.output_list = [] + + def _initialize_model(self) -> bool: + """Initialize the PyTorch model and tokenizer. 
+ + Returns: + bool: True if initialization was successful, False otherwise + """ + if self._is_initialized: + return True + + try: + logger.info(f"Loading model {self.model_name} on {self.device}...") + # Check if we have a saved model + saved_model_dir = os.path.join(os.path.dirname(__file__), 'saved_models', self.model_name.split('/')[-1]) + + # In _initialize_model, update the device and dtype handling: + if 'cuda' in self.device and torch.cuda.is_available(): + if torch.cuda.is_bf16_supported(): + dtype = torch.bfloat16 # Use BF16 if supported + else: + dtype = torch.float16 # Fall back to FP16 + else: + dtype = torch.float32 + + if os.path.exists(saved_model_dir): + logger.info(f"Loading model from {saved_model_dir}") + self.model = AutoModelForCausalLM.from_pretrained( + saved_model_dir, + device_map="auto", + dtype=dtype, + trust_remote_code=True + ) + self.tokenizer = AutoTokenizer.from_pretrained( + saved_model_dir, + trust_remote_code=True + ) + else: + logger.info(f"Downloading model {self.model_name}") + # Get the default cache directory + logger.info(f"Default cache directory: {TRANSFORMERS_CACHE}") + + # Download the tokenizer + self.tokenizer = AutoTokenizer.from_pretrained( + self.model_name, + trust_remote_code=True + ) + + # Download the model + self.model = AutoModelForCausalLM.from_pretrained( + self.model_name, + device_map="auto", + dtype=dtype, + trust_remote_code=True + ) + + # Print the model's cache location + try: + # Try to get the model's configuration file path + config_file = None + if hasattr(self.model.config, 'name_or_path'): + # For most models, the config is in the same directory as the model + model_name = self.model.config.name_or_path + if os.path.exists(model_name): + # If it's a local path + config_file = os.path.join(model_name, 'config.json') + else: + # If it's a model name, check the cache + try: + config_file = cached_file(model_name, 'config.json') + except: + pass + + if config_file and os.path.exists(config_file): + 
model_dir = os.path.dirname(config_file) + logger.info(f"✅ Model files cached at: {model_dir}") + else: + # Fallback: try to find any .bin or .safetensors file in the cache + cache_files = glob.glob(os.path.join(TRANSFORMERS_CACHE, '**/*.bin'), recursive=True) + \ + glob.glob(os.path.join(TRANSFORMERS_CACHE, '**/*.safetensors'), recursive=True) + if cache_files: + model_dir = os.path.dirname(cache_files[0]) + logger.info(f"✅ Found model weights at: {model_dir}") + else: + logger.warning(f"⚠️ Could not determine exact cache location. Using default: {TRANSFORMERS_CACHE}") + except Exception as e: + logger.warning(f"⚠️ Could not determine exact cache location: {str(e)}") + logger.info(f"Using default cache directory: {TRANSFORMERS_CACHE}") + + self.model = self.model.to(self.device) + + # Set padding token if not set + if self.tokenizer.pad_token is None: + self.tokenizer.pad_token = self.tokenizer.eos_token + + # Set model to evaluation mode + self.model.eval() + + self._is_initialized = True + return True + + except Exception as e: + logger.error(f"Failed to initialize model: {str(e)}") + self.model = None + self.tokenizer = None + self._is_initialized = False + return False + + def _get_device_type(self) -> str: + """Get standardized device type string. 
+ + Returns: + str: 'cuda', 'mps', or 'cpu' + """ + device = torch.device(self.device) + device_str = str(device).lower() + if 'cuda' in device_str: + return 'cuda' + elif 'mps' in device_str: + return 'mps' + else: + return 'cpu' + + def add_args(self, parser) -> None: + """Add command-line arguments for PyTorch LLM configuration.""" + parser.add_argument( + "--pt-model", + help=f"Hugging Face model ID or path (default: {DEFAULT_MODEL})", + default=DEFAULT_MODEL + ) + parser.add_argument( + "--pt-train", + action="store_true", + help="Enable training mode (default: False)", + default=False + ) + parser.add_argument( + "--pt-device", + help=f"Device to run the model on (default: {DEFAULT_DEVICE})", + default=DEFAULT_DEVICE + ) + parser.add_argument( + "--pt-max-length", + type=int, + help=f"Maximum sequence length (default: {DEFAULT_MAX_LENGTH})", + default=DEFAULT_MAX_LENGTH + ) + parser.add_argument( + "--pt-temperature", + type=float, + help=f"Sampling temperature (default: {DEFAULT_TEMPERATURE})", + default=DEFAULT_TEMPERATURE + ) + parser.add_argument( + "--pt-top-p", + type=float, + help=f"Top-p sampling parameter (default: {DEFAULT_TOP_P})", + default=DEFAULT_TOP_P + ) + + def parse_args(self, args) -> None: + """Parse command-line arguments.""" + self.model_name = args.pt_model + self.device = args.pt_device + self.max_length = args.pt_max_length + self.temperature = args.pt_temperature + self.top_p = args.pt_top_p + + # Store training flag + self.train_mode = args.pt_train + + + def open(self, args) -> None: + # Reinitialize model if needed + if not self._is_initialized: + if not self._initialize_model(): + logger.error("Failed to initialize model during open") + return + + + def close(self, args) -> None: + loss = None + # If in training mode and we have a target, train on the generated output + if self.train_mode: + self.model.train() + logger.info("\n" + "=" * 50) + logger.info("🚀 Starting training on generated output") + + for output_text in 
self.output_list: + logger.info(f"📄 Generated length: {len(output_text)} chars") + loss = self._train_on_output(output_text) + if loss is None: + break + logger.info("=" * 50) + # Save the model after training + if loss is not None: + logger.info("💾 Saving trained model...") + save_success = self._save_model() + if save_success: + logger.info("✅ Model saved successfully") + else: + logger.error("❌ Failed to save model") + + + def _save_model(self, output_dir: str = None) -> bool: + """Save the model and tokenizer to disk. + + Args: + output_dir: Directory to save the model. If None, uses model_name. + + Returns: + bool: True if save was successful, False otherwise + """ + try: + if output_dir is None: + output_dir = os.path.join(os.path.dirname(__file__), 'saved_models', self.model_name.split('/')[-1]) + + # Validate output directory path + if not output_dir or not isinstance(output_dir, str): + logger.error("Invalid output directory path") + return False + + # Sanitize path to prevent directory traversal + output_dir = os.path.normpath(output_dir) + if '..' in output_dir: + logger.error("Directory traversal detected in output path") + return False + + os.makedirs(output_dir, exist_ok=True) + logger.info(f"💾 Saving model to {output_dir}...") + + # Save model and tokenizer + self.model.save_pretrained(output_dir) + self.tokenizer.save_pretrained(output_dir) + + logger.info(f"✅ Model successfully saved to {output_dir}") + return True + + except Exception as e: + logger.error(f"❌ Error saving model: {str(e)}") + return False + + def _train_on_output(self, generated_text: str) -> Optional[float]: + """Train the model on the generated output. 
+ + Args: + generated_text: The text generated by the model + + Returns: + float: The loss value if successful, None otherwise + """ + # Input validation + if not generated_text or not isinstance(generated_text, str): + logger.error("Invalid generated text for training") + return None + + if len(generated_text.strip()) == 0: + logger.error("Empty generated text for training") + return None + try: + logger.info(f"🔄 Starting training on generated output (length: {len(generated_text)} chars)...") + + # Get model's dtype for consistent typing + model_dtype = next(self.model.parameters()).dtype + + # Tokenize the training text + inputs = self.tokenizer( + generated_text, + return_tensors="pt", + padding="max_length", + max_length=self.max_length, + truncation=True + ) + + # Move inputs to the correct device and type + inputs = {k: v.to(self.device) for k, v in inputs.items()} + + # Create labels (shifted input) and ensure correct dtype + labels = inputs["input_ids"].clone() + + # Ensure attention mask is the same dtype as the model + if 'attention_mask' in inputs: + inputs['attention_mask'] = inputs['attention_mask'].to(dtype=model_dtype) + + # Initialize optimizer if not already done + if not hasattr(self, 'optimizer'): + self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=5e-5) + + # Zero gradients + self.optimizer.zero_grad() + + device = torch.device(self.device) + + device_type = self._get_device_type() + + # Forward pass with autocast for mixed precision training + with torch.autocast(device_type=device_type, + dtype=model_dtype if model_dtype in [torch.float16, torch.bfloat16] else None): + outputs = self.model( + input_ids=inputs["input_ids"], + attention_mask=inputs.get("attention_mask", None), + labels=labels + ) + + # Backward pass and optimize + loss = outputs.loss + if loss is not None: + loss.backward() + torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0) + self.optimizer.step() + # Print training progress + logger.info(f"✅ 
Training complete - Loss: {loss.item():.4f}") + return loss.item() + + logger.warning("⚠️ No loss computed during training") + return None + + except Exception as e: + logger.error(f"❌ Error during training: {str(e)}") + traceback.print_exc() + return None + + def _generate_text(self, prompt: str) -> Tuple[bool, str]: + """Generate text using the loaded PyTorch model. + + Args: + prompt: The input prompt for text generation + + Returns: + tuple: (success, response) where success is a boolean and + response is either the generated text or an error message + """ + # Input validation + if not prompt or not isinstance(prompt, str): + return False, "Invalid prompt: must be a non-empty string" + + if len(prompt.strip()) == 0: + return False, "Invalid prompt: cannot be empty or whitespace only" + + if not self._is_initialized and not self._initialize_model(): + return False, "Model initialization failed" + + try: + # Ensure model is on the correct device and in evaluation mode + device = torch.device(self.device) + + # Get model's dtype for consistent typing + model_dtype = next(self.model.parameters()).dtype + + # Tokenize input + inputs = self.tokenizer( + prompt, + return_tensors="pt", + padding=True, + truncation=True, + max_length=self.max_length, + return_token_type_ids=False + ) + + # Move inputs to the correct device and type + inputs = {k: v.to(device) for k, v in inputs.items()} + inputs['input_ids'] = inputs['input_ids'].to(dtype=torch.long) + if 'attention_mask' in inputs: + inputs['attention_mask'] = inputs['attention_mask'].to(dtype=torch.long) + + # Calculate max_new_tokens, ensuring it's at least DEFAULT_MAX_LENGTH + max_new_tokens = max(DEFAULT_MAX_LENGTH, inputs['input_ids'].shape[1]) + + # Generate text with appropriate settings + with torch.no_grad(): + device_type = self._get_device_type() + with torch.autocast(device_type=device_type, + enabled=device_type != 'cpu', + dtype=model_dtype if model_dtype in [torch.float16, torch.bfloat16] else None): + 
outputs = self.model.generate( + input_ids=inputs['input_ids'], + attention_mask=inputs.get('attention_mask', None), + max_new_tokens=max_new_tokens, + temperature=self.temperature, + top_p=self.top_p, + do_sample=True, + pad_token_id=self.tokenizer.eos_token_id, + no_repeat_ngram_size=3, + ) + + # Decode the generated text + full_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True) + + # Remove the input prompt from the output if it appears at the beginning + generated_text = full_text[len(prompt):].strip() if full_text.startswith(prompt) else full_text + + # Clean up the generated text + cleaned_text = self._clean_generated_text(generated_text) + + self.output_list.append(cleaned_text) + + return True, cleaned_text + + except Exception as e: + error_msg = f"Error in text generation: {str(e)}\n{traceback.format_exc()}" + logger.error(error_msg) + return False, error_msg + + + def get_model_description(self) -> Tuple[bool, str]: + """Get a description of the current PyTorch LLM configuration. + + Returns: + tuple: (success, description) where success is a boolean and + description is a string describing the configuration + """ + if not self._is_initialized: + return False, "Model initialization failed" + + desc = (f"PyTorch LLM: {self.model_name}" + f", Device: {self.device}, Max Length: {self.max_length}" + f", Temperature: {self.temperature}, Top-p: {self.top_p}" + f", Model Class: {self.model.__class__.__name__ if self.model else 'Not loaded'}\n") + return True, desc + + def _clean_generated_text(self, text: str) -> str: + """Clean up generated text by removing multiple consecutive non-text symbols. 
+ + Args: + text: The text to clean + + Returns: + str: Cleaned text with normalized punctuation and symbols + """ + if not text: + return text + + cleaned = text + # List of special characters to clean up + special_chars = ['.', '|', '%', '?'] + + for char in special_chars: + # Escape special regex characters + escaped_char = re.escape(char) + # Replace multiple consecutive characters with a single one + cleaned = re.sub(fr'{escaped_char}+', char, cleaned) + # Replace characters separated by whitespace with a single one + cleaned = re.sub(fr'({escaped_char}\s*)+', char, cleaned) + + + return cleaned.strip() + + def _train_on_example(self, prompt: str, target: str) -> Optional[float]: + """Train the model on a single example. + + Args: + prompt: The input prompt + target: The target response + + Returns: + float: The loss value if successful, None otherwise + """ + try: + # Combine prompt and target with separator + text = f"{prompt}{target}{self.tokenizer.eos_token}" + + # Get model's dtype for consistent typing + model_dtype = next(self.model.parameters()).dtype + + # Tokenize the input + inputs = self.tokenizer( + text, + return_tensors="pt", + padding="max_length", + max_length=self.max_length, + truncation=True + ) + + # Move inputs to the correct device and type + inputs = {k: v.to(self.device) for k, v in inputs.items()} + + # Create labels (shifted input) and ensure correct dtype + labels = inputs["input_ids"].clone() + + # Ensure attention mask is the same dtype as the model + if 'attention_mask' in inputs: + inputs['attention_mask'] = inputs['attention_mask'].to(dtype=model_dtype) + + # Forward pass with autocast for mixed precision training + with torch.autocast(device_type='cuda' if 'cuda' in str(self.device) else 'cpu', + dtype=model_dtype): + outputs = self.model( + input_ids=inputs["input_ids"], + attention_mask=inputs.get("attention_mask", None), + labels=labels + ) + + # Backward pass and optimize + loss = outputs.loss + if loss is not None: + 
loss.backward() + return loss.item() + return None + + except Exception as e: + logger.error(f"Error during training: {str(e)}") + return None + + def get_throughput_analysis(self) -> Tuple[bool, str]: + """Generate throughput analysis using the PyTorch LLM.""" + try: + prompt = self.get_throughput_prompt() + return self._generate_text(prompt) + except Exception as e: + return False, f"Failed to generate throughput analysis: {str(e)}" + + def get_latency_analysis(self) -> Tuple[bool, str]: + """Generate latency analysis using the PyTorch LLM.""" + try: + prompt = self.get_latency_prompt() + return self._generate_text(prompt) + except Exception as e: + return False, f"Failed to generate latency analysis: {str(e)}" + + def get_total_mb_analysis(self) -> Tuple[bool, str]: + """Generate total MB processed analysis using the PyTorch LLM.""" + try: + prompt = self.get_total_mb_prompt() + return self._generate_text(prompt) + except Exception as e: + return False, f"Failed to generate total MB analysis: {str(e)}" + + def get_percentile_histogram_analysis(self) -> Tuple[bool, str]: + """Generate percentile histogram analysis using the PyTorch LLM.""" + try: + prompt = self.get_percentile_histogram_prompt() + return self._generate_text(prompt) + except Exception as e: + return False, f"Failed to generate percentile histogram analysis: {str(e)}" + + def __del__(self) -> None: + """Clean up resources when the object is destroyed.""" + if hasattr(self, 'model') and self.model is not None: + del self.model + if hasattr(self, 'tokenizer') and self.tokenizer is not None: + del self.tokenizer + if torch.cuda.is_available(): + torch.cuda.empty_cache() diff --git a/src/genai/genai.py b/src/genai/genai.py index a395872..1f0556c 100644 --- a/src/genai/genai.py +++ b/src/genai/genai.py @@ -55,6 +55,12 @@ def add_args(self, parser): def parse_args(self, args): pass + def open(self, args): + pass + + def close(self, args): + pass + @abstractmethod def get_model_description(self) -> Tuple[bool, str]: """ 
@@ -108,7 +114,7 @@ def get_throughput_prompt(self): "- Identify which storage systems have the highest and lowest minimum, average and maximum throughput.\n" "- Quantify relative differences roughly (for example, 'about 2x higher').\n" "- Mention any big gaps or interesting patterns.\n" - "- if any sentence of paragraph is of more than 70 characters, break it into multiple sentences.\n" + "- If any sentence of a paragraph is longer than 70 characters, break it into multiple sentences.\n" "Here are the measurements:\n" f"{metrics_block}\n\n" "Now write the analysis in clear, technical English." diff --git a/src/main/sbk_charts.py b/src/main/sbk_charts.py index 656a806..1121d81 100644 --- a/src/main/sbk_charts.py +++ b/src/main/sbk_charts.py @@ -71,4 +71,6 @@ def sbk_charts(): # create AI Summary in excel file ch.parse_args(args) + ch.open(args) ch.add_performance_details() + ch.close(args) diff --git a/src/version/sbk_version.py b/src/version/sbk_version.py index 7269908..fb75200 100644 --- a/src/version/sbk_version.py +++ b/src/version/sbk_version.py @@ -8,4 +8,4 @@ # http://www.apache.org/licenses/LICENSE-2.0 ## -__sbk_version__="3.26.1.0" \ No newline at end of file +__sbk_version__="3.26.2.0" \ No newline at end of file