
Commit ae90cde

Add mock server for testing
1 parent a3bc824 commit ae90cde

14 files changed: +2571 −23 lines

pyproject.toml

Lines changed: 3 additions & 1 deletion
@@ -47,6 +47,7 @@ dependencies = [
     "culsans~=0.9.0",
     "datasets",
     "eval_type_backport",
+    "faker",
     "ftfy>=6.0.0",
     "httpx[http2]<1.0.0",
     "loguru",
@@ -59,6 +60,7 @@ dependencies = [
     "pyhumps>=3.8.0",
     "pyyaml>=6.0.0",
     "rich",
+    "sanic",
     "transformers",
     "uvloop>=0.18",
 ]
@@ -79,7 +81,7 @@ dev = [
     # testing
     "lorem~=0.1.1",
     "pytest~=8.2.2",
-    "pytest-asyncio~=0.23.8",
+    "pytest-asyncio~=1.1.0",
     "pytest-cov~=5.0.0",
     "pytest-mock~=3.14.0",
     "pytest-rerunfailures~=14.0",

src/guidellm/__main__.py

Lines changed: 248 additions & 22 deletions
@@ -1,7 +1,34 @@
+"""
+GuideLLM command-line interface providing benchmarking, dataset preprocessing, and
+mock server functionality.
+
+This module serves as the primary entry point for the GuideLLM CLI application,
+offering a comprehensive suite of tools for language model evaluation and testing.
+It provides three main command groups: benchmark operations for performance testing
+against generative models, dataset preprocessing utilities for data preparation and
+transformation, and a mock server for testing and development scenarios. The CLI
+supports various backends, output formats, and configuration options to accommodate
+different benchmarking needs and deployment environments.
+
+Example:
+::
+    # Run a benchmark against a model
+    guidellm benchmark run --target http://localhost:8000 --data dataset.json \\
+        --profile sweep
+
+    # Preprocess a dataset
+    guidellm preprocess dataset input.json output.json --processor gpt2
+
+    # Start a mock server for testing
+    guidellm mock-server --host 0.0.0.0 --port 8080
+"""
+
+from __future__ import annotations
+
 import asyncio
 import codecs
 from pathlib import Path
-from typing import Union
+from typing import Annotated, Union
 
 import click
 
@@ -16,18 +43,62 @@
 from guidellm.benchmark.scenario import (
     GenerativeTextScenario,
 )
+from guidellm.mock_server import MockServer, ServerConfig
 from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
 from guidellm.scheduler import StrategyType
 from guidellm.settings import print_config
-from guidellm.utils import DefaultGroupHandler, get_literal_vals
+from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
 from guidellm.utils import cli as cli_tools
 
-STRATEGY_PROFILE_CHOICES = list(get_literal_vals(Union[ProfileType, StrategyType]))
+__all__ = [
+    "STRATEGY_PROFILE_CHOICES",
+    "benchmark",
+    "cli",
+    "config",
+    "dataset",
+    "decode_escaped_str",
+    "from_file",
+    "mock_server",
+    "preprocess",
+    "run",
+]
+
+STRATEGY_PROFILE_CHOICES: Annotated[
+    list[str], "Available strategy and profile choices for benchmark execution types"
+] = list(get_literal_vals(Union[ProfileType, StrategyType]))
+
+
+def decode_escaped_str(_ctx, _param, value):
+    """
+    Decode escape sequences in Click option values.
+
+    Click automatically escapes characters in option values, converting sequences
+    like "\\n" to "\\\\n". This function properly decodes these escape sequences
+    to their intended characters for use in CLI options.
+
+    :param _ctx: Click context (unused)
+    :param _param: Click parameter (unused)
+    :param value: String value to decode escape sequences from
+    :return: Decoded string with proper escape sequences
+    :raises click.BadParameter: When escape sequence decoding fails
+    """
+    if value is None:
+        return None
+    try:
+        return codecs.decode(value, "unicode_escape")
+    except Exception as e:
+        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
 
 
 @click.group()
 def cli():
-    pass
+    """
+    Main entry point for the GuideLLM command-line interface.
+
+    This is the root command group that organizes all GuideLLM CLI functionality
+    into logical subgroups for benchmarking, preprocessing, configuration, and
+    mock server operations.
+    """
 
 
 @cli.group(
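
decode_escaped_str now lives above the commands that use it and is exported via __all__. A minimal sketch of how such a Click callback is typically attached to an option; the standalone command below is hypothetical (the --pad-char name comes from the old docstring) and not part of this commit:

    import click

    from guidellm.__main__ import decode_escaped_str


    @click.command()
    @click.option("--pad-char", callback=decode_escaped_str, default="\\n")
    def show_pad_char(pad_char):
        # Click hands the callback the literal backslash-n it parsed;
        # unicode_escape decoding turns it back into a real newline.
        click.echo(repr(pad_char))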
@@ -36,7 +107,13 @@ def cli():
     default="run",
 )
 def benchmark():
-    pass
+    """
+    Benchmark command group for running and managing performance tests.
+
+    This command group provides functionality to execute new benchmarks against
+    generative models and load previously saved benchmark reports for analysis.
+    Supports various benchmarking strategies, output formats, and backend types.
+    """
 
 
 @benchmark.command(
@@ -264,9 +341,24 @@ def benchmark():
         "If None, will run until max_seconds or the data is exhausted."
     ),
 )
-@click.option("--max-errors", type=int, default=None, help="")
-@click.option("--max-error-rate", type=float, default=None, help="")
-@click.option("--max-global-error-rate", type=float, default=None, help="")
+@click.option(
+    "--max-errors",
+    type=int,
+    default=None,
+    help="Maximum number of errors allowed before stopping the benchmark",
+)
+@click.option(
+    "--max-error-rate",
+    type=float,
+    default=None,
+    help="Maximum error rate allowed before stopping the benchmark",
+)
+@click.option(
+    "--max-global-error-rate",
+    type=float,
+    default=None,
+    help="Maximum global error rate allowed across all benchmarks",
+)
 def run(
     target,
     data,
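
The three error-budget options previously shipped with empty help strings; the expanded declarations above document them properly. Following the invocation style from the module docstring, a run with an error budget might look like this (flag values are illustrative):

    guidellm benchmark run --target http://localhost:8000 --data dataset.json \
        --profile sweep --max-errors 10 --max-error-rate 0.05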
@@ -301,6 +393,14 @@ def run(
     max_error_rate,
     max_global_error_rate,
 ):
+    """
+    Execute a generative text benchmark against a target model backend.
+
+    Runs comprehensive performance testing using various strategies and profiles,
+    collecting metrics on latency, throughput, error rates, and resource usage.
+    Supports multiple backends, data sources, output formats, and constraint types
+    for flexible benchmark configuration.
+    """
     asyncio.run(
         benchmark_generative_text(
             target=target,
@@ -375,21 +475,14 @@
     ),
 )
 def from_file(path, output_path):
-    reimport_benchmarks_report(path, output_path)
-
-
-def decode_escaped_str(_ctx, _param, value):
     """
-    Click auto adds characters. For example, when using --pad-char "\n",
-    it parses it as "\\n". This method decodes the string to handle escape
-    sequences correctly.
+    Load and optionally re-export a previously saved benchmark report.
+
+    Imports benchmark results from a saved file and provides optional conversion
+    to different output formats. Supports JSON, YAML, and CSV export formats
+    based on the output file extension.
     """
-    if value is None:
-        return None
-    try:
-        return codecs.decode(value, "unicode_escape")
-    except Exception as e:
-        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
+    reimport_benchmarks_report(path, output_path)
 
 
 @cli.command(
@@ -400,12 +493,25 @@ def decode_escaped_str(_ctx, _param, value):
     ),
 )
 def config():
+    """
+    Display available GuideLLM configuration environment variables.
+
+    Prints a comprehensive list of all environment variables that can be used
+    to configure GuideLLM behavior, including their current values, defaults,
+    and descriptions.
+    """
     print_config()
 
 
 @cli.group(help="General preprocessing tools and utilities.")
 def preprocess():
-    pass
+    """
+    Preprocessing command group for dataset preparation and transformation.
+
+    This command group provides utilities for converting, processing, and
+    optimizing datasets for use in GuideLLM benchmarks. Includes functionality
+    for token count adjustments, format conversions, and data validation.
+    """
 
 
 @preprocess.command(
@@ -521,6 +627,13 @@ def dataset(
     hub_dataset_id,
     random_seed,
 ):
+    """
+    Convert and process datasets for specific prompt and output token requirements.
+
+    Transforms datasets to meet target token length specifications using various
+    strategies for handling short prompts and output length adjustments. Supports
+    multiple input formats and can optionally push results to Hugging Face Hub.
+    """
     process_dataset(
         data=data,
         output_path=output_path,
@@ -538,5 +651,118 @@ def dataset(
 )
 
 
+@cli.command(help="Start the GuideLLM mock OpenAI/vLLM server for testing.")
+@click.option("--host", default="127.0.0.1", help="Host to bind the server to")
+@click.option("--port", default=8000, type=int, help="Port to bind the server to")
+@click.option("--workers", default=1, type=int, help="Number of worker processes")
+@click.option(
+    "--model", default="llama-3.1-8b-instruct", help="The name of the model to mock"
+)
+@click.option(
+    "--request-latency",
+    default=3,
+    type=float,
+    help="Request latency in seconds for non-streaming requests",
+)
+@click.option(
+    "--request-latency-std",
+    default=0,
+    type=float,
+    help=(
+        "Request latency standard deviation (normal distribution) "
+        "in seconds for non-streaming requests"
+    ),
+)
+@click.option(
+    "--ttft-ms",
+    default=150,
+    type=float,
+    help="Time to first token in milliseconds for streaming requests",
+)
+@click.option(
+    "--ttft-ms-std",
+    default=0,
+    type=float,
+    help=(
+        "Time to first token standard deviation (normal distribution) in milliseconds"
+    ),
+)
+@click.option(
+    "--itl-ms",
+    default=10,
+    type=float,
+    help="Inter token latency in milliseconds for streaming requests",
+)
+@click.option(
+    "--itl-ms-std",
+    default=0,
+    type=float,
+    help=(
+        "Inter token latency standard deviation (normal distribution) "
+        "in milliseconds for streaming requests"
+    ),
+)
+@click.option(
+    "--output-tokens",
+    default=128,
+    type=int,
+    help="Output tokens for streaming requests",
+)
+@click.option(
+    "--output-tokens-std",
+    default=0,
+    type=float,
+    help=(
+        "Output tokens standard deviation (normal distribution) for streaming requests"
+    ),
+)
+def mock_server(
+    host: str,
+    port: int,
+    workers: int,
+    model: str,
+    request_latency: float,
+    request_latency_std: float,
+    ttft_ms: float,
+    ttft_ms_std: float,
+    itl_ms: float,
+    itl_ms_std: float,
+    output_tokens: int,
+    output_tokens_std: float,
+):
+    """
+    Start a GuideLLM mock OpenAI/vLLM-compatible server for testing and development.
+
+    Launches a mock server that simulates model inference with configurable latency
+    characteristics, token generation patterns, and response timing. Useful for
+    testing GuideLLM benchmarks without requiring actual model deployment or for
+    development scenarios requiring predictable server behavior.
+    """
+
+    config = ServerConfig(
+        host=host,
+        port=port,
+        workers=workers,
+        model=model,
+        request_latency=request_latency,
+        request_latency_std=request_latency_std,
+        ttft_ms=ttft_ms,
+        ttft_ms_std=ttft_ms_std,
+        itl_ms=itl_ms,
+        itl_ms_std=itl_ms_std,
+        output_tokens=output_tokens,
+        output_tokens_std=output_tokens_std,
+    )
+
+    server = MockServer(config)
+    console = Console()
+    console.print_update(
+        title="GuideLLM mock server starting...",
+        details=f"Listening on http://{host}:{port} for model {model}",
+        status="success",
+    )
+    server.run()
+
+
 if __name__ == "__main__":
     cli()
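
With the new mock-server command in place, any OpenAI-style client can exercise it. A minimal sketch using httpx (already a runtime dependency); the /v1/chat/completions route is assumed from the advertised OpenAI/vLLM compatibility rather than shown in this diff:

    import httpx

    # Assumes `guidellm mock-server` is running with the defaults above.
    response = httpx.post(
        "http://127.0.0.1:8000/v1/chat/completions",
        json={
            "model": "llama-3.1-8b-instruct",
            "messages": [{"role": "user", "content": "Hello"}],
        },
        timeout=30.0,  # default --request-latency is 3s, so leave headroom
    )
    response.raise_for_status()
    print(response.json())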
src/guidellm/mock_server/__init__.py

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+"""
+GuideLLM Mock Server for OpenAI and vLLM API compatibility.
+"""
+
+from .server import MockServer
+
+__all__ = ["MockServer"]
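
Since MockServer is exported at the package level, tests can also start the server programmatically instead of through the CLI. A sketch mirroring the wiring in __main__.py; it assumes ServerConfig fields left out here carry the same defaults as the CLI options, and run() blocks, so a real fixture would launch it in a subprocess or thread:

    from guidellm.mock_server import MockServer, ServerConfig

    # Values mirror the mock_server CLI defaults shown earlier.
    config = ServerConfig(
        host="127.0.0.1",
        port=8000,
        workers=1,
        model="llama-3.1-8b-instruct",
    )
    MockServer(config).run()  # blocks until interrupted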
