
Commit ae90cde

Add mock server for testing
1 parent a3bc824 commit ae90cde

14 files changed: +2571 −23 lines

pyproject.toml

Lines changed: 3 additions & 1 deletion
@@ -47,6 +47,7 @@ dependencies = [
     "culsans~=0.9.0",
     "datasets",
     "eval_type_backport",
+    "faker",
     "ftfy>=6.0.0",
     "httpx[http2]<1.0.0",
     "loguru",
@@ -59,6 +60,7 @@ dependencies = [
     "pyhumps>=3.8.0",
     "pyyaml>=6.0.0",
     "rich",
+    "sanic",
     "transformers",
     "uvloop>=0.18",
 ]
@@ -79,7 +81,7 @@ dev = [
     # testing
     "lorem~=0.1.1",
     "pytest~=8.2.2",
-    "pytest-asyncio~=0.23.8",
+    "pytest-asyncio~=1.1.0",
     "pytest-cov~=5.0.0",
     "pytest-mock~=3.14.0",
     "pytest-rerunfailures~=14.0",

src/guidellm/__main__.py

Lines changed: 248 additions & 22 deletions
@@ -1,7 +1,34 @@
+"""
+GuideLLM command-line interface providing benchmarking, dataset preprocessing, and
+mock server functionality.
+
+This module serves as the primary entry point for the GuideLLM CLI application,
+offering a comprehensive suite of tools for language model evaluation and testing.
+It provides three main command groups: benchmark operations for performance testing
+against generative models, dataset preprocessing utilities for data preparation and
+transformation, and a mock server for testing and development scenarios. The CLI
+supports various backends, output formats, and configuration options to accommodate
+different benchmarking needs and deployment environments.
+
+Example:
+::
+    # Run a benchmark against a model
+    guidellm benchmark run --target http://localhost:8000 --data dataset.json \\
+        --profile sweep
+
+    # Preprocess a dataset
+    guidellm preprocess dataset input.json output.json --processor gpt2
+
+    # Start a mock server for testing
+    guidellm mock-server --host 0.0.0.0 --port 8080
+"""
+
+from __future__ import annotations
+
 import asyncio
 import codecs
 from pathlib import Path
-from typing import Union
+from typing import Annotated, Union
 
 import click
 
@@ -16,18 +43,62 @@
 from guidellm.benchmark.scenario import (
     GenerativeTextScenario,
 )
+from guidellm.mock_server import MockServer, ServerConfig
 from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
 from guidellm.scheduler import StrategyType
 from guidellm.settings import print_config
-from guidellm.utils import DefaultGroupHandler, get_literal_vals
+from guidellm.utils import Console, DefaultGroupHandler, get_literal_vals
 from guidellm.utils import cli as cli_tools
 
-STRATEGY_PROFILE_CHOICES = list(get_literal_vals(Union[ProfileType, StrategyType]))
+__all__ = [
+    "STRATEGY_PROFILE_CHOICES",
+    "benchmark",
+    "cli",
+    "config",
+    "dataset",
+    "decode_escaped_str",
+    "from_file",
+    "mock_server",
+    "preprocess",
+    "run",
+]
+
+STRATEGY_PROFILE_CHOICES: Annotated[
+    list[str], "Available strategy and profile choices for benchmark execution types"
+] = list(get_literal_vals(Union[ProfileType, StrategyType]))
+
+
+def decode_escaped_str(_ctx, _param, value):
+    """
+    Decode escape sequences in Click option values.
+
+    Click automatically escapes characters in option values, converting sequences
+    like "\\n" to "\\\\n". This function properly decodes these escape sequences
+    to their intended characters for use in CLI options.
+
+    :param _ctx: Click context (unused)
+    :param _param: Click parameter (unused)
+    :param value: String value to decode escape sequences from
+    :return: Decoded string with proper escape sequences
+    :raises click.BadParameter: When escape sequence decoding fails
+    """
+    if value is None:
+        return None
+    try:
+        return codecs.decode(value, "unicode_escape")
+    except Exception as e:
+        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
 
 
 @click.group()
 def cli():
-    pass
+    """
+    Main entry point for the GuideLLM command-line interface.
+
+    This is the root command group that organizes all GuideLLM CLI functionality
+    into logical subgroups for benchmarking, preprocessing, configuration, and
+    mock server operations.
+    """
 
 
 @cli.group(
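
decode_escaped_str now lives above the commands that use it and is exported via __all__. A minimal sketch of how such a Click callback is typically attached to an option; the standalone command below is hypothetical (the --pad-char name comes from the old docstring) and not part of this commit:

    import click

    from guidellm.__main__ import decode_escaped_str


    @click.command()
    @click.option("--pad-char", callback=decode_escaped_str, default="\\n")
    def show_pad_char(pad_char):
        # Click hands the callback the literal backslash-n it parsed;
        # unicode_escape decoding turns it back into a real newline.
        click.echo(repr(pad_char))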
@@ -36,7 +107,13 @@ def cli():
     default="run",
 )
 def benchmark():
-    pass
+    """
+    Benchmark command group for running and managing performance tests.
+
+    This command group provides functionality to execute new benchmarks against
+    generative models and load previously saved benchmark reports for analysis.
+    Supports various benchmarking strategies, output formats, and backend types.
+    """
 
 
 @benchmark.command(
@@ -264,9 +341,24 @@ def benchmark():
         "If None, will run until max_seconds or the data is exhausted."
     ),
 )
-@click.option("--max-errors", type=int, default=None, help="")
-@click.option("--max-error-rate", type=float, default=None, help="")
-@click.option("--max-global-error-rate", type=float, default=None, help="")
+@click.option(
+    "--max-errors",
+    type=int,
+    default=None,
+    help="Maximum number of errors allowed before stopping the benchmark",
+)
+@click.option(
+    "--max-error-rate",
+    type=float,
+    default=None,
+    help="Maximum error rate allowed before stopping the benchmark",
+)
+@click.option(
+    "--max-global-error-rate",
+    type=float,
+    default=None,
+    help="Maximum global error rate allowed across all benchmarks",
+)
 def run(
     target,
     data,
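
The three error-budget options previously shipped with empty help strings; the expanded declarations above document them properly. Following the invocation style from the module docstring, a run with an error budget might look like this (flag values are illustrative):

    guidellm benchmark run --target http://localhost:8000 --data dataset.json \
        --profile sweep --max-errors 10 --max-error-rate 0.05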
@@ -301,6 +393,14 @@ def run(
     max_error_rate,
     max_global_error_rate,
 ):
+    """
+    Execute a generative text benchmark against a target model backend.
+
+    Runs comprehensive performance testing using various strategies and profiles,
+    collecting metrics on latency, throughput, error rates, and resource usage.
+    Supports multiple backends, data sources, output formats, and constraint types
+    for flexible benchmark configuration.
+    """
     asyncio.run(
         benchmark_generative_text(
             target=target,
@@ -375,21 +475,14 @@
     ),
 )
 def from_file(path, output_path):
-    reimport_benchmarks_report(path, output_path)
-
-
-def decode_escaped_str(_ctx, _param, value):
     """
-    Click auto adds characters. For example, when using --pad-char "\n",
-    it parses it as "\\n". This method decodes the string to handle escape
-    sequences correctly.
+    Load and optionally re-export a previously saved benchmark report.
+
+    Imports benchmark results from a saved file and provides optional conversion
+    to different output formats. Supports JSON, YAML, and CSV export formats
+    based on the output file extension.
     """
-    if value is None:
-        return None
-    try:
-        return codecs.decode(value, "unicode_escape")
-    except Exception as e:
-        raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
+    reimport_benchmarks_report(path, output_path)
 
 
 @cli.command(
@@ -400,12 +493,25 @@ def decode_escaped_str(_ctx, _param, value):
     ),
 )
 def config():
+    """
+    Display available GuideLLM configuration environment variables.
+
+    Prints a comprehensive list of all environment variables that can be used
+    to configure GuideLLM behavior, including their current values, defaults,
+    and descriptions.
+    """
     print_config()
 
 
 @cli.group(help="General preprocessing tools and utilities.")
 def preprocess():
-    pass
+    """
+    Preprocessing command group for dataset preparation and transformation.
+
+    This command group provides utilities for converting, processing, and
+    optimizing datasets for use in GuideLLM benchmarks. Includes functionality
+    for token count adjustments, format conversions, and data validation.
+    """
 
 
 @preprocess.command(
@@ -521,6 +627,13 @@ def dataset(
     hub_dataset_id,
     random_seed,
 ):
+    """
+    Convert and process datasets for specific prompt and output token requirements.
+
+    Transforms datasets to meet target token length specifications using various
+    strategies for handling short prompts and output length adjustments. Supports
+    multiple input formats and can optionally push results to Hugging Face Hub.
+    """
     process_dataset(
         data=data,
         output_path=output_path,
@@ -538,5 +651,118 @@ def dataset(
 )
 
 
+@cli.command(help="Start the GuideLLM mock OpenAI/vLLM server for testing.")
+@click.option("--host", default="127.0.0.1", help="Host to bind the server to")
+@click.option("--port", default=8000, type=int, help="Port to bind the server to")
+@click.option("--workers", default=1, type=int, help="Number of worker processes")
+@click.option(
+    "--model", default="llama-3.1-8b-instruct", help="The name of the model to mock"
+)
+@click.option(
+    "--request-latency",
+    default=3,
+    type=float,
+    help="Request latency in seconds for non-streaming requests",
+)
+@click.option(
+    "--request-latency-std",
+    default=0,
+    type=float,
+    help=(
+        "Request latency standard deviation (normal distribution) "
+        "in seconds for non-streaming requests"
+    ),
+)
+@click.option(
+    "--ttft-ms",
+    default=150,
+    type=float,
+    help="Time to first token in milliseconds for streaming requests",
+)
+@click.option(
+    "--ttft-ms-std",
+    default=0,
+    type=float,
+    help=(
+        "Time to first token standard deviation (normal distribution) in milliseconds"
+    ),
+)
+@click.option(
+    "--itl-ms",
+    default=10,
+    type=float,
+    help="Inter token latency in milliseconds for streaming requests",
+)
+@click.option(
+    "--itl-ms-std",
+    default=0,
+    type=float,
+    help=(
+        "Inter token latency standard deviation (normal distribution) "
+        "in milliseconds for streaming requests"
+    ),
+)
+@click.option(
+    "--output-tokens",
+    default=128,
+    type=int,
+    help="Output tokens for streaming requests",
+)
+@click.option(
+    "--output-tokens-std",
+    default=0,
+    type=float,
+    help=(
+        "Output tokens standard deviation (normal distribution) for streaming requests"
+    ),
+)
+def mock_server(
+    host: str,
+    port: int,
+    workers: int,
+    model: str,
+    request_latency: float,
+    request_latency_std: float,
+    ttft_ms: float,
+    ttft_ms_std: float,
+    itl_ms: float,
+    itl_ms_std: float,
+    output_tokens: int,
+    output_tokens_std: float,
+):
+    """
+    Start a GuideLLM mock OpenAI/vLLM-compatible server for testing and development.
+
+    Launches a mock server that simulates model inference with configurable latency
+    characteristics, token generation patterns, and response timing. Useful for
+    testing GuideLLM benchmarks without requiring actual model deployment or for
+    development scenarios requiring predictable server behavior.
+    """
+
+    config = ServerConfig(
+        host=host,
+        port=port,
+        workers=workers,
+        model=model,
+        request_latency=request_latency,
+        request_latency_std=request_latency_std,
+        ttft_ms=ttft_ms,
+        ttft_ms_std=ttft_ms_std,
+        itl_ms=itl_ms,
+        itl_ms_std=itl_ms_std,
+        output_tokens=output_tokens,
+        output_tokens_std=output_tokens_std,
+    )
+
+    server = MockServer(config)
+    console = Console()
+    console.print_update(
+        title="GuideLLM mock server starting...",
+        details=f"Listening on http://{host}:{port} for model {model}",
+        status="success",
+    )
+    server.run()
+
+
 if __name__ == "__main__":
     cli()
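
With the new mock-server command in place, any OpenAI-style client can exercise it. A minimal sketch using httpx (already a runtime dependency); the /v1/chat/completions route is assumed from the advertised OpenAI/vLLM compatibility rather than shown in this diff:

    import httpx

    # Assumes `guidellm mock-server` is running with the defaults above.
    response = httpx.post(
        "http://127.0.0.1:8000/v1/chat/completions",
        json={
            "model": "llama-3.1-8b-instruct",
            "messages": [{"role": "user", "content": "Hello"}],
        },
        timeout=30.0,  # default --request-latency is 3s, so leave headroom
    )
    response.raise_for_status()
    print(response.json())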
src/guidellm/mock_server/__init__.py

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+"""
+GuideLLM Mock Server for OpenAI and vLLM API compatibility.
+"""
+
+from .server import MockServer
+
+__all__ = ["MockServer"]
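
Since MockServer is exported at the package level, tests can also start the server programmatically instead of through the CLI. A sketch mirroring the wiring in __main__.py; it assumes ServerConfig fields left out here carry the same defaults as the CLI options, and run() blocks, so a real fixture would launch it in a subprocess or thread:

    from guidellm.mock_server import MockServer, ServerConfig

    # Values mirror the mock_server CLI defaults shown earlier.
    config = ServerConfig(
        host="127.0.0.1",
        port=8000,
        workers=1,
        model="llama-3.1-8b-instruct",
    )
    MockServer(config).run()  # blocks until interrupted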
