1
+ """
2
+ GuideLLM command-line interface providing benchmarking, dataset preprocessing, and
3
+ mock server functionality.
4
+
5
+ This module serves as the primary entry point for the GuideLLM CLI application,
6
+ offering a comprehensive suite of tools for language model evaluation and testing.
7
+ It provides three main command groups: benchmark operations for performance testing
8
+ against generative models, dataset preprocessing utilities for data preparation and
9
+ transformation, and a mock server for testing and development scenarios. The CLI
10
+ supports various backends, output formats, and configuration options to accommodate
11
+ different benchmarking needs and deployment environments.
12
+
13
+ Example:
14
+ ::
15
+ # Run a benchmark against a model
16
+ guidellm benchmark run --target http://localhost:8000 --data dataset.json \\
17
+ --profile sweep
18
+
19
+ # Preprocess a dataset
20
+ guidellm preprocess dataset input.json output.json --processor gpt2
21
+
22
+ # Start a mock server for testing
23
+ guidellm mock-server --host 0.0.0.0 --port 8080
24
+ """
25
+
26
+ from __future__ import annotations
27
+
1
28
import asyncio
2
29
import codecs
3
30
from pathlib import Path
4
- from typing import Union
31
+ from typing import Annotated , Union
5
32
6
33
import click
7
34
16
43
from guidellm .benchmark .scenario import (
17
44
GenerativeTextScenario ,
18
45
)
46
+ from guidellm .mock_server import MockServer , ServerConfig
19
47
from guidellm .preprocess .dataset import ShortPromptStrategy , process_dataset
20
48
from guidellm .scheduler import StrategyType
21
49
from guidellm .settings import print_config
22
- from guidellm .utils import DefaultGroupHandler , get_literal_vals
50
+ from guidellm .utils import Console , DefaultGroupHandler , get_literal_vals
23
51
from guidellm .utils import cli as cli_tools
24
52
25
__all__ = [
    "STRATEGY_PROFILE_CHOICES",
    "benchmark",
    "cli",
    "config",
    "dataset",
    "decode_escaped_str",
    "from_file",
    "mock_server",
    "preprocess",
    "run",
]

# Union of all strategy and profile literal values; used as the Click choice
# list for the benchmark `--profile` option so both kinds are accepted.
STRATEGY_PROFILE_CHOICES: Annotated[
    list[str], "Available strategy and profile choices for benchmark execution types"
] = list(get_literal_vals(Union[ProfileType, StrategyType]))
69
+
70
+
71
def decode_escaped_str(_ctx, _param, value):
    """
    Click callback that decodes escape sequences in option values.

    Click passes option values through literally, so an argument such as
    "\\n" arrives as a backslash followed by the letter "n". This callback
    converts those sequences into the characters they represent.

    :param _ctx: Click context (unused)
    :param _param: Click parameter (unused)
    :param value: Raw option string, possibly containing escape sequences
    :return: Decoded string, or None when no value was supplied
    :raises click.BadParameter: When escape sequence decoding fails
    """
    if value is not None:
        try:
            return codecs.decode(value, "unicode_escape")
        except Exception as e:
            raise click.BadParameter(f"Could not decode escape sequences: {e}") from e
    return None
26
91
27
92
28
93
@click.group()
def cli():
    """
    Root entry point for the GuideLLM command-line interface.

    Organizes all GuideLLM CLI functionality into logical subgroups for
    benchmarking, preprocessing, configuration display, and the mock server.
    The group itself performs no work; subcommands carry the behavior.
    """
31
102
32
103
33
104
@cli .group (
@@ -36,7 +107,13 @@ def cli():
36
107
default = "run" ,
37
108
)
38
109
def benchmark():
    """
    Benchmark command group for running and managing performance tests.

    Provides subcommands to execute new benchmarks against generative models
    (``run``, the default subcommand) and to load previously saved benchmark
    reports for analysis (``from-file``). Supports various benchmarking
    strategies, output formats, and backend types.
    """
40
117
41
118
42
119
@benchmark .command (
@@ -264,9 +341,24 @@ def benchmark():
264
341
"If None, will run until max_seconds or the data is exhausted."
265
342
),
266
343
)
267
- @click .option ("--max-errors" , type = int , default = None , help = "" )
268
- @click .option ("--max-error-rate" , type = float , default = None , help = "" )
269
- @click .option ("--max-global-error-rate" , type = float , default = None , help = "" )
344
+ @click .option (
345
+ "--max-errors" ,
346
+ type = int ,
347
+ default = None ,
348
+ help = "Maximum number of errors allowed before stopping the benchmark" ,
349
+ )
350
+ @click .option (
351
+ "--max-error-rate" ,
352
+ type = float ,
353
+ default = None ,
354
+ help = "Maximum error rate allowed before stopping the benchmark" ,
355
+ )
356
+ @click .option (
357
+ "--max-global-error-rate" ,
358
+ type = float ,
359
+ default = None ,
360
+ help = "Maximum global error rate allowed across all benchmarks" ,
361
+ )
270
362
def run (
271
363
target ,
272
364
data ,
@@ -301,6 +393,14 @@ def run(
301
393
max_error_rate ,
302
394
max_global_error_rate ,
303
395
):
396
+ """
397
+ Execute a generative text benchmark against a target model backend.
398
+
399
+ Runs comprehensive performance testing using various strategies and profiles,
400
+ collecting metrics on latency, throughput, error rates, and resource usage.
401
+ Supports multiple backends, data sources, output formats, and constraint types
402
+ for flexible benchmark configuration.
403
+ """
304
404
asyncio .run (
305
405
benchmark_generative_text (
306
406
target = target ,
@@ -375,21 +475,14 @@ def run(
375
475
),
376
476
)
377
477
def from_file(path, output_path):
    """
    Load and optionally re-export a previously saved benchmark report.

    Imports benchmark results from a saved file and provides optional
    conversion to different output formats. Supports JSON, YAML, and CSV
    export formats based on the output file extension.

    :param path: Path of the saved benchmark report to load
    :param output_path: Destination for the re-exported report; the file
        extension selects the output format
    """
    reimport_benchmarks_report(path, output_path)
393
486
394
487
395
488
@cli .command (
@@ -400,12 +493,25 @@ def decode_escaped_str(_ctx, _param, value):
400
493
),
401
494
)
402
495
def config():
    """
    Display available GuideLLM configuration environment variables.

    Prints a comprehensive list of all environment variables that can be
    used to configure GuideLLM behavior, including their current values,
    defaults, and descriptions. Delegates entirely to
    :func:`guidellm.settings.print_config`.
    """
    print_config()
404
504
405
505
406
506
@cli.group(help="General preprocessing tools and utilities.")
def preprocess():
    """
    Preprocessing command group for dataset preparation and transformation.

    Provides utilities for converting, processing, and optimizing datasets
    for use in GuideLLM benchmarks, including token count adjustments,
    format conversions, and data validation.
    """
409
515
410
516
411
517
@preprocess .command (
@@ -521,6 +627,13 @@ def dataset(
521
627
hub_dataset_id ,
522
628
random_seed ,
523
629
):
630
+ """
631
+ Convert and process datasets for specific prompt and output token requirements.
632
+
633
+ Transforms datasets to meet target token length specifications using various
634
+ strategies for handling short prompts and output length adjustments. Supports
635
+ multiple input formats and can optionally push results to Hugging Face Hub.
636
+ """
524
637
process_dataset (
525
638
data = data ,
526
639
output_path = output_path ,
@@ -538,5 +651,118 @@ def dataset(
538
651
)
539
652
540
653
654
@cli.command(help="Start the GuideLLM mock OpenAI/vLLM server for testing.")
@click.option("--host", default="127.0.0.1", help="Host to bind the server to")
@click.option("--port", default=8000, type=int, help="Port to bind the server to")
@click.option("--workers", default=1, type=int, help="Number of worker processes")
@click.option(
    "--model", default="llama-3.1-8b-instruct", help="The name of the model to mock"
)
@click.option(
    "--request-latency",
    default=3,
    type=float,
    help="Request latency in seconds for non-streaming requests",
)
@click.option(
    "--request-latency-std",
    default=0,
    type=float,
    help=(
        "Request latency standard deviation (normal distribution) "
        "in seconds for non-streaming requests"
    ),
)
@click.option(
    "--ttft-ms",
    default=150,
    type=float,
    help="Time to first token in milliseconds for streaming requests",
)
@click.option(
    "--ttft-ms-std",
    default=0,
    type=float,
    help=(
        "Time to first token standard deviation (normal distribution) in milliseconds"
    ),
)
@click.option(
    "--itl-ms",
    default=10,
    type=float,
    help="Inter token latency in milliseconds for streaming requests",
)
@click.option(
    "--itl-ms-std",
    default=0,
    type=float,
    help=(
        "Inter token latency standard deviation (normal distribution) "
        "in milliseconds for streaming requests"
    ),
)
@click.option(
    "--output-tokens",
    default=128,
    type=int,
    help="Output tokens for streaming requests",
)
@click.option(
    "--output-tokens-std",
    default=0,
    type=float,
    help=(
        "Output tokens standard deviation (normal distribution) for streaming requests"
    ),
)
def mock_server(
    host: str,
    port: int,
    workers: int,
    model: str,
    request_latency: float,
    request_latency_std: float,
    ttft_ms: float,
    ttft_ms_std: float,
    itl_ms: float,
    itl_ms_std: float,
    output_tokens: int,
    output_tokens_std: float,
):
    """
    Launch the mock OpenAI/vLLM-compatible server used for testing GuideLLM.

    Simulates model inference with configurable request latency,
    time-to-first-token, inter-token latency, and output-token distributions,
    so benchmarks and development workflows can run without a real model
    deployment. Blocks until the server is stopped.
    """
    # Local is named server_config (not `config`) to avoid shadowing the
    # module-level `config` command defined in this file.
    server_config = ServerConfig(
        host=host,
        port=port,
        workers=workers,
        model=model,
        request_latency=request_latency,
        request_latency_std=request_latency_std,
        ttft_ms=ttft_ms,
        ttft_ms_std=ttft_ms_std,
        itl_ms=itl_ms,
        itl_ms_std=itl_ms_std,
        output_tokens=output_tokens,
        output_tokens_std=output_tokens_std,
    )

    # Announce the endpoint before entering the blocking run loop.
    Console().print_update(
        title="GuideLLM mock server starting...",
        details=f"Listening on http://{host}:{port} for model {model}",
        status="success",
    )
    MockServer(server_config).run()
765
+
766
+
541
767
# Script entry point: dispatch to the Click root command group.
if __name__ == "__main__":
    cli()
0 commit comments