Skip to content

Commit cfdc2ed

Browse files
committed
Working clis and entrypoints
1 parent 34f851c commit cfdc2ed

28 files changed

+1498
-1249
lines changed

plot.png

-42.3 KB
Binary file not shown.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ dev = [
7878

7979

8080
[project.entry-points.console_scripts]
81-
guidellm = "guidellm.main:generate_benchmark_report_cli"
81+
guidellm = "guidellm.__main__:cli"
8282
guidellm-config = "guidellm.config:print_config"
8383

8484

src/guidellm/__init__.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,22 @@
66
# flake8: noqa
77

88
import os
9-
import transformers # type: ignore
9+
import logging
10+
import contextlib
1011

11-
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers
12-
transformers.logging.set_verbosity_error() # Silence warnings for transformers
1312

13+
with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(
14+
devnull
15+
), contextlib.redirect_stdout(devnull):
16+
from transformers.utils import logging as hf_logging
17+
18+
# Set the log level for the transformers library to ERROR
19+
# to ignore None of PyTorch, TensorFlow found
20+
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers
21+
hf_logging.set_verbosity_error()
22+
logging.getLogger("transformers").setLevel(logging.ERROR)
1423

1524
from .config import settings
1625
from .logger import configure_logger, logger
17-
# from .main import generate_benchmark_report
1826

1927
__all__ = ["configure_logger", "logger", "settings", "generate_benchmark_report"]

src/guidellm/__main__.py

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
import asyncio
2+
import json
3+
from pathlib import Path
4+
from typing import get_args
5+
6+
import click
7+
8+
from guidellm.backend import BackendType
9+
from guidellm.benchmark import ProfileType, benchmark_generative_text
10+
from guidellm.scheduler import StrategyType
11+
12+
# Valid values for the --rate-type option: every benchmark profile type plus
# every scheduler strategy type (the two sets overlap, hence the set union).
STRATEGY_PROFILE_CHOICES = set(get_args(ProfileType)) | set(get_args(StrategyType))
15+
16+
17+
def parse_json(ctx, param, value):
    """Click option callback: decode a JSON-encoded option value.

    Returns None when the option was omitted; otherwise the decoded Python
    object. Raises click.BadParameter (chained from the decode error) when
    the value is not valid JSON.
    """
    if value is None:
        return None
    try:
        decoded = json.loads(value)
    except json.JSONDecodeError as err:
        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
    return decoded
24+
25+
26+
def parse_number_str(ctx, param, value):
    """Click option callback: parse a number or comma-separated numbers.

    Returns None when the option was omitted; otherwise a list with one
    entry per comma-separated token — an int when the token is all digits,
    a float otherwise. Raises click.BadParameter on non-numeric tokens.
    """
    if value is None:
        return None

    # str.split(",") already yields a single-element list when there is no
    # comma, so no special case is needed for a lone number.
    tokens = value.split(",")

    try:
        return [int(tok) if tok.isdigit() else float(tok) for tok in tokens]
    except ValueError as err:
        raise click.BadParameter(
            f"{param.name} must be a number or comma-separated list of numbers."
        ) from err
38+
39+
40+
@click.group()
def cli():
    # Root command group for the guidellm console script; subcommands
    # (e.g. `benchmark`) register themselves on it via @cli.command().
    # NOTE: intentionally no docstring — click would surface it as help text.
    pass
43+
44+
45+
@cli.command()
@click.option(
    "--target",
    required=True,
    type=str,
    help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
)
@click.option(
    "--backend-type",
    type=click.Choice(list(get_args(BackendType))),
    help=(
        "The type of backend to use to run requests against. Defaults to 'openai_http'."
        f" Supported types: {', '.join(get_args(BackendType))}"
    ),
    default="openai_http",
)
@click.option(
    "--backend-args",
    callback=parse_json,
    default=None,
    help=(
        "A JSON string containing any arguments to pass to the backend as a "
        "dict with **kwargs."
    ),
)
@click.option(
    "--model",
    default=None,
    type=str,
    help=(
        "The ID of the model to benchmark within the backend. "
        "If None provided (default), then it will use the first model available."
    ),
)
@click.option(
    "--processor",
    default=None,
    type=str,
    help=(
        "The processor or tokenizer to use to calculate token counts for statistics "
        "and synthetic data generation. If None provided (default), will load "
        "using the model arg, if needed."
    ),
)
@click.option(
    "--processor-args",
    default=None,
    callback=parse_json,
    help=(
        "A JSON string containing any arguments to pass to the processor constructor "
        "as a dict with **kwargs."
    ),
)
@click.option(
    "--data",
    required=True,
    type=str,
    help=(
        "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
        "a path to a data file csv, json, jsonl, or txt, "
        "or a synthetic data config as a json or key=value string."
    ),
)
@click.option(
    "--data-args",
    callback=parse_json,
    help=(
        "A JSON string containing any arguments to pass to the dataset creation "
        "as a dict with **kwargs."
    ),
)
@click.option(
    "--data-sampler",
    default=None,
    type=click.Choice(["random"]),
    help=(
        "The data sampler type to use. 'random' will add a random shuffle on the data. "
        "Defaults to None"
    ),
)
@click.option(
    "--rate-type",
    required=True,
    type=click.Choice(STRATEGY_PROFILE_CHOICES),
    help=(
        "The type of benchmark to run. "
        f"Supported types {', '.join(STRATEGY_PROFILE_CHOICES)}. "
    ),
)
@click.option(
    "--rate",
    default=None,
    callback=parse_number_str,
    help=(
        "The rates to run the benchmark at. "
        "Can be a single number or a comma-separated list of numbers. "
        "For rate-type=sweep, this is the number of benchmarks it runs in the sweep. "
        "For rate-type=concurrent, this is the number of concurrent requests. "
        "For rate-type=async,constant,poisson, this is the rate requests per second. "
        "For rate-type=synchronous,throughput, this must not be set."
    ),
)
@click.option(
    "--max-seconds",
    type=float,
    help=(
        "The maximum number of seconds each benchmark can run for. "
        "If None, will run until max_requests or the data is exhausted."
    ),
)
@click.option(
    "--max-requests",
    type=int,
    help=(
        "The maximum number of requests each benchmark can run for. "
        "If None, will run until max_seconds or the data is exhausted."
    ),
)
@click.option(
    "--warmup-percent",
    type=float,
    default=None,
    help=(
        # Typo fixes in user-facing help: "max-requets" -> "max-requests",
        # "lenth" -> "length".
        "The percent of the benchmark (based on max-seconds, max-requests, "
        "or length of dataset) to run as a warmup and not include in the final results. "
        "Defaults to None."
    ),
)
@click.option(
    "--cooldown-percent",
    type=float,
    help=(
        # Typo fixes in user-facing help: "max-requets" -> "max-requests",
        # "lenth" -> "length".
        "The percent of the benchmark (based on max-seconds, max-requests, or length "
        "of dataset) to run as a cooldown and not include in the final results. "
        "Defaults to None."
    ),
)
@click.option(
    "--disable-progress",
    is_flag=True,
    help="Set this flag to disable progress updates to the console",
)
@click.option(
    "--display-scheduler-stats",
    is_flag=True,
    help="Set this flag to display stats for the processes running the benchmarks",
)
@click.option(
    "--disable-console-outputs",
    is_flag=True,
    help="Set this flag to disable console output",
)
@click.option(
    "--output-path",
    type=click.Path(),
    default=Path.cwd() / "benchmarks.json",
    help=(
        "The path to save the output to. If it is a directory, "
        "it will save benchmarks.json under it. "
        "Otherwise, json, yaml, or csv files are supported for output types "
        "which will be read from the extension for the file path."
    ),
)
@click.option(
    "--output-extras",
    callback=parse_json,
    help="A JSON string of extra data to save with the output benchmarks",
)
@click.option(
    "--random-seed",
    default=42,
    type=int,
    help="The random seed to use for benchmarking to ensure reproducibility.",
)
def benchmark(
    target,
    backend_type,
    backend_args,
    model,
    processor,
    processor_args,
    data,
    data_args,
    data_sampler,
    rate_type,
    rate,
    max_seconds,
    max_requests,
    warmup_percent,
    cooldown_percent,
    disable_progress,
    display_scheduler_stats,
    disable_console_outputs,
    output_path,
    output_extras,
    random_seed,
):
    # Thin CLI shim: every option maps one-to-one onto a keyword argument of
    # benchmark_generative_text, except the three negative flags
    # (--disable-progress, --disable-console-outputs) which are inverted into
    # the positive show_*/output_console parameters the API expects.
    # The coroutine is driven to completion on a fresh event loop.
    asyncio.run(
        benchmark_generative_text(
            target=target,
            backend_type=backend_type,
            backend_args=backend_args,
            model=model,
            processor=processor,
            processor_args=processor_args,
            data=data,
            data_args=data_args,
            data_sampler=data_sampler,
            rate_type=rate_type,
            rate=rate,
            max_seconds=max_seconds,
            max_requests=max_requests,
            warmup_percent=warmup_percent,
            cooldown_percent=cooldown_percent,
            show_progress=not disable_progress,
            show_progress_scheduler_stats=display_scheduler_stats,
            output_console=not disable_console_outputs,
            output_path=output_path,
            output_extras=output_extras,
            random_seed=random_seed,
        )
    )
267+
268+
269+
if __name__ == "__main__":
    # Support direct execution (`python -m guidellm`) in addition to the
    # console-script entry point declared in pyproject.toml.
    cli()

src/guidellm/backend/backend.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ async def validate(self):
115115
If not successful, raises the appropriate exception.
116116
"""
117117
logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
118-
self.check_setup()
119-
models = self.available_models()
118+
await self.check_setup()
119+
models = await self.available_models()
120120
if not models:
121121
raise ValueError("No models available for the backend")
122122

@@ -126,7 +126,7 @@ async def validate(self):
126126
pass
127127

128128
@abstractmethod
129-
def check_setup(self):
129+
async def check_setup(self):
130130
"""
131131
Check the setup for the backend.
132132
If unsuccessful, raises the appropriate exception.
@@ -136,7 +136,17 @@ def check_setup(self):
136136
...
137137

138138
@abstractmethod
139-
def available_models(self) -> List[str]:
139+
async def prepare_multiprocessing(self):
140+
"""
141+
Prepare the backend for use in a multiprocessing environment.
142+
This is useful for backends that have instance state that can not
143+
be shared across processes and should be cleared out and re-initialized
144+
for each new process.
145+
"""
146+
...
147+
148+
@abstractmethod
149+
async def available_models(self) -> List[str]:
140150
"""
141151
Get the list of available models for the backend.
142152

0 commit comments

Comments
 (0)