Merged
Changes from 14 commits

43 commits
1e21612 Adds aiohttp backend (anmarques, Dec 6, 2024)
ce0c3c5 quality fixes (anmarques, Sep 5, 2024)
8cb9876 Quality fixes (anmarques, Sep 6, 2024)
039900a Rework for OpenAI backend to use native http requests with httpx and … (markurtz, Mar 6, 2025)
2b82bd5 Finalize implementation, fix bugs, and ensure unit tests are passing … (markurtz, Mar 8, 2025)
a01db7d Initial state for Scheduling system rework. Still needs testing and f… (markurtz, Mar 10, 2025)
85dee09 Enable temp testing script to work, refactor strategy to be more gene… (markurtz, Mar 10, 2025)
fb3fdd7 Polishing for scheduler (markurtz, Mar 15, 2025)
12835e0 styling fixes (markurtz, Mar 15, 2025)
65869d7 initial groundwork for new benchmark package and classes along with r… (markurtz, Mar 17, 2025)
53e0943 finalize benchmark model objects (markurtz, Mar 17, 2025)
faacb63 Latest state with datasets and benchmarker created (markurtz, Mar 22, 2025)
34f851c latest working state with data loaders and benchmarking API (markurtz, Mar 28, 2025)
cfdc2ed Working clis and entrypoints (markurtz, Apr 2, 2025)
d4f8c1a Fixes for bugs surfaced from testing and enhancements to features bas… (markurtz, Apr 9, 2025)
0a6230b Update src/guidellm/scheduler/scheduler.py (markurtz, Apr 9, 2025)
62cd7e9 Fix synthetic data generation edge case where text is much larger tha… (markurtz, Apr 9, 2025)
94efbd4 Update src/guidellm/dataset/synthetic.py (markurtz, Apr 10, 2025)
a3e86d8 Update src/guidellm/benchmark/benchmark.py (markurtz, Apr 10, 2025)
c7476ab Update src/guidellm/benchmark/benchmark.py (markurtz, Apr 10, 2025)
3d8cd62 Update src/guidellm/benchmark/benchmark.py (markurtz, Apr 10, 2025)
dbc4789 Update src/guidellm/benchmark/benchmark.py (markurtz, Apr 10, 2025)
649a86d Updates for pydantic export with polymorphism and general cleanup (markurtz, Apr 10, 2025)
4183512 Fixes for json / yaml output (markurtz, Apr 10, 2025)
f8161ed Ensure style and types pass, remove tests that are no longer relevant… (markurtz, Apr 11, 2025)
3b821c8 Move metrics to subobject in output (sjmonson, Apr 11, 2025)
5e63061 Move requests to subobject in output (sjmonson, Apr 11, 2025)
82a381f Replace Request breakdown with generic class (sjmonson, Apr 11, 2025)
31adea4 Define sampling sizes and counts as StatusBreakdowns (sjmonson, Apr 11, 2025)
f8c5e7a Set a default type for SuccessfulT (sjmonson, Apr 11, 2025)
93f0fd1 Fix case chnage on requests_per_second (sjmonson, Apr 11, 2025)
00a210d Plumb output changes though progress and summary (sjmonson, Apr 11, 2025)
cf160b6 Pluralization is hard (sjmonson, Apr 11, 2025)
2bedc6d Fix changes after rebase (sjmonson, Apr 11, 2025)
331978b Fix/ignore linting errors due to line length changes (sjmonson, Apr 11, 2025)
c449bde Fix double quotes inside a double qoute f-string (sjmonson, Apr 11, 2025)
4cd904d importlib.resources.files requires valid module (sjmonson, Apr 11, 2025)
48098fc Fix for restructuring of output and general simplification based on f… (markurtz, Apr 11, 2025)
0263361 Fix quality, unit, integration, and e2e tests (markurtz, Apr 11, 2025)
b164b4b Just kidding, let's try that again and hopefully fix quality and tests (markurtz, Apr 11, 2025)
b411db3 Trying one more time for quality (markurtz, Apr 11, 2025)
d0d31d3 Bump min python to 3.9 (sjmonson, Apr 11, 2025)
70aa669 Revert "Bump min python to 3.9" (sjmonson, Apr 11, 2025)
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -32,6 +32,7 @@ dependencies = [
"loguru",
"numpy",
"pillow",
"protobuf",
"pydantic>=2.0.0",
"pydantic-settings>=2.0.0",
"pyyaml>=6.0.0",
@@ -77,7 +78,7 @@ dev = [


[project.entry-points.console_scripts]
guidellm = "guidellm.main:generate_benchmark_report_cli"
guidellm = "guidellm.__main__:cli"
guidellm-config = "guidellm.config:print_config"


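With the console script now pointing at the click group in guidellm.__main__, the CLI can be exercised in-process as well as from the shell. A minimal sketch using click's test runner, assuming guidellm is installed in the environment:

# Sketch: exercise the new entry point via click's CliRunner.
from click.testing import CliRunner

from guidellm.__main__ import cli

runner = CliRunner()
# Equivalent to running `guidellm benchmark --help` from the shell.
result = runner.invoke(cli, ["benchmark", "--help"])
print(result.output)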
16 changes: 12 additions & 4 deletions src/guidellm/__init__.py
@@ -6,14 +6,22 @@
# flake8: noqa

import os
import transformers # type: ignore
import logging
import contextlib

os.environ["TOKENIZERS_PARALLELISM"] = "false" # Silence warnings for tokenizers
transformers.logging.set_verbosity_error() # Silence warnings for transformers

with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(
    devnull
), contextlib.redirect_stdout(devnull):
    from transformers.utils import logging as hf_logging

    # Set the transformers log level to ERROR to suppress the
    # "None of PyTorch, TensorFlow ... found" warning on import
    os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Silence warnings for tokenizers
    hf_logging.set_verbosity_error()
    logging.getLogger("transformers").setLevel(logging.ERROR)

from .config import settings
from .logger import configure_logger, logger
from .main import generate_benchmark_report

__all__ = ["configure_logger", "logger", "settings", "generate_benchmark_report"]
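The devnull redirection above silences anything the transformers import prints at import time. A standalone sketch of the same pattern (the io.StringIO capture and the `import this` stand-in are illustrative, not part of the diff):

import contextlib
import io

# Capture anything a noisy import writes to stdout/stderr.
buffer = io.StringIO()
with contextlib.redirect_stdout(buffer), contextlib.redirect_stderr(buffer):
    import this  # stand-in for a chatty third-party import

print("captured:", len(buffer.getvalue()), "characters")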
270 changes: 270 additions & 0 deletions src/guidellm/__main__.py
@@ -0,0 +1,270 @@
import asyncio
import json
from pathlib import Path
from typing import get_args

import click

from guidellm.backend import BackendType
from guidellm.benchmark import ProfileType, benchmark_generative_text
from guidellm.scheduler import StrategyType

STRATEGY_PROFILE_CHOICES = set(
    list(get_args(ProfileType)) + list(get_args(StrategyType))
)


def parse_json(ctx, param, value):
    if value is None:
        return None
    try:
        return json.loads(value)
    except json.JSONDecodeError as err:
        raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err


def parse_number_str(ctx, param, value):
    if value is None:
        return None

    values = value.split(",") if "," in value else [value]

    try:
        return [int(val) if val.isdigit() else float(val) for val in values]
    except ValueError as err:
        raise click.BadParameter(
            f"{param.name} must be a number or comma-separated list of numbers."
        ) from err


@click.group()
def cli():
    pass


@cli.command()
@click.option(
    "--target",
    required=True,
    type=str,
    help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
)
@click.option(
    "--backend-type",
    type=click.Choice(list(get_args(BackendType))),
    help=(
        "The type of backend to use to run requests against. Defaults to 'openai_http'."
        f" Supported types: {', '.join(get_args(BackendType))}"
    ),
    default="openai_http",
)
@click.option(
    "--backend-args",
    callback=parse_json,
    default=None,
    help=(
        "A JSON string containing any arguments to pass to the backend as a "
        "dict with **kwargs."
    ),
)
@click.option(
    "--model",
    default=None,
    type=str,
    help=(
        "The ID of the model to benchmark within the backend. "
        "If not provided (default), the first available model is used."
    ),
)
@click.option(
    "--processor",
    default=None,
    type=str,
    help=(
        "The processor or tokenizer to use to calculate token counts for statistics "
        "and synthetic data generation. If not provided (default), it is loaded "
        "using the model arg, if needed."
    ),
)
@click.option(
    "--processor-args",
    default=None,
    callback=parse_json,
    help=(
        "A JSON string containing any arguments to pass to the processor constructor "
        "as a dict with **kwargs."
    ),
)
@click.option(
    "--data",
    required=True,
    type=str,
    help=(
        "The HuggingFace dataset ID, a path to a HuggingFace dataset, "
        "a path to a data file (csv, json, jsonl, or txt), "
        "or a synthetic data config as a json or key=value string."
    ),
)
@click.option(
    "--data-args",
    callback=parse_json,
    help=(
        "A JSON string containing any arguments to pass to the dataset creation "
        "as a dict with **kwargs."
    ),
)
@click.option(
    "--data-sampler",
    default=None,
    type=click.Choice(["random"]),
    help=(
        "The data sampler type to use. 'random' adds a random shuffle on the data. "
        "Defaults to None."
    ),
)
@click.option(
    "--rate-type",
    required=True,
    type=click.Choice(STRATEGY_PROFILE_CHOICES),
    help=(
        "The type of benchmark to run. "
        f"Supported types: {', '.join(STRATEGY_PROFILE_CHOICES)}."
    ),
)
@click.option(
    "--rate",
    default=None,
    callback=parse_number_str,
    help=(
        "The rates to run the benchmark at. "
        "Can be a single number or a comma-separated list of numbers. "
        "For rate-type=sweep, this is the number of benchmarks to run in the sweep. "
        "For rate-type=concurrent, this is the number of concurrent requests. "
        "For rate-type=async,constant,poisson, this is the rate in requests per second. "
        "For rate-type=synchronous,throughput, this must not be set."
    ),
)
@click.option(
    "--max-seconds",
    type=float,
    help=(
        "The maximum number of seconds each benchmark can run for. "
        "If None, will run until max_requests or the data is exhausted."
    ),
)
@click.option(
    "--max-requests",
    type=int,
    help=(
        "The maximum number of requests each benchmark can run for. "
        "If None, will run until max_seconds or the data is exhausted."
    ),
)
@click.option(
    "--warmup-percent",
    type=float,
    default=None,
    help=(
        "The percent of the benchmark (based on max-seconds, max-requests, "
        "or length of dataset) to run as a warmup and not include in the final results. "
        "Defaults to None."
    ),
)
@click.option(
    "--cooldown-percent",
    type=float,
    help=(
        "The percent of the benchmark (based on max-seconds, max-requests, or length "
        "of dataset) to run as a cooldown and not include in the final results. "
        "Defaults to None."
    ),
)
@click.option(
    "--disable-progress",
    is_flag=True,
    help="Set this flag to disable progress updates to the console.",
)
@click.option(
    "--display-scheduler-stats",
    is_flag=True,
    help="Set this flag to display stats for the processes running the benchmarks.",
)
@click.option(
    "--disable-console-outputs",
    is_flag=True,
    help="Set this flag to disable console output.",
)
@click.option(
    "--output-path",
    type=click.Path(),
    default=Path.cwd() / "benchmarks.json",
    help=(
        "The path to save the output to. If it is a directory, "
        "benchmarks.json is saved under it. "
        "Otherwise, json, yaml, or csv output types are supported and "
        "are inferred from the file path's extension."
    ),
)
@click.option(
    "--output-extras",
    callback=parse_json,
    help="A JSON string of extra data to save with the output benchmarks.",
)
@click.option(
    "--random-seed",
    default=42,
    type=int,
    help="The random seed to use for benchmarking to ensure reproducibility.",
)
def benchmark(
    target,
    backend_type,
    backend_args,
    model,
    processor,
    processor_args,
    data,
    data_args,
    data_sampler,
    rate_type,
    rate,
    max_seconds,
    max_requests,
    warmup_percent,
    cooldown_percent,
    disable_progress,
    display_scheduler_stats,
    disable_console_outputs,
    output_path,
    output_extras,
    random_seed,
):
    asyncio.run(
        benchmark_generative_text(
            target=target,
            backend_type=backend_type,
            backend_args=backend_args,
            model=model,
            processor=processor,
            processor_args=processor_args,
            data=data,
            data_args=data_args,
            data_sampler=data_sampler,
            rate_type=rate_type,
            rate=rate,
            max_seconds=max_seconds,
            max_requests=max_requests,
            warmup_percent=warmup_percent,
            cooldown_percent=cooldown_percent,
            show_progress=not disable_progress,
            show_progress_scheduler_stats=display_scheduler_stats,
            output_console=not disable_console_outputs,
            output_path=output_path,
            output_extras=output_extras,
            random_seed=random_seed,
        )
    )


if __name__ == "__main__":
    cli()
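The two parsing callbacks above are plain functions, so they are easy to sanity-check outside of click. A quick sketch, assuming the module is importable as guidellm.__main__ (the success paths never touch ctx or param, so None stands in for both):

from guidellm.__main__ import parse_json, parse_number_str

assert parse_json(None, None, None) is None
assert parse_json(None, None, '{"timeout": 30}') == {"timeout": 30}
assert parse_number_str(None, None, "10") == [10]  # single integer value
assert parse_number_str(None, None, "0.5,2") == [0.5, 2]  # mixed comma-separated list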
40 changes: 27 additions & 13 deletions src/guidellm/backend/backend.py
@@ -1,4 +1,3 @@
import asyncio
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Type, Union
@@ -102,27 +101,32 @@ def model(self) -> Optional[str]:
"""
...

def validate(self):
@property
@abstractmethod
def info(self) -> Dict[str, Any]:
"""
:return: The information about the backend.
"""
...

async def validate(self):
"""
Handle final setup and validate the backend is ready for use.
If not successful, raises the appropriate exception.
"""
logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
self.check_setup()
models = self.available_models()
await self.check_setup()
models = await self.available_models()
if not models:
raise ValueError("No models available for the backend")

async def _test_request():
async for _ in self.text_completions(
prompt="Test connection", output_token_count=1
): # type: ignore[attr-defined]
pass

asyncio.run(_test_request())
async for _ in self.text_completions(
prompt="Test connection", output_token_count=1
): # type: ignore[attr-defined]
pass

@abstractmethod
def check_setup(self):
async def check_setup(self):
"""
Check the setup for the backend.
If unsuccessful, raises the appropriate exception.
@@ -132,7 +136,17 @@ def check_setup(self):
        ...

    @abstractmethod
    def available_models(self) -> List[str]:
    async def prepare_multiprocessing(self):
        """
        Prepare the backend for use in a multiprocessing environment.
        This is useful for backends that have instance state that cannot
        be shared across processes and should be cleared out and re-initialized
        for each new process.
        """
        ...

    @abstractmethod
    async def available_models(self) -> List[str]:
        """
        Get the list of available models for the backend.

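A minimal sketch of what a backend conforming to the reworked async interface might look like. The class below is hypothetical (EchoBackend is not part of this PR) and only mirrors the abstract methods shown in the hunks above:

from typing import Any, AsyncGenerator, Dict, List


class EchoBackend:
    type_ = "echo"

    @property
    def info(self) -> Dict[str, Any]:
        return {"type": self.type_}

    async def check_setup(self):
        pass  # e.g., probe the target server and raise if unreachable

    async def prepare_multiprocessing(self):
        pass  # clear per-process state such as open HTTP clients

    async def available_models(self) -> List[str]:
        return ["echo-1"]

    async def text_completions(
        self, prompt: str, output_token_count: int = 1
    ) -> AsyncGenerator[str, None]:
        # A real backend would stream generated tokens from the server.
        yield prompt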