From a65b5b11fa8950ca9070ea7b028fe34068c24335 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 20 Aug 2025 11:32:12 -0400 Subject: [PATCH 01/11] Reapply Fix errors with metric accumulation (#266) Fixes an issue in metric calculation that caused incorrect statistics at extreme changes in concurrency and an issue where the first decode token was not counted in total tokens per second. - [x] Fixed issue where merged concurrency change events would double-count concurrency - [x] Ensure first decode token is counted when calculating total tokens per second - Run unit tests: `tox -e test-unit -- -m "regression and sanity"` --- - [x] "I certify that all code in this PR is my own, except as noted below." - [x] Includes AI-assisted code completion - [ ] Includes code generated by an AI application - [x] Includes AI-generated tests (NOTE: AI written tests should have a docstring that includes `## WRITTEN BY AI ##`) --------- Signed-off-by: Samuel Monson --- src/guidellm/utils/statistics.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/guidellm/utils/statistics.py b/src/guidellm/utils/statistics.py index c820de9d..acd9d4f1 100644 --- a/src/guidellm/utils/statistics.py +++ b/src/guidellm/utils/statistics.py @@ -275,18 +275,9 @@ def from_request_times( """ if distribution_type == "concurrency": # convert to delta changes based on when requests were running - time_deltas: dict[float, int] = defaultdict(int) - for start, end in requests: - time_deltas[start] += 1 - time_deltas[end] -= 1 - - # convert to the events over time measuring concurrency changes - events = [] - active = 0 - - for time, delta in sorted(time_deltas.items()): - active += delta - events.append((time, active)) + events = [(start, 1) for start, _ in requests] + [ + (end, -1) for _, end in requests + ] elif distribution_type == "rate": # convert to events for when requests finished global_start = min(start for start, _ in requests) if requests else 0 @@ 
-313,6 +304,16 @@ def from_request_times( else: flattened_events.append((time, val)) + if distribution_type == "concurrency": + # convert to the events over time measuring concurrency changes + events_over_time: list[tuple[float, float]] = [] + active = 0 + for time, delta in flattened_events: + active += delta # type: ignore [assignment] + events_over_time.append((time, active)) + + flattened_events = events_over_time + # convert to value distribution function distribution: dict[float, float] = defaultdict(float) From e1fb966db608e80e20b0f7acece4f342b2b82632 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Tue, 7 Oct 2025 14:27:19 -0400 Subject: [PATCH 02/11] Disable base class initialization Signed-off-by: Samuel Monson --- src/guidellm/utils/pydantic_utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/guidellm/utils/pydantic_utils.py b/src/guidellm/utils/pydantic_utils.py index 27c2e1cf..d3e3f6e6 100644 --- a/src/guidellm/utils/pydantic_utils.py +++ b/src/guidellm/utils/pydantic_utils.py @@ -275,6 +275,17 @@ class DatabaseConfig(BaseConfig): schema_discriminator: ClassVar[str] = "model_type" + def __new__(cls, *args, **kwargs): # noqa: ARG004 + """ + Prevent direct instantiation of base classes that use this mixin. + + Only allows instantiation of concrete subclasses, not the base class. 
+ """ + base_type = cls.__pydantic_schema_base_type__() + if cls is base_type: + raise TypeError(f"only children of '{cls.__name__}' may be instantiated") + return super().__new__(cls) + @classmethod def register_decorator( cls, clazz: RegisterClassT, name: str | list[str] | None = None From a9aad63c46274e3534be62e26d134e26ae6b7c5a Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Mon, 6 Oct 2025 12:09:23 -0400 Subject: [PATCH 03/11] Test cleanup Signed-off-by: Samuel Monson --- tests/unit/objects/__init__.py | 0 tests/unit/objects/test_pydantic.py | 43 ------------------- .../{objects => utils}/test_statistics.py | 2 +- 3 files changed, 1 insertion(+), 44 deletions(-) delete mode 100644 tests/unit/objects/__init__.py delete mode 100644 tests/unit/objects/test_pydantic.py rename tests/unit/{objects => utils}/test_statistics.py (99%) diff --git a/tests/unit/objects/__init__.py b/tests/unit/objects/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/objects/test_pydantic.py b/tests/unit/objects/test_pydantic.py deleted file mode 100644 index cb7f438f..00000000 --- a/tests/unit/objects/test_pydantic.py +++ /dev/null @@ -1,43 +0,0 @@ -import pytest -from pydantic import computed_field - -from guidellm.objects.pydantic import StandardBaseModel - - -class ExampleModel(StandardBaseModel): - name: str - age: int - - @computed_field # type: ignore[misc] - @property - def computed(self) -> str: - return self.name + " " + str(self.age) - - -@pytest.mark.smoke -def test_standard_base_model_initialization(): - example = ExampleModel(name="John Doe", age=30) - assert example.name == "John Doe" - assert example.age == 30 - assert example.computed == "John Doe 30" - - -@pytest.mark.smoke -def test_standard_base_model_invalid_initialization(): - with pytest.raises(ValueError): - ExampleModel(name="John Doe", age="thirty") # type: ignore[arg-type] - - -@pytest.mark.smoke -def test_standard_base_model_marshalling(): - example = 
ExampleModel(name="John Doe", age=30) - serialized = example.model_dump() - assert serialized["name"] == "John Doe" - assert serialized["age"] == 30 - assert serialized["computed"] == "John Doe 30" - - serialized["computed"] = "Jane Doe 40" - deserialized = ExampleModel.model_validate(serialized) - assert deserialized.name == "John Doe" - assert deserialized.age == 30 - assert deserialized.computed == "John Doe 30" diff --git a/tests/unit/objects/test_statistics.py b/tests/unit/utils/test_statistics.py similarity index 99% rename from tests/unit/objects/test_statistics.py rename to tests/unit/utils/test_statistics.py index ede77175..d0f04d99 100644 --- a/tests/unit/objects/test_statistics.py +++ b/tests/unit/utils/test_statistics.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from guidellm.objects import ( +from guidellm.utils.statistics import ( DistributionSummary, Percentiles, RunningStats, From 440b4e3d0af9c6a91a92992760517214c11f28a7 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Mon, 6 Oct 2025 15:59:44 -0400 Subject: [PATCH 04/11] Fix backend tests Signed-off-by: Samuel Monson --- src/guidellm/scheduler/objects.py | 2 ++ tests/unit/{backend => backends}/__init__.py | 0 tests/unit/{backend => backends}/test_backend.py | 2 +- tests/unit/{backend => backends}/test_objects.py | 1 - tests/unit/{backend => backends}/test_openai_backend.py | 4 ++-- 5 files changed, 5 insertions(+), 4 deletions(-) rename tests/unit/{backend => backends}/__init__.py (100%) rename tests/unit/{backend => backends}/test_backend.py (99%) rename tests/unit/{backend => backends}/test_objects.py (99%) rename tests/unit/{backend => backends}/test_openai_backend.py (99%) diff --git a/src/guidellm/scheduler/objects.py b/src/guidellm/scheduler/objects.py index fdca28b3..21d30ec8 100644 --- a/src/guidellm/scheduler/objects.py +++ b/src/guidellm/scheduler/objects.py @@ -20,6 +20,7 @@ Protocol, TypeVar, Union, + runtime_checkable, ) from pydantic import Field, computed_field @@ -232,6 
+233,7 @@ def model_copy(self, **kwargs) -> ScheduledRequestInfo: # type: ignore[override ) +@runtime_checkable class BackendInterface(Protocol, Generic[RequestT, ResponseT]): """ Abstract interface for request processing backends. diff --git a/tests/unit/backend/__init__.py b/tests/unit/backends/__init__.py similarity index 100% rename from tests/unit/backend/__init__.py rename to tests/unit/backends/__init__.py diff --git a/tests/unit/backend/test_backend.py b/tests/unit/backends/test_backend.py similarity index 99% rename from tests/unit/backend/test_backend.py rename to tests/unit/backends/test_backend.py index 49b65077..ebd0da87 100644 --- a/tests/unit/backend/test_backend.py +++ b/tests/unit/backends/test_backend.py @@ -80,7 +80,7 @@ async def default_model(self) -> str | None: def test_class_signatures(self): """Test Backend inheritance and type relationships.""" assert issubclass(Backend, RegistryMixin) - assert issubclass(Backend, BackendInterface) + assert isinstance(Backend, BackendInterface) assert hasattr(Backend, "create") assert hasattr(Backend, "register") assert hasattr(Backend, "get_registered_object") diff --git a/tests/unit/backend/test_objects.py b/tests/unit/backends/test_objects.py similarity index 99% rename from tests/unit/backend/test_objects.py rename to tests/unit/backends/test_objects.py index 34a6350c..bf903733 100644 --- a/tests/unit/backend/test_objects.py +++ b/tests/unit/backends/test_objects.py @@ -397,7 +397,6 @@ def valid_instances(self, request): def test_class_signatures(self): """Test GenerationRequestTimings inheritance and type relationships.""" assert issubclass(GenerationRequestTimings, MeasuredRequestTimings) - assert issubclass(GenerationRequestTimings, StandardBaseModel) assert hasattr(GenerationRequestTimings, "model_dump") assert hasattr(GenerationRequestTimings, "model_validate") diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backends/test_openai_backend.py similarity index 99% rename from 
tests/unit/backend/test_openai_backend.py rename to tests/unit/backends/test_openai_backend.py index 7c7f528d..2180b501 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backends/test_openai_backend.py @@ -237,7 +237,7 @@ async def test_info(self): target="http://test", model="test-model", timeout=30.0 ) - info = backend.info() + info = backend.info assert info["target"] == "http://test" assert info["model"] == "test-model" @@ -1074,7 +1074,7 @@ def test_get_chat_message_media_item_jpeg_file(self): mock_image = Mock(spec=Image.Image) mock_image.tobytes.return_value = b"fake_jpeg_data" - with patch("guidellm.backend.openai.Image.open", return_value=mock_image): + with patch("guidellm.backends.openai.Image.open", return_value=mock_image): result = backend._get_chat_message_media_item(mock_jpeg_path) expected_data = base64.b64encode(b"fake_jpeg_data").decode("utf-8") From 272304c316586d7ce25936935757fd3e1a762aaf Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Mon, 6 Oct 2025 16:22:30 -0400 Subject: [PATCH 05/11] Initial scheduler test fixes Signed-off-by: Samuel Monson --- tests/unit/scheduler/test_constraints.py | 4 ++-- tests/unit/scheduler/test_environment.py | 2 +- tests/unit/scheduler/test_objects.py | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/unit/scheduler/test_constraints.py b/tests/unit/scheduler/test_constraints.py index 931af413..1e343a57 100644 --- a/tests/unit/scheduler/test_constraints.py +++ b/tests/unit/scheduler/test_constraints.py @@ -286,11 +286,11 @@ def test_create_constraint_raises(self, valid_instances): def test_call_raises(self, valid_instances): """Test that calling constraint raises RuntimeError.""" instance, _ = valid_instances - state = SchedulerState(node_id="test_node", num_processes=1, start_time=0.0) + state = SchedulerState(node_id=0, num_processes=1, start_time=0.0) request = ScheduledRequestInfo( request_id="test_request", status="pending", - scheduler_node_id="test_node", 
+ scheduler_node_id=0, scheduler_process_id=1, scheduler_start_time=0.0, ) diff --git a/tests/unit/scheduler/test_environment.py b/tests/unit/scheduler/test_environment.py index c73abe42..ba0e2787 100644 --- a/tests/unit/scheduler/test_environment.py +++ b/tests/unit/scheduler/test_environment.py @@ -246,7 +246,7 @@ async def test_sync_run_start(self, valid_instances, mock_time, delay, expected) with ( patch("time.time", return_value=mock_time), - patch("guidellm.scheduler.environment.settings") as mock_settings, + patch("guidellm.scheduler.environments.settings") as mock_settings, ): mock_settings.scheduler_start_delay_non_distributed = delay start_time = await instance.sync_run_start() diff --git a/tests/unit/scheduler/test_objects.py b/tests/unit/scheduler/test_objects.py index df794ff8..2fc63988 100644 --- a/tests/unit/scheduler/test_objects.py +++ b/tests/unit/scheduler/test_objects.py @@ -110,7 +110,7 @@ def test_generic_type_parameters(self): if hasattr(generic_base, "__args__"): type_params = generic_base.__args__ - assert len(type_params) == 3, "Should have 3 type parameters" + assert len(type_params) == 2, "Should have 2 type parameters" param_names = [param.__name__ for param in type_params] expected_names = ["RequestT", "ResponseT"] assert param_names == expected_names @@ -119,7 +119,7 @@ def test_generic_type_parameters(self): def test_implementation_construction(self): """Test that a complete concrete implementation can be instantiated.""" - class ConcreteBackend(BackendInterface[str, MeasuredRequestTimings, str]): + class ConcreteBackend(BackendInterface[str, str]): @property def processes_limit(self) -> int | None: return 4 @@ -162,7 +162,7 @@ async def resolve( async def test_implementation_async_methods(self): # noqa: C901 """Test that async methods work correctly in concrete implementation.""" - class AsyncBackend(BackendInterface[dict, MeasuredRequestTimings, dict]): + class AsyncBackend(BackendInterface[dict, dict]): def __init__(self): 
self.startup_called = False self.validate_called = False @@ -434,7 +434,6 @@ def valid_instances(self, request): @pytest.mark.smoke def test_class_signatures(self): """Test MeasuredRequestTimings inheritance and type relationships.""" - assert issubclass(MeasuredRequestTimings, StandardBaseModel) assert hasattr(MeasuredRequestTimings, "model_dump") assert hasattr(MeasuredRequestTimings, "model_validate") From 544c8887cec6fccb7a24dfde23ebe108e19d4e08 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Tue, 7 Oct 2025 15:41:02 -0400 Subject: [PATCH 06/11] Fix MeasuredRequestTimings tests Signed-off-by: Samuel Monson --- tests/unit/scheduler/test_objects.py | 55 ++++++++++++++++------------ 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/tests/unit/scheduler/test_objects.py b/tests/unit/scheduler/test_objects.py index 2fc63988..fc5610fd 100644 --- a/tests/unit/scheduler/test_objects.py +++ b/tests/unit/scheduler/test_objects.py @@ -3,7 +3,7 @@ import inspect import typing from collections.abc import AsyncIterator -from typing import Any, Optional, TypeVar, Union +from typing import Any, Literal, Optional, TypeVar, Union import pytest from pydantic import ValidationError @@ -25,6 +25,13 @@ from guidellm.utils import StandardBaseModel +@MeasuredRequestTimings.register("test_request_timings") +class ConcreteMeasuredRequestTimings(MeasuredRequestTimings): + """Concrete test implementation of MeasuredRequestTimings for testing.""" + + timings_type: Literal["test_request_timings"] = "test_request_timings" + + def test_request_t(): """Validate that RequestT is a TypeVar usable for generics and isn't bound.""" assert isinstance(RequestT, TypeVar) @@ -400,19 +407,23 @@ class TestRequestTimings: @pytest.fixture( params=[ - {}, + {"timings_type": "test_request_timings"}, { + "timings_type": "test_request_timings", "request_start": None, "request_end": None, }, { + "timings_type": "test_request_timings", "request_start": 1000.0, "request_end": 1100.0, }, { + 
"timings_type": "test_request_timings", "request_start": 1000.0, }, { + "timings_type": "test_request_timings", "request_start": 0.0, "request_end": 0.0, }, @@ -428,7 +439,7 @@ class TestRequestTimings: def valid_instances(self, request): """Creates various valid configurations of MeasuredRequestTimings.""" constructor_args = request.param - instance = MeasuredRequestTimings(**constructor_args) + instance = MeasuredRequestTimings.model_validate(constructor_args) return instance, constructor_args @pytest.mark.smoke @@ -446,7 +457,13 @@ def test_class_signatures(self): assert field_info.default is None @pytest.mark.smoke - def test_initialization(self, valid_instances): + def test_initialization(self): + """Base class initialization should fail.""" + with pytest.raises(TypeError): + MeasuredRequestTimings() + + @pytest.mark.smoke + def test_validation(self, valid_instances): """Test initialization with valid configurations.""" instance, constructor_args = valid_instances assert isinstance(instance, MeasuredRequestTimings) @@ -467,9 +484,9 @@ def test_initialization(self, valid_instances): ) def test_invalid_initialization(self, field, value): """Test invalid initialization scenarios.""" - kwargs = {field: value} + kwargs = {"timings_type": "test_request_timings", field: value} with pytest.raises(ValidationError): - MeasuredRequestTimings(**kwargs) + MeasuredRequestTimings.model_validate(kwargs) @pytest.mark.smoke def test_marshalling(self, valid_instances): @@ -533,6 +550,7 @@ class TestScheduledRequestInfo: "finalized": 2150.0, }, "request_timings": { + "timings_type": "test_request_timings", "request_start": 2060.0, "request_end": 2110.0, }, @@ -585,8 +603,8 @@ def valid_instances(self, request): **constructor_args["scheduler_timings"] ) if "request_timings" in constructor_args: - constructor_args["request_timings"] = MeasuredRequestTimings( - **constructor_args["request_timings"] + constructor_args["request_timings"] = MeasuredRequestTimings.model_validate( + 
constructor_args["request_timings"] ) instance = ScheduledRequestInfo(**constructor_args) @@ -596,7 +614,6 @@ def valid_instances(self, request): def test_class_signatures(self): """Test ScheduledRequestInfo inheritance and type relationships.""" assert issubclass(ScheduledRequestInfo, StandardBaseModel) - assert issubclass(ScheduledRequestInfo, typing.Generic) assert hasattr(ScheduledRequestInfo, "model_dump") assert hasattr(ScheduledRequestInfo, "model_validate") @@ -606,18 +623,6 @@ def test_class_signatures(self): assert isinstance(ScheduledRequestInfo.started_at, property) assert isinstance(ScheduledRequestInfo.completed_at, property) - # Check that it's properly generic - orig_bases = getattr(ScheduledRequestInfo, "__orig_bases__", ()) - generic_base = next( - ( - base - for base in orig_bases - if hasattr(base, "__origin__") and base.__origin__ is typing.Generic - ), - None, - ) - assert generic_base is not None - # Check required fields fields = ScheduledRequestInfo.model_fields for key in self.CHECK_KEYS: @@ -719,7 +724,9 @@ def test_started_at_property(self): scheduler_process_id=0, scheduler_start_time=1000.0, scheduler_timings=RequestSchedulerTimings(resolve_start=2000.0), - request_timings=MeasuredRequestTimings(request_start=2100.0), + request_timings=MeasuredRequestTimings.model_validate( + {"timings_type": "test_request_timings", "request_start": 2100.0} + ), ) assert instance.started_at == 2100.0 @@ -755,7 +762,9 @@ def test_completed_at_property(self): scheduler_process_id=0, scheduler_start_time=1000.0, scheduler_timings=RequestSchedulerTimings(resolve_end=2000.0), - request_timings=MeasuredRequestTimings(request_end=2100.0), + request_timings=MeasuredRequestTimings.model_validate( + {"timings_type": "test_request_timings", "request_end": 2100.0} + ), ) assert instance.completed_at == 2100.0 From 5032e9e3bc5ae5caba70af8f09e0eb4a10b14915 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Tue, 7 Oct 2025 17:14:34 -0400 Subject: [PATCH 07/11] Patch 
time.time in workgroup lifecycle test Signed-off-by: Samuel Monson --- tests/unit/scheduler/test_worker_group.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/unit/scheduler/test_worker_group.py b/tests/unit/scheduler/test_worker_group.py index b72fb95b..ff87c0b9 100644 --- a/tests/unit/scheduler/test_worker_group.py +++ b/tests/unit/scheduler/test_worker_group.py @@ -9,6 +9,7 @@ from multiprocessing.process import BaseProcess from multiprocessing.synchronize import Barrier, Event from typing import Any, Generic, Literal +from unittest.mock import patch import pytest from pydantic import Field @@ -48,6 +49,23 @@ class MockRequestTimings(MeasuredRequestTimings): timings_type: Literal["mock"] = Field(default="mock") +class MockTime: + """Deterministic time mock for testing.""" + + def __init__(self, start_time: float = 1000.0): + self.current_time = start_time + self.increment = 0.1 + + def time(self) -> float: + """Return current mock time and increment for next call.""" + current = self.current_time + self.current_time += self.increment + return current + + +mock_time = MockTime() + + class MockBackend(BackendInterface): """Mock backend for testing worker group functionality.""" @@ -67,6 +85,7 @@ def processes_limit(self) -> int | None: def requests_limit(self) -> int | None: return self._requests_limit + @property def info(self) -> dict[str, Any]: return {"type": "mock"} @@ -249,6 +268,7 @@ def test_invalid_initialization_missing(self): @pytest.mark.smoke @async_timeout(10) @pytest.mark.asyncio + @patch.object(time, "time", mock_time.time) async def test_lifecycle(self, valid_instances: tuple[WorkerProcessGroup, dict]): # noqa: C901, PLR0912 """Test the lifecycle methods of WorkerProcessGroup.""" instance, constructor_args = valid_instances From 4971e561f892c2fd744472c51543529eb0b20dfc Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Tue, 7 Oct 2025 18:14:29 -0400 Subject: [PATCH 08/11] Tear down worker process group in instance 
fixture Signed-off-by: Samuel Monson --- tests/unit/scheduler/test_worker_group.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/unit/scheduler/test_worker_group.py b/tests/unit/scheduler/test_worker_group.py index ff87c0b9..80bb6c23 100644 --- a/tests/unit/scheduler/test_worker_group.py +++ b/tests/unit/scheduler/test_worker_group.py @@ -163,7 +163,19 @@ def valid_instances(self, request): """Fixture providing test data for WorkerProcessGroup.""" constructor_args = request.param.copy() instance = WorkerProcessGroup(**request.param, backend=MockBackend()) - return instance, constructor_args + yield instance, constructor_args + + # Shutting down. Attempting shut down. + try: + if hasattr(instance, "processes") and instance.processes is not None: + asyncio.run(instance.shutdown()) + # It's not...it's-it's not...it's not shutting down...it's not... + except Exception: # noqa: BLE001 + if hasattr(instance, "processes") and instance.processes is not None: + # Gahhh...! 
+ for proc in instance.processes: + proc.kill() + proc.join(timeout=1.0) @pytest.mark.smoke def test_class_signatures(self, valid_instances): From 567689595f5cafd1b30d93f1d8e698aab5d36c2c Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 8 Oct 2025 16:02:35 -0400 Subject: [PATCH 09/11] Match main tests to current CLI Signed-off-by: Samuel Monson --- tests/unit/test_main.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index e813dba4..c8fa71c2 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -19,10 +19,10 @@ def test_benchmark_run_with_backend_args(): "--backend-args", '{"headers": {"Authorization": "Bearer my-token"}, "verify": false}', "--target", - "http://localhost:8000", + "http://localhost:9", "--data", "prompt_tokens=1,output_tokens=1", - "--rate-type", + "--profile", "constant", "--rate", "1", @@ -36,7 +36,7 @@ def test_benchmark_run_with_backend_args(): assert "Invalid header format" not in result.output -@patch("guidellm.__main__.benchmark_with_scenario") +@patch("guidellm.__main__.benchmark_generative_text") def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path): """ Tests that --backend-args from the CLI correctly overrides scenario @@ -47,11 +47,11 @@ def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path): # Create a scenario file with a header that should be overridden and removed scenario_content = { "backend_type": "openai_http", - "backend_args": {"headers": {"Authorization": "should-be-removed"}}, + "backend_kwargs": {"headers": {"Authorization": "should-be-removed"}}, "data": "prompt_tokens=10,output_tokens=10", "max_requests": 1, "target": "http://dummy-target", - "rate_type": "synchronous", + "profile": "synchronous", "processor": "gpt2", } with scenario_path.open("w") as f: @@ -65,7 +65,7 @@ def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path): "run", "--scenario", 
str(scenario_path), - "--backend-args", + "--backend-kwargs", '{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}', ], catch_exceptions=False, @@ -79,6 +79,6 @@ def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path): scenario = call_args["scenario"] # Verify the backend_args were merged correctly - backend_args = scenario.backend_args + backend_args = scenario.backend_kwargs expected_headers = {"Authorization": None, "Custom-Header": "Custom-Value"} assert backend_args["headers"] == expected_headers From 5f36174595d0a3ed0a581005f453b57a8dc772fb Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 8 Oct 2025 16:04:30 -0400 Subject: [PATCH 10/11] Various small fixes to utils tests Signed-off-by: Samuel Monson --- tests/unit/utils/test_auto_importer.py | 3 ++- tests/unit/utils/test_registry.py | 4 +++- tests/unit/utils/test_text.py | 28 ++------------------------ 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/tests/unit/utils/test_auto_importer.py b/tests/unit/utils/test_auto_importer.py index cc71bce3..5f930ba2 100644 --- a/tests/unit/utils/test_auto_importer.py +++ b/tests/unit/utils/test_auto_importer.py @@ -4,6 +4,7 @@ from __future__ import annotations +import sys from unittest import mock import pytest @@ -191,9 +192,9 @@ class TestClass(AutoImporterMixin): mock_import.assert_any_call("test.package.subpackage") @pytest.mark.sanity - @mock.patch("sys.modules", {"test.package.existing": mock.MagicMock()}) @mock.patch("importlib.import_module") @mock.patch("pkgutil.walk_packages") + @mock.patch.dict(sys.modules, {"test.package.existing": mock.MagicMock()}) def test_skip_already_imported_modules(self, mock_walk, mock_import): """Test that modules already in sys.modules are tracked but not re-imported.""" diff --git a/tests/unit/utils/test_registry.py b/tests/unit/utils/test_registry.py index eed126d3..47253b72 100644 --- a/tests/unit/utils/test_registry.py +++ b/tests/unit/utils/test_registry.py @@ -579,7 
+579,9 @@ def validate_value(value: int) -> bool: if hasattr(inspect, "get_annotations"): # Python 3.10+ try: - annotations = inspect.get_annotations(registered_class.__init__) + annotations = inspect.get_annotations( + registered_class.__init__, eval_str=True + ) assert "value" in annotations assert annotations["value"] is int return_ann = annotations.get("return") diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py index 3774ca1f..154291d6 100644 --- a/tests/unit/utils/test_text.py +++ b/tests/unit/utils/test_text.py @@ -42,7 +42,7 @@ class TestFormatValueDisplay: "expected", ), [ - (42.0, "test", "", None, None, None, "42 [info]test[/info]"), + (42.0, "test", "", None, None, 0, "42 [info]test[/info]"), (42.5, "test", "ms", None, None, 1, "42.5ms [info]test[/info]"), (42.123, "test", "", None, 5, 2, " 42.12 [info]test[/info]"), ( @@ -78,34 +78,10 @@ def test_invocation( assert label in result assert units in result value_check = ( - str(int(value)) - if decimal_places == 0 - else ( - f"{value:.{decimal_places}f}" - if decimal_places is not None - else str(value) - ) + str(int(value)) if decimal_places == 0 else f"{value:.{decimal_places}f}" ) assert value_check in result or str(value) in result - @pytest.mark.sanity - @pytest.mark.parametrize( - ("value", "label"), - [ - (None, "test"), - (42.0, None), - ("not_number", "test"), - ], - ) - def test_invocation_with_none_values(self, value, label): - """Test format_value_display with None/invalid inputs still works.""" - result = format_value_display(value, label) - assert isinstance(result, str) - if label is not None: - assert str(label) in result - if value is not None: - assert str(value) in result - class TestSplitTextListByLength: """Test suite for split_text_list_by_length.""" From 155623631824f56b8bab6364516ffb6235ac3d03 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Wed, 8 Oct 2025 17:20:35 -0400 Subject: [PATCH 11/11] Fix typing import for python3.10 Signed-off-by: Samuel Monson 
--- src/guidellm/benchmark/types.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/guidellm/benchmark/types.py b/src/guidellm/benchmark/types.py index 04ad4061..1ef65a68 100644 --- a/src/guidellm/benchmark/types.py +++ b/src/guidellm/benchmark/types.py @@ -2,12 +2,13 @@ from collections.abc import Iterable from pathlib import Path -from typing import Any, TypeAliasType +from typing import Any from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict from transformers import ( # type: ignore[import] PreTrainedTokenizerBase, ) +from typing_extensions import TypeAliasType from guidellm.benchmark.aggregator import ( Aggregator,