From a65b5b11fa8950ca9070ea7b028fe34068c24335 Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Wed, 20 Aug 2025 11:32:12 -0400
Subject: [PATCH 01/11] Reapply Fix errors with metric accumulation (#266)

<!--
Include a short paragraph of the changes introduced in this PR.
If this PR requires additional context or rationale, explain why
the changes are necessary.
-->
Fixes an issue in metric calculation that caused incorrect statistics at
extreme changes in concurrency and an issue where the first decode token
was not counted in total tokens per second.

<!--
Provide a detailed list of all changes introduced in this pull request.
-->
- [x] Fixed issue where merged concurrency change events would
double-count concurrency
- [x] Ensure first decode token is counted when calculating total tokens
per second

<!--
List the steps needed to test this PR.
-->
- Run unit tests: `tox -e test-unit -- -m "regression and sanity"`

---

- [x] "I certify that all code in this PR is my own, except as noted
below."

- [x] Includes AI-assisted code completion
- [ ] Includes code generated by an AI application
- [x] Includes AI-generated tests (NOTE: AI written tests should have a
docstring that includes `## WRITTEN BY AI ##`)

---------

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 src/guidellm/utils/statistics.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/guidellm/utils/statistics.py b/src/guidellm/utils/statistics.py
index c820de9d..acd9d4f1 100644
--- a/src/guidellm/utils/statistics.py
+++ b/src/guidellm/utils/statistics.py
@@ -275,18 +275,9 @@ def from_request_times(
         """
         if distribution_type == "concurrency":
             # convert to delta changes based on when requests were running
-            time_deltas: dict[float, int] = defaultdict(int)
-            for start, end in requests:
-                time_deltas[start] += 1
-                time_deltas[end] -= 1
-
-            # convert to the events over time measuring concurrency changes
-            events = []
-            active = 0
-
-            for time, delta in sorted(time_deltas.items()):
-                active += delta
-                events.append((time, active))
+            events = [(start, 1) for start, _ in requests] + [
+                (end, -1) for _, end in requests
+            ]
         elif distribution_type == "rate":
             # convert to events for when requests finished
             global_start = min(start for start, _ in requests) if requests else 0
@@ -313,6 +304,16 @@ def from_request_times(
             else:
                 flattened_events.append((time, val))
 
+        if distribution_type == "concurrency":
+            # convert to the events over time measuring concurrency changes
+            events_over_time: list[tuple[float, float]] = []
+            active = 0
+            for time, delta in flattened_events:
+                active += delta  # type: ignore [assignment]
+                events_over_time.append((time, active))
+
+            flattened_events = events_over_time
+
         # convert to value distribution function
         distribution: dict[float, float] = defaultdict(float)
 

From e1fb966db608e80e20b0f7acece4f342b2b82632 Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Tue, 7 Oct 2025 14:27:19 -0400
Subject: [PATCH 02/11] Disable base class initialization

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 src/guidellm/utils/pydantic_utils.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/guidellm/utils/pydantic_utils.py b/src/guidellm/utils/pydantic_utils.py
index 27c2e1cf..d3e3f6e6 100644
--- a/src/guidellm/utils/pydantic_utils.py
+++ b/src/guidellm/utils/pydantic_utils.py
@@ -275,6 +275,17 @@ class DatabaseConfig(BaseConfig):
 
     schema_discriminator: ClassVar[str] = "model_type"
 
+    def __new__(cls, *args, **kwargs):  # noqa: ARG004
+        """
+        Prevent direct instantiation of base classes that use this mixin.
+
+        Only allows instantiation of concrete subclasses, not the base class.
+        """
+        base_type = cls.__pydantic_schema_base_type__()
+        if cls is base_type:
+            raise TypeError(f"only children of '{cls.__name__}' may be instantiated")
+        return super().__new__(cls)
+
     @classmethod
     def register_decorator(
         cls, clazz: RegisterClassT, name: str | list[str] | None = None

From a9aad63c46274e3534be62e26d134e26ae6b7c5a Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Mon, 6 Oct 2025 12:09:23 -0400
Subject: [PATCH 03/11] Test cleanup

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 tests/unit/objects/__init__.py                |  0
 tests/unit/objects/test_pydantic.py           | 43 -------------------
 .../{objects => utils}/test_statistics.py     |  2 +-
 3 files changed, 1 insertion(+), 44 deletions(-)
 delete mode 100644 tests/unit/objects/__init__.py
 delete mode 100644 tests/unit/objects/test_pydantic.py
 rename tests/unit/{objects => utils}/test_statistics.py (99%)

diff --git a/tests/unit/objects/__init__.py b/tests/unit/objects/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/unit/objects/test_pydantic.py b/tests/unit/objects/test_pydantic.py
deleted file mode 100644
index cb7f438f..00000000
--- a/tests/unit/objects/test_pydantic.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import pytest
-from pydantic import computed_field
-
-from guidellm.objects.pydantic import StandardBaseModel
-
-
-class ExampleModel(StandardBaseModel):
-    name: str
-    age: int
-
-    @computed_field  # type: ignore[misc]
-    @property
-    def computed(self) -> str:
-        return self.name + " " + str(self.age)
-
-
-@pytest.mark.smoke
-def test_standard_base_model_initialization():
-    example = ExampleModel(name="John Doe", age=30)
-    assert example.name == "John Doe"
-    assert example.age == 30
-    assert example.computed == "John Doe 30"
-
-
-@pytest.mark.smoke
-def test_standard_base_model_invalid_initialization():
-    with pytest.raises(ValueError):
-        ExampleModel(name="John Doe", age="thirty")  # type: ignore[arg-type]
-
-
-@pytest.mark.smoke
-def test_standard_base_model_marshalling():
-    example = ExampleModel(name="John Doe", age=30)
-    serialized = example.model_dump()
-    assert serialized["name"] == "John Doe"
-    assert serialized["age"] == 30
-    assert serialized["computed"] == "John Doe 30"
-
-    serialized["computed"] = "Jane Doe 40"
-    deserialized = ExampleModel.model_validate(serialized)
-    assert deserialized.name == "John Doe"
-    assert deserialized.age == 30
-    assert deserialized.computed == "John Doe 30"
diff --git a/tests/unit/objects/test_statistics.py b/tests/unit/utils/test_statistics.py
similarity index 99%
rename from tests/unit/objects/test_statistics.py
rename to tests/unit/utils/test_statistics.py
index ede77175..d0f04d99 100644
--- a/tests/unit/objects/test_statistics.py
+++ b/tests/unit/utils/test_statistics.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 
-from guidellm.objects import (
+from guidellm.utils.statistics import (
     DistributionSummary,
     Percentiles,
     RunningStats,

From 440b4e3d0af9c6a91a92992760517214c11f28a7 Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Mon, 6 Oct 2025 15:59:44 -0400
Subject: [PATCH 04/11] Fix backend tests

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 src/guidellm/scheduler/objects.py                       | 2 ++
 tests/unit/{backend => backends}/__init__.py            | 0
 tests/unit/{backend => backends}/test_backend.py        | 2 +-
 tests/unit/{backend => backends}/test_objects.py        | 1 -
 tests/unit/{backend => backends}/test_openai_backend.py | 4 ++--
 5 files changed, 5 insertions(+), 4 deletions(-)
 rename tests/unit/{backend => backends}/__init__.py (100%)
 rename tests/unit/{backend => backends}/test_backend.py (99%)
 rename tests/unit/{backend => backends}/test_objects.py (99%)
 rename tests/unit/{backend => backends}/test_openai_backend.py (99%)

diff --git a/src/guidellm/scheduler/objects.py b/src/guidellm/scheduler/objects.py
index fdca28b3..21d30ec8 100644
--- a/src/guidellm/scheduler/objects.py
+++ b/src/guidellm/scheduler/objects.py
@@ -20,6 +20,7 @@
     Protocol,
     TypeVar,
     Union,
+    runtime_checkable,
 )
 
 from pydantic import Field, computed_field
@@ -232,6 +233,7 @@ def model_copy(self, **kwargs) -> ScheduledRequestInfo:  # type: ignore[override
         )
 
 
+@runtime_checkable
 class BackendInterface(Protocol, Generic[RequestT, ResponseT]):
     """
     Abstract interface for request processing backends.
diff --git a/tests/unit/backend/__init__.py b/tests/unit/backends/__init__.py
similarity index 100%
rename from tests/unit/backend/__init__.py
rename to tests/unit/backends/__init__.py
diff --git a/tests/unit/backend/test_backend.py b/tests/unit/backends/test_backend.py
similarity index 99%
rename from tests/unit/backend/test_backend.py
rename to tests/unit/backends/test_backend.py
index 49b65077..ebd0da87 100644
--- a/tests/unit/backend/test_backend.py
+++ b/tests/unit/backends/test_backend.py
@@ -80,7 +80,7 @@ async def default_model(self) -> str | None:
     def test_class_signatures(self):
         """Test Backend inheritance and type relationships."""
         assert issubclass(Backend, RegistryMixin)
-        assert issubclass(Backend, BackendInterface)
+        assert isinstance(Backend, BackendInterface)
         assert hasattr(Backend, "create")
         assert hasattr(Backend, "register")
         assert hasattr(Backend, "get_registered_object")
diff --git a/tests/unit/backend/test_objects.py b/tests/unit/backends/test_objects.py
similarity index 99%
rename from tests/unit/backend/test_objects.py
rename to tests/unit/backends/test_objects.py
index 34a6350c..bf903733 100644
--- a/tests/unit/backend/test_objects.py
+++ b/tests/unit/backends/test_objects.py
@@ -397,7 +397,6 @@ def valid_instances(self, request):
     def test_class_signatures(self):
         """Test GenerationRequestTimings inheritance and type relationships."""
         assert issubclass(GenerationRequestTimings, MeasuredRequestTimings)
-        assert issubclass(GenerationRequestTimings, StandardBaseModel)
         assert hasattr(GenerationRequestTimings, "model_dump")
         assert hasattr(GenerationRequestTimings, "model_validate")
 
diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backends/test_openai_backend.py
similarity index 99%
rename from tests/unit/backend/test_openai_backend.py
rename to tests/unit/backends/test_openai_backend.py
index 7c7f528d..2180b501 100644
--- a/tests/unit/backend/test_openai_backend.py
+++ b/tests/unit/backends/test_openai_backend.py
@@ -237,7 +237,7 @@ async def test_info(self):
             target="http://test", model="test-model", timeout=30.0
         )
 
-        info = backend.info()
+        info = backend.info
 
         assert info["target"] == "http://test"
         assert info["model"] == "test-model"
@@ -1074,7 +1074,7 @@ def test_get_chat_message_media_item_jpeg_file(self):
         mock_image = Mock(spec=Image.Image)
         mock_image.tobytes.return_value = b"fake_jpeg_data"
 
-        with patch("guidellm.backend.openai.Image.open", return_value=mock_image):
+        with patch("guidellm.backends.openai.Image.open", return_value=mock_image):
             result = backend._get_chat_message_media_item(mock_jpeg_path)
 
         expected_data = base64.b64encode(b"fake_jpeg_data").decode("utf-8")

From 272304c316586d7ce25936935757fd3e1a762aaf Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Mon, 6 Oct 2025 16:22:30 -0400
Subject: [PATCH 05/11] Initial scheduler test fixes

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 tests/unit/scheduler/test_constraints.py | 4 ++--
 tests/unit/scheduler/test_environment.py | 2 +-
 tests/unit/scheduler/test_objects.py     | 7 +++----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/tests/unit/scheduler/test_constraints.py b/tests/unit/scheduler/test_constraints.py
index 931af413..1e343a57 100644
--- a/tests/unit/scheduler/test_constraints.py
+++ b/tests/unit/scheduler/test_constraints.py
@@ -286,11 +286,11 @@ def test_create_constraint_raises(self, valid_instances):
     def test_call_raises(self, valid_instances):
         """Test that calling constraint raises RuntimeError."""
         instance, _ = valid_instances
-        state = SchedulerState(node_id="test_node", num_processes=1, start_time=0.0)
+        state = SchedulerState(node_id=0, num_processes=1, start_time=0.0)
         request = ScheduledRequestInfo(
             request_id="test_request",
             status="pending",
-            scheduler_node_id="test_node",
+            scheduler_node_id=0,
             scheduler_process_id=1,
             scheduler_start_time=0.0,
         )
diff --git a/tests/unit/scheduler/test_environment.py b/tests/unit/scheduler/test_environment.py
index c73abe42..ba0e2787 100644
--- a/tests/unit/scheduler/test_environment.py
+++ b/tests/unit/scheduler/test_environment.py
@@ -246,7 +246,7 @@ async def test_sync_run_start(self, valid_instances, mock_time, delay, expected)
 
         with (
             patch("time.time", return_value=mock_time),
-            patch("guidellm.scheduler.environment.settings") as mock_settings,
+            patch("guidellm.scheduler.environments.settings") as mock_settings,
         ):
             mock_settings.scheduler_start_delay_non_distributed = delay
             start_time = await instance.sync_run_start()
diff --git a/tests/unit/scheduler/test_objects.py b/tests/unit/scheduler/test_objects.py
index df794ff8..2fc63988 100644
--- a/tests/unit/scheduler/test_objects.py
+++ b/tests/unit/scheduler/test_objects.py
@@ -110,7 +110,7 @@ def test_generic_type_parameters(self):
 
         if hasattr(generic_base, "__args__"):
             type_params = generic_base.__args__
-            assert len(type_params) == 3, "Should have 3 type parameters"
+            assert len(type_params) == 2, "Should have 2 type parameters"
             param_names = [param.__name__ for param in type_params]
             expected_names = ["RequestT", "ResponseT"]
             assert param_names == expected_names
@@ -119,7 +119,7 @@ def test_generic_type_parameters(self):
     def test_implementation_construction(self):
         """Test that a complete concrete implementation can be instantiated."""
 
-        class ConcreteBackend(BackendInterface[str, MeasuredRequestTimings, str]):
+        class ConcreteBackend(BackendInterface[str, str]):
             @property
             def processes_limit(self) -> int | None:
                 return 4
@@ -162,7 +162,7 @@ async def resolve(
     async def test_implementation_async_methods(self):  # noqa: C901
         """Test that async methods work correctly in concrete implementation."""
 
-        class AsyncBackend(BackendInterface[dict, MeasuredRequestTimings, dict]):
+        class AsyncBackend(BackendInterface[dict, dict]):
             def __init__(self):
                 self.startup_called = False
                 self.validate_called = False
@@ -434,7 +434,6 @@ def valid_instances(self, request):
     @pytest.mark.smoke
     def test_class_signatures(self):
         """Test MeasuredRequestTimings inheritance and type relationships."""
-        assert issubclass(MeasuredRequestTimings, StandardBaseModel)
         assert hasattr(MeasuredRequestTimings, "model_dump")
         assert hasattr(MeasuredRequestTimings, "model_validate")
 

From 544c8887cec6fccb7a24dfde23ebe108e19d4e08 Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Tue, 7 Oct 2025 15:41:02 -0400
Subject: [PATCH 06/11] Fix MeasuredRequestTimings tests

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 tests/unit/scheduler/test_objects.py | 55 ++++++++++++++++------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/tests/unit/scheduler/test_objects.py b/tests/unit/scheduler/test_objects.py
index 2fc63988..fc5610fd 100644
--- a/tests/unit/scheduler/test_objects.py
+++ b/tests/unit/scheduler/test_objects.py
@@ -3,7 +3,7 @@
 import inspect
 import typing
 from collections.abc import AsyncIterator
-from typing import Any, Optional, TypeVar, Union
+from typing import Any, Literal, Optional, TypeVar, Union
 
 import pytest
 from pydantic import ValidationError
@@ -25,6 +25,13 @@
 from guidellm.utils import StandardBaseModel
 
 
+@MeasuredRequestTimings.register("test_request_timings")
+class ConcreteMeasuredRequestTimings(MeasuredRequestTimings):
+    """Concrete test implementation of MeasuredRequestTimings for testing."""
+
+    timings_type: Literal["test_request_timings"] = "test_request_timings"
+
+
 def test_request_t():
     """Validate that RequestT is a TypeVar usable for generics and isn't bound."""
     assert isinstance(RequestT, TypeVar)
@@ -400,19 +407,23 @@ class TestRequestTimings:
 
     @pytest.fixture(
         params=[
-            {},
+            {"timings_type": "test_request_timings"},
             {
+                "timings_type": "test_request_timings",
                 "request_start": None,
                 "request_end": None,
             },
             {
+                "timings_type": "test_request_timings",
                 "request_start": 1000.0,
                 "request_end": 1100.0,
             },
             {
+                "timings_type": "test_request_timings",
                 "request_start": 1000.0,
             },
             {
+                "timings_type": "test_request_timings",
                 "request_start": 0.0,
                 "request_end": 0.0,
             },
@@ -428,7 +439,7 @@ class TestRequestTimings:
     def valid_instances(self, request):
         """Creates various valid configurations of MeasuredRequestTimings."""
         constructor_args = request.param
-        instance = MeasuredRequestTimings(**constructor_args)
+        instance = MeasuredRequestTimings.model_validate(constructor_args)
         return instance, constructor_args
 
     @pytest.mark.smoke
@@ -446,7 +457,13 @@ def test_class_signatures(self):
             assert field_info.default is None
 
     @pytest.mark.smoke
-    def test_initialization(self, valid_instances):
+    def test_initialization(self):
+        """Base class initialization should fail."""
+        with pytest.raises(TypeError):
+            MeasuredRequestTimings()
+
+    @pytest.mark.smoke
+    def test_validation(self, valid_instances):
         """Test initialization with valid configurations."""
         instance, constructor_args = valid_instances
         assert isinstance(instance, MeasuredRequestTimings)
@@ -467,9 +484,9 @@ def test_initialization(self, valid_instances):
     )
     def test_invalid_initialization(self, field, value):
         """Test invalid initialization scenarios."""
-        kwargs = {field: value}
+        kwargs = {"timings_type": "test_request_timings", field: value}
         with pytest.raises(ValidationError):
-            MeasuredRequestTimings(**kwargs)
+            MeasuredRequestTimings.model_validate(kwargs)
 
     @pytest.mark.smoke
     def test_marshalling(self, valid_instances):
@@ -533,6 +550,7 @@ class TestScheduledRequestInfo:
                     "finalized": 2150.0,
                 },
                 "request_timings": {
+                    "timings_type": "test_request_timings",
                     "request_start": 2060.0,
                     "request_end": 2110.0,
                 },
@@ -585,8 +603,8 @@ def valid_instances(self, request):
                 **constructor_args["scheduler_timings"]
             )
         if "request_timings" in constructor_args:
-            constructor_args["request_timings"] = MeasuredRequestTimings(
-                **constructor_args["request_timings"]
+            constructor_args["request_timings"] = MeasuredRequestTimings.model_validate(
+                constructor_args["request_timings"]
             )
 
         instance = ScheduledRequestInfo(**constructor_args)
@@ -596,7 +614,6 @@ def valid_instances(self, request):
     def test_class_signatures(self):
         """Test ScheduledRequestInfo inheritance and type relationships."""
         assert issubclass(ScheduledRequestInfo, StandardBaseModel)
-        assert issubclass(ScheduledRequestInfo, typing.Generic)
         assert hasattr(ScheduledRequestInfo, "model_dump")
         assert hasattr(ScheduledRequestInfo, "model_validate")
 
@@ -606,18 +623,6 @@ def test_class_signatures(self):
         assert isinstance(ScheduledRequestInfo.started_at, property)
         assert isinstance(ScheduledRequestInfo.completed_at, property)
 
-        # Check that it's properly generic
-        orig_bases = getattr(ScheduledRequestInfo, "__orig_bases__", ())
-        generic_base = next(
-            (
-                base
-                for base in orig_bases
-                if hasattr(base, "__origin__") and base.__origin__ is typing.Generic
-            ),
-            None,
-        )
-        assert generic_base is not None
-
         # Check required fields
         fields = ScheduledRequestInfo.model_fields
         for key in self.CHECK_KEYS:
@@ -719,7 +724,9 @@ def test_started_at_property(self):
             scheduler_process_id=0,
             scheduler_start_time=1000.0,
             scheduler_timings=RequestSchedulerTimings(resolve_start=2000.0),
-            request_timings=MeasuredRequestTimings(request_start=2100.0),
+            request_timings=MeasuredRequestTimings.model_validate(
+                {"timings_type": "test_request_timings", "request_start": 2100.0}
+            ),
         )
         assert instance.started_at == 2100.0
 
@@ -755,7 +762,9 @@ def test_completed_at_property(self):
             scheduler_process_id=0,
             scheduler_start_time=1000.0,
             scheduler_timings=RequestSchedulerTimings(resolve_end=2000.0),
-            request_timings=MeasuredRequestTimings(request_end=2100.0),
+            request_timings=MeasuredRequestTimings.model_validate(
+                {"timings_type": "test_request_timings", "request_end": 2100.0}
+            ),
         )
         assert instance.completed_at == 2100.0
 

From 5032e9e3bc5ae5caba70af8f09e0eb4a10b14915 Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Tue, 7 Oct 2025 17:14:34 -0400
Subject: [PATCH 07/11] Patch time.time in workgroup lifecycle test

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 tests/unit/scheduler/test_worker_group.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tests/unit/scheduler/test_worker_group.py b/tests/unit/scheduler/test_worker_group.py
index b72fb95b..ff87c0b9 100644
--- a/tests/unit/scheduler/test_worker_group.py
+++ b/tests/unit/scheduler/test_worker_group.py
@@ -9,6 +9,7 @@
 from multiprocessing.process import BaseProcess
 from multiprocessing.synchronize import Barrier, Event
 from typing import Any, Generic, Literal
+from unittest.mock import patch
 
 import pytest
 from pydantic import Field
@@ -48,6 +49,23 @@ class MockRequestTimings(MeasuredRequestTimings):
     timings_type: Literal["mock"] = Field(default="mock")
 
 
+class MockTime:
+    """Deterministic time mock for testing."""
+
+    def __init__(self, start_time: float = 1000.0):
+        self.current_time = start_time
+        self.increment = 0.1
+
+    def time(self) -> float:
+        """Return current mock time and increment for next call."""
+        current = self.current_time
+        self.current_time += self.increment
+        return current
+
+
+mock_time = MockTime()
+
+
 class MockBackend(BackendInterface):
     """Mock backend for testing worker group functionality."""
 
@@ -67,6 +85,7 @@ def processes_limit(self) -> int | None:
     def requests_limit(self) -> int | None:
         return self._requests_limit
 
+    @property
     def info(self) -> dict[str, Any]:
         return {"type": "mock"}
 
@@ -249,6 +268,7 @@ def test_invalid_initialization_missing(self):
     @pytest.mark.smoke
     @async_timeout(10)
     @pytest.mark.asyncio
+    @patch.object(time, "time", mock_time.time)
     async def test_lifecycle(self, valid_instances: tuple[WorkerProcessGroup, dict]):  # noqa: C901, PLR0912
         """Test the lifecycle methods of WorkerProcessGroup."""
         instance, constructor_args = valid_instances

From 4971e561f892c2fd744472c51543529eb0b20dfc Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Tue, 7 Oct 2025 18:14:29 -0400
Subject: [PATCH 08/11] Tear down worker process group in instance fixture

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 tests/unit/scheduler/test_worker_group.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tests/unit/scheduler/test_worker_group.py b/tests/unit/scheduler/test_worker_group.py
index ff87c0b9..80bb6c23 100644
--- a/tests/unit/scheduler/test_worker_group.py
+++ b/tests/unit/scheduler/test_worker_group.py
@@ -163,7 +163,19 @@ def valid_instances(self, request):
         """Fixture providing test data for WorkerProcessGroup."""
         constructor_args = request.param.copy()
         instance = WorkerProcessGroup(**request.param, backend=MockBackend())
-        return instance, constructor_args
+        yield instance, constructor_args
+
+        # Teardown: attempt a graceful shutdown if processes were started.
+        try:
+            if hasattr(instance, "processes") and instance.processes is not None:
+                asyncio.run(instance.shutdown())
+        # Graceful shutdown failed; fall back to force-killing the processes.
+        except Exception:  # noqa: BLE001
+            if hasattr(instance, "processes") and instance.processes is not None:
+                # Kill each remaining process and wait briefly for it to exit.
+                for proc in instance.processes:
+                    proc.kill()
+                    proc.join(timeout=1.0)
 
     @pytest.mark.smoke
     def test_class_signatures(self, valid_instances):

From 567689595f5cafd1b30d93f1d8e698aab5d36c2c Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Wed, 8 Oct 2025 16:02:35 -0400
Subject: [PATCH 09/11] Match main tests to current CLI

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 tests/unit/test_main.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py
index e813dba4..c8fa71c2 100644
--- a/tests/unit/test_main.py
+++ b/tests/unit/test_main.py
@@ -19,10 +19,10 @@ def test_benchmark_run_with_backend_args():
             "--backend-args",
             '{"headers": {"Authorization": "Bearer my-token"}, "verify": false}',
             "--target",
-            "http://localhost:8000",
+            "http://localhost:9",
             "--data",
             "prompt_tokens=1,output_tokens=1",
-            "--rate-type",
+            "--profile",
             "constant",
             "--rate",
             "1",
@@ -36,7 +36,7 @@ def test_benchmark_run_with_backend_args():
     assert "Invalid header format" not in result.output
 
 
-@patch("guidellm.__main__.benchmark_with_scenario")
+@patch("guidellm.__main__.benchmark_generative_text")
 def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path):
     """
     Tests that --backend-args from the CLI correctly overrides scenario
@@ -47,11 +47,11 @@ def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path):
     # Create a scenario file with a header that should be overridden and removed
     scenario_content = {
         "backend_type": "openai_http",
-        "backend_args": {"headers": {"Authorization": "should-be-removed"}},
+        "backend_kwargs": {"headers": {"Authorization": "should-be-removed"}},
         "data": "prompt_tokens=10,output_tokens=10",
         "max_requests": 1,
         "target": "http://dummy-target",
-        "rate_type": "synchronous",
+        "profile": "synchronous",
         "processor": "gpt2",
     }
     with scenario_path.open("w") as f:
@@ -65,7 +65,7 @@ def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path):
             "run",
             "--scenario",
             str(scenario_path),
-            "--backend-args",
+            "--backend-kwargs",
             '{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}',
         ],
         catch_exceptions=False,
@@ -79,6 +79,6 @@ def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path):
     scenario = call_args["scenario"]
 
     # Verify the backend_args were merged correctly
-    backend_args = scenario.backend_args
+    backend_args = scenario.backend_kwargs
     expected_headers = {"Authorization": None, "Custom-Header": "Custom-Value"}
     assert backend_args["headers"] == expected_headers

From 5f36174595d0a3ed0a581005f453b57a8dc772fb Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Wed, 8 Oct 2025 16:04:30 -0400
Subject: [PATCH 10/11] Various small fixes to utils tests

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 tests/unit/utils/test_auto_importer.py |  3 ++-
 tests/unit/utils/test_registry.py      |  4 +++-
 tests/unit/utils/test_text.py          | 28 ++------------------------
 3 files changed, 7 insertions(+), 28 deletions(-)

diff --git a/tests/unit/utils/test_auto_importer.py b/tests/unit/utils/test_auto_importer.py
index cc71bce3..5f930ba2 100644
--- a/tests/unit/utils/test_auto_importer.py
+++ b/tests/unit/utils/test_auto_importer.py
@@ -4,6 +4,7 @@
 
 from __future__ import annotations
 
+import sys
 from unittest import mock
 
 import pytest
@@ -191,9 +192,9 @@ class TestClass(AutoImporterMixin):
             mock_import.assert_any_call("test.package.subpackage")
 
     @pytest.mark.sanity
-    @mock.patch("sys.modules", {"test.package.existing": mock.MagicMock()})
     @mock.patch("importlib.import_module")
     @mock.patch("pkgutil.walk_packages")
+    @mock.patch.dict(sys.modules, {"test.package.existing": mock.MagicMock()})
     def test_skip_already_imported_modules(self, mock_walk, mock_import):
         """Test that modules already in sys.modules are tracked but not re-imported."""
 
diff --git a/tests/unit/utils/test_registry.py b/tests/unit/utils/test_registry.py
index eed126d3..47253b72 100644
--- a/tests/unit/utils/test_registry.py
+++ b/tests/unit/utils/test_registry.py
@@ -579,7 +579,9 @@ def validate_value(value: int) -> bool:
         if hasattr(inspect, "get_annotations"):
             # Python 3.10+
             try:
-                annotations = inspect.get_annotations(registered_class.__init__)
+                annotations = inspect.get_annotations(
+                    registered_class.__init__, eval_str=True
+                )
                 assert "value" in annotations
                 assert annotations["value"] is int
                 return_ann = annotations.get("return")
diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/test_text.py
index 3774ca1f..154291d6 100644
--- a/tests/unit/utils/test_text.py
+++ b/tests/unit/utils/test_text.py
@@ -42,7 +42,7 @@ class TestFormatValueDisplay:
             "expected",
         ),
         [
-            (42.0, "test", "", None, None, None, "42 [info]test[/info]"),
+            (42.0, "test", "", None, None, 0, "42 [info]test[/info]"),
             (42.5, "test", "ms", None, None, 1, "42.5ms [info]test[/info]"),
             (42.123, "test", "", None, 5, 2, " 42.12 [info]test[/info]"),
             (
@@ -78,34 +78,10 @@ def test_invocation(
         assert label in result
         assert units in result
         value_check = (
-            str(int(value))
-            if decimal_places == 0
-            else (
-                f"{value:.{decimal_places}f}"
-                if decimal_places is not None
-                else str(value)
-            )
+            str(int(value)) if decimal_places == 0 else f"{value:.{decimal_places}f}"
         )
         assert value_check in result or str(value) in result
 
-    @pytest.mark.sanity
-    @pytest.mark.parametrize(
-        ("value", "label"),
-        [
-            (None, "test"),
-            (42.0, None),
-            ("not_number", "test"),
-        ],
-    )
-    def test_invocation_with_none_values(self, value, label):
-        """Test format_value_display with None/invalid inputs still works."""
-        result = format_value_display(value, label)
-        assert isinstance(result, str)
-        if label is not None:
-            assert str(label) in result
-        if value is not None:
-            assert str(value) in result
-
 
 class TestSplitTextListByLength:
     """Test suite for split_text_list_by_length."""

From 155623631824f56b8bab6364516ffb6235ac3d03 Mon Sep 17 00:00:00 2001
From: Samuel Monson <smonson@redhat.com>
Date: Wed, 8 Oct 2025 17:20:35 -0400
Subject: [PATCH 11/11] Fix typing import for python3.10

Signed-off-by: Samuel Monson <smonson@redhat.com>
---
 src/guidellm/benchmark/types.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/guidellm/benchmark/types.py b/src/guidellm/benchmark/types.py
index 04ad4061..1ef65a68 100644
--- a/src/guidellm/benchmark/types.py
+++ b/src/guidellm/benchmark/types.py
@@ -2,12 +2,13 @@
 
 from collections.abc import Iterable
 from pathlib import Path
-from typing import Any, TypeAliasType
+from typing import Any
 
 from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from transformers import (  # type: ignore[import]
     PreTrainedTokenizerBase,
 )
+from typing_extensions import TypeAliasType
 
 from guidellm.benchmark.aggregator import (
     Aggregator,