Fixes for benchmarker aggregator and fixes from cursor for openai.py, aggregator.py, and benchmarker.py

markurtz · markurtz · commit 3c6819f06fc8 · 2025-08-19T11:07:34.000-04:00
diff --git a/src/guidellm/backend/openai.py b/src/guidellm/backend/openai.py
@@ -139,6 +139,7 @@ def __init__(
         self._in_process = False
         self._async_client: Optional[httpx.AsyncClient] = None
 
+    @property
     def info(self) -> dict[str, Any]:
         """
         :return: Dictionary containing backend configuration details.
diff --git a/src/guidellm/benchmark/aggregator.py b/src/guidellm/benchmark/aggregator.py
diff --git a/src/guidellm/benchmark/benchmarker.py b/src/guidellm/benchmark/benchmarker.py
@@ -26,7 +26,7 @@
 )
 
 from guidellm.benchmark.aggregator import Aggregator, CompilableAggregator
-from guidellm.benchmark.objects import BenchmarkT
+from guidellm.benchmark.objects import BenchmarkerDict, BenchmarkT, SchedulerDict
 from guidellm.benchmark.profile import Profile
 from guidellm.scheduler import (
     BackendInterface,
@@ -41,6 +41,7 @@
     SchedulingStrategy,
 )
 from guidellm.utils import InfoMixin, ThreadSafeSingletonMixin
+from guidellm.utils.pydantic_utils import StandardBaseDict
 
 __all__ = ["Benchmarker"]
 
@@ -114,9 +115,7 @@ async def run(
                     request,
                     request_info,
                     scheduler_state,
-                ) in Scheduler[
-                    BackendInterface, RequestT, MeasuredRequestTimingsT, ResponseT
-                ]().run(
+                ) in Scheduler[RequestT, MeasuredRequestTimingsT, ResponseT]().run(
                     requests=requests,
                     backend=backend,
                     strategy=strategy,
@@ -200,43 +199,65 @@ def _compile_benchmark_kwargs(
         benchmark_kwargs = {
             "run_id": run_id,
             "run_index": run_index,
-            "scheduler": {
-                "strategy": strategy,
-                "constraints": {
-                    key: InfoMixin.extract_from_obj(val) for key, val in constraints
+            "scheduler": SchedulerDict(
+                strategy=strategy,
+                constraints={
+                    key: InfoMixin.extract_from_obj(val)
+                    for key, val in constraints.items()
                 },
-                "state": scheduler_state,
-            },
-            "benchmarker": {
-                "profile": profile,
-                "requests": InfoMixin.extract_from_obj(requests),
-                "backend": InfoMixin.extract_from_obj(backend),
-                "environment": InfoMixin.extract_from_obj(environment),
-                "aggregators": {
+                state=scheduler_state,
+            ),
+            "benchmarker": BenchmarkerDict(
+                profile=profile,
+                requests=InfoMixin.extract_from_obj(requests),
+                backend=backend.info,
+                environment=environment.info,
+                aggregators={
                     key: InfoMixin.extract_from_obj(aggregator)
                     for key, aggregator in aggregators.items()
                 },
-            },
-            "system": {},
-            "extras": {},
+            ),
+            "env_args": StandardBaseDict(),
+            "extras": StandardBaseDict(),
         }
+
+        def _combine(
+            existing: dict[str, Any] | StandardBaseDict,
+            addition: dict[str, Any] | StandardBaseDict,
+        ) -> dict[str, Any] | StandardBaseDict:
+            """
+            Merge ``addition`` into ``existing``; on shared keys ``existing`` wins.
+
+            :raises ValueError: If either argument is not a dict or StandardBaseDict.
+            """
+            for label, value in (("Existing", existing), ("Addition", addition)):
+                if not isinstance(value, (dict, StandardBaseDict)):
+                    raise ValueError(
+                        f"{label} value {value} (type: {type(value).__name__}) "
+                        f"is not a valid type for merging."
+                    )
+
+            extra = addition if isinstance(addition, dict) else addition.model_dump()
+            if isinstance(existing, dict):
+                merged = {**extra, **existing}
+            else:
+                # Rebuild through existing's own class so the result keeps its type.
+                merged = existing.__class__(**{**extra, **existing.model_dump()})
+            return merged
+
         for key, aggregator in aggregators.items():
             if not isinstance(aggregator, CompilableAggregator):
                 continue
 
             compiled = aggregator.compile(aggregators_state[key], scheduler_state)
 
-            if key not in benchmark_kwargs:
-                benchmark_kwargs[key] = compiled
-                continue
-
-            existing_val = benchmark_kwargs[key]
-            if not (isinstance(existing_val, dict) and isinstance(compiled, dict)):
-                raise ValueError(
-                    f"Key '{key}' already exists with value {existing_val} "
-                    f"(type: {type(existing_val).__name__}) and cannot be "
-                    f"overwritten with {compiled} (type: {type(compiled).__name__})"
-                )
-            existing_val.update(compiled)
+            for field_name, field_val in compiled.items():
+                if field_name in benchmark_kwargs:
+                    # If the key already exists, merge the values
+                    benchmark_kwargs[field_name] = _combine(
+                        benchmark_kwargs[field_name], field_val
+                    )
+                else:
+                    benchmark_kwargs[field_name] = field_val
 
         return benchmark_kwargs
diff --git a/src/guidellm/benchmark/objects.py b/src/guidellm/benchmark/objects.py
@@ -240,10 +240,10 @@ def total_tokens(self) -> int | None:
 
-        :return: Sum of prompt and output tokens, or None if either is unavailable.
+        :return: Sum of prompt and output tokens, or None if both are unavailable.
         """
-        if self.prompt_tokens is None or self.output_tokens is None:
+        if self.prompt_tokens is None and self.output_tokens is None:
             return None
 
-        return self.prompt_tokens + self.output_tokens
+        return (self.prompt_tokens or 0) + (self.output_tokens or 0)
 
     @computed_field  # type: ignore[misc]
     @property
diff --git a/src/guidellm/scheduler/objects.py b/src/guidellm/scheduler/objects.py
@@ -224,6 +224,7 @@ def requests_limit(self) -> int | None:
         :return: The maximum concurrent requests supported, or None if unlimited
         """
 
+    @property
     @abstractmethod
     def info(self) -> dict[str, Any]:
         """
diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py
@@ -2,7 +2,16 @@
 from .colors import Colors
 from .default_group import DefaultGroupHandler
 from .encoding import MsgpackEncoding
-from .general import UNSET, UnsetType
+from .general import (
+    UNSET,
+    UnsetType,
+    all_defined,
+    safe_add,
+    safe_divide,
+    safe_getattr,
+    safe_multiply,
+    safe_subtract,
+)
 from .hf_datasets import (
     SUPPORTED_TYPES,
     save_dataset_to_file,
@@ -64,12 +73,18 @@
     "ThreadSafeSingletonMixin",
     "TimeRunningStats",
     "UnsetType",
+    "all_defined",
     "check_load_processor",
     "clean_text",
     "filter_text",
     "format_value_display",
     "is_puncutation",
     "load_text",
+    "safe_add",
+    "safe_divide",
+    "safe_getattr",
+    "safe_multiply",
+    "safe_subtract",
     "save_dataset_to_file",
     "split_text",
     "split_text_list_by_length",
diff --git a/src/guidellm/utils/general.py b/src/guidellm/utils/general.py
@@ -1,6 +1,17 @@
-from typing import Final
+from __future__ import annotations
 
-__all__ = ["UNSET", "UnsetType"]
+from typing import Any, Final
+
+__all__ = [
+    "UNSET",
+    "UnsetType",
+    "all_defined",
+    "safe_add",
+    "safe_divide",
+    "safe_getattr",
+    "safe_multiply",
+    "safe_subtract",
+]
 
 
 class UnsetType:
@@ -11,3 +22,61 @@ def __repr__(self) -> str:
 
 
 UNSET: Final = UnsetType()
+
+
+def safe_getattr(obj: Any | None, attr: str, default: Any = None) -> Any:
+    """
+    Look up ``attr`` on ``obj``, falling back to ``default``.
+
+    :param obj: Object to read the attribute from; may be None.
+    :param attr: Name of the attribute to look up.
+    :param default: Value returned when ``obj`` is None or has no such attribute.
+    :return: The attribute value, or ``default`` when it cannot be read.
+    """
+    if obj is not None:
+        return getattr(obj, attr, default)
+
+    return default
+
+
+def all_defined(*values: Any | None) -> bool:
+    """
+    Report whether none of the given values is None.
+
+    :param values: Values to inspect; falsy values such as 0 or "" still count
+        as defined.
+    :return: True when no value is None (including when no values are given),
+        False otherwise.
+    """
+    for value in values:
+        if value is None:
+            return False
+    return True
+
+
+def safe_divide(
+    numerator: float | None,
+    denominator: float | None,
+    num_default: float = 0.0,
+    den_default: float = 1.0,
+) -> float:
+    """
+    Divide two values, substituting defaults for None and guarding against zero.
+
+    :param numerator: The dividend; ``num_default`` is used when it is None.
+    :param denominator: The divisor; ``den_default`` is used when it is None.
+    :param num_default: Replacement for a None numerator.
+    :param den_default: Replacement for a None denominator.
+    :return: The quotient. A zero (falsy) divisor is replaced by 1e-10, so the
+        call returns a very large magnitude instead of raising.
+    """
+    top = num_default if numerator is None else numerator
+    bottom = den_default if denominator is None else denominator
+    if not bottom:
+        # Avoid ZeroDivisionError by dividing by a tiny epsilon instead.
+        bottom = 1e-10
+
+    return top / bottom
+
+
+def safe_multiply(*values: int | float | None, default: float = 1.0) -> float:
+    """
+    Multiply ``default`` by every given value, treating None as 1.0.
+
+    :param values: Factors to multiply together; None entries act as 1.0.
+    :param default: Starting value of the product, returned unchanged when no
+        values are given.
+    :return: The accumulated product.
+    """
+    product = default
+    for factor in values:
+        product *= 1.0 if factor is None else factor
+    return product
+
+
+def safe_add(*values: int | float | None, default: float = 0.0) -> float:
+    """
+    Add every given value onto ``default``, treating None as 0.0.
+
+    :param values: Terms to add up; None entries contribute 0.0.
+    :param default: Starting value of the sum, returned unchanged when no
+        values are given.
+    :return: The accumulated sum.
+    """
+    total = default
+    for term in values:
+        total += 0.0 if term is None else term
+    return total
+
+
+def safe_subtract(*values: int | float | None, default: float = 0.0) -> float:
+    """
+    Subtract all subsequent values from the first one, tolerating None entries.
+
+    :param values: The first entry is the minuend and every later entry is
+        subtracted from it; later None entries are treated as 0.0.
+    :param default: Value used in place of a None first entry, and returned
+        as-is when no values are given.
+    :return: ``values[0] - values[1] - ...`` with the None handling above.
+    """
+    if not values:
+        return default
+
+    # Seed with the minuend: seeding with ``default`` and subtracting every
+    # value would yield -(a + b) for ``safe_subtract(a, b)`` instead of a - b.
+    minuend, *subtrahends = values
+    result = default if minuend is None else minuend
+    for val in subtrahends:
+        result -= val if val is not None else 0.0
+    return result
diff --git a/tests/unit/benchmark/test_aggregator.py b/tests/unit/benchmark/test_aggregator.py