Skip to content

Commit bb74a55

Browse files
committed
Share type aliases between entrypoints and scenario
1 parent 03f9085 commit bb74a55

File tree

3 files changed

+56
-57
lines changed

3 files changed

+56
-57
lines changed

src/guidellm/benchmark/entrypoints.py

Lines changed: 7 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,15 @@
11
from __future__ import annotations
22

3-
from collections.abc import Iterable
43
from pathlib import Path
54
from typing import Any, Literal
65

7-
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
8-
from transformers import ( # type: ignore[import]
9-
PreTrainedTokenizerBase,
10-
)
11-
126
from guidellm.backends import (
137
Backend,
148
BackendType,
159
GenerationRequest,
1610
GenerationResponse,
1711
)
1812
from guidellm.benchmark.aggregator import (
19-
Aggregator,
20-
CompilableAggregator,
2113
GenerativeRequestsAggregator,
2214
GenerativeStatsProgressAggregator,
2315
SchedulerStatsAggregator,
@@ -29,11 +21,10 @@
2921
GenerativeBenchmarkerOutput,
3022
)
3123
from guidellm.benchmark.profile import Profile, ProfileType
32-
from guidellm.benchmark.progress import (
33-
BenchmarkerProgress,
34-
BenchmarkerProgressGroup,
35-
)
24+
from guidellm.benchmark.progress import BenchmarkerProgressGroup
3625
from guidellm.benchmark.scenario import enable_scenarios
26+
from guidellm.benchmark.type import OutputFormatType, DataInputType, ProcessorInputType, ProgressInputType, \
27+
AggregatorInputType
3728
from guidellm.request import GenerativeRequestLoader
3829
from guidellm.scheduler import (
3930
ConstraintInitializer,
@@ -51,27 +42,6 @@
5142
_CURRENT_WORKING_DIR = Path.cwd()
5243

5344

54-
# Data types
55-
56-
DataType = (
57-
Iterable[str]
58-
| Iterable[dict[str, Any]]
59-
| Dataset
60-
| DatasetDict
61-
| IterableDataset
62-
| IterableDatasetDict
63-
| str
64-
| Path
65-
)
66-
67-
OutputFormatType = (
68-
tuple[str, ...]
69-
| list[str]
70-
| dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
71-
| None
72-
)
73-
74-
7545
# Helper functions
7646

7747
async def initialize_backend(
@@ -147,7 +117,7 @@ async def finalize_outputs(
147117
@enable_scenarios
148118
async def benchmark_generative_text( # noqa: C901
149119
target: str,
150-
data: DataType,
120+
data: DataInputType,
151121
profile: StrategyType | ProfileType | Profile,
152122
rate: list[float] | None = None,
153123
random_seed: int = 42,
@@ -156,20 +126,18 @@ async def benchmark_generative_text( # noqa: C901
156126
backend_kwargs: dict[str, Any] | None = None,
157127
model: str | None = None,
158128
# Data configuration
159-
processor: str | Path | PreTrainedTokenizerBase | None = None,
129+
processor: ProcessorInputType | None = None,
160130
processor_args: dict[str, Any] | None = None,
161131
data_args: dict[str, Any] | None = None,
162132
data_sampler: Literal["random"] | None = None,
163133
# Output configuration
164134
output_path: str | Path | None = _CURRENT_WORKING_DIR,
165135
output_formats: OutputFormatType = ("console", "json", "html", "csv"),
166136
# Updates configuration
167-
progress: tuple[str, ...] | list[str] | list[BenchmarkerProgress] | None = None,
137+
progress: ProgressInputType | None = None,
168138
print_updates: bool = False,
169139
# Aggregators configuration
170-
add_aggregators: (
171-
dict[str, str | dict[str, Any] | Aggregator | CompilableAggregator] | None
172-
) = None,
140+
add_aggregators: AggregatorInputType | None = None,
173141
warmup: float | None = None,
174142
cooldown: float | None = None,
175143
request_samples: int | None = 20,

src/guidellm/benchmark/scenario.py

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,21 @@
11
from __future__ import annotations
22

33
import json
4-
from collections.abc import Iterable
54
from functools import cache, wraps
65
from inspect import Parameter, signature
76
from pathlib import Path
87
from typing import Annotated, Any, Callable, Literal, TypeVar
98

109
import yaml
11-
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
1210
from loguru import logger
1311
from pydantic import BeforeValidator, Field, PositiveFloat, PositiveInt, SkipValidation
1412
from transformers.tokenization_utils_base import ( # type: ignore[import]
1513
PreTrainedTokenizerBase,
1614
)
1715

1816
from guidellm.backends import Backend, BackendType
19-
from guidellm.benchmark.aggregator import (
20-
Aggregator,
21-
CompilableAggregator,
22-
)
2317
from guidellm.benchmark.profile import Profile, ProfileType
18+
from guidellm.benchmark.type import DataInputType, ProcessorInputType, AggregatorInputType
2419
from guidellm.scheduler import StrategyType
2520
from guidellm.utils import StandardBaseModel
2621

@@ -116,14 +111,7 @@ class Config:
116111
arbitrary_types_allowed = True
117112

118113
data: Annotated[
119-
Iterable[str]
120-
| Iterable[dict[str, Any]]
121-
| Dataset
122-
| DatasetDict
123-
| IterableDataset
124-
| IterableDatasetDict
125-
| str
126-
| Path,
114+
DataInputType,
127115
# BUG: See https://github.com/pydantic/pydantic/issues/9541
128116
SkipValidation,
129117
]
@@ -137,14 +125,12 @@ class Config:
137125
backend_kwargs: dict[str, Any] | None = None
138126
model: str | None = None
139127
# Data configuration
140-
processor: str | Path | PreTrainedTokenizerBase | None = None
128+
processor: ProcessorInputType | None = None
141129
processor_args: dict[str, Any] | None = None
142130
data_args: dict[str, Any] | None = None
143131
data_sampler: Literal["random"] | None = None
144132
# Aggregators configuration
145-
add_aggregators: (
146-
dict[str, str | dict[str, Any] | Aggregator | CompilableAggregator] | None
147-
) = None
133+
add_aggregators: AggregatorInputType | None = None
148134
warmup: Annotated[float | None, Field(gt=0, le=1)] = None
149135
cooldown: Annotated[float | None, Field(gt=0, le=1)] = None
150136
request_samples: PositiveInt | None = 20

src/guidellm/benchmark/type.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from __future__ import annotations
2+
from collections.abc import Iterable
3+
from typing import Any
4+
from pathlib import Path
5+
from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
6+
7+
from guidellm.benchmark.output import (
8+
GenerativeBenchmarkerOutput,
9+
)
10+
11+
from transformers import ( # type: ignore[import]
12+
PreTrainedTokenizerBase,
13+
)
14+
15+
from guidellm.benchmark.progress import BenchmarkerProgress
16+
17+
from guidellm.benchmark.aggregator import (
18+
Aggregator,
19+
CompilableAggregator,
20+
)
21+
22+
23+
DataInputType = (
24+
Iterable[str]
25+
| Iterable[dict[str, Any]]
26+
| Dataset
27+
| DatasetDict
28+
| IterableDataset
29+
| IterableDatasetDict
30+
| str
31+
| Path
32+
)
33+
34+
OutputFormatType = (
35+
tuple[str, ...]
36+
| list[str]
37+
| dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
38+
| None
39+
)
40+
41+
ProcessorInputType = str | Path | PreTrainedTokenizerBase
42+
43+
ProgressInputType = tuple[str, ...] | list[str] | list[BenchmarkerProgress]
44+
45+
AggregatorInputType = dict[str, str | dict[str, Any] | Aggregator | CompilableAggregator]

0 commit comments

Comments
 (0)