Skip to content

Commit aa03d52

Browse files
authored
Merge branch 'main' into support_sharegpt
2 parents 2726d50 + 9d9392b commit aa03d52

File tree

16 files changed

+133
-42
lines changed

16 files changed

+133
-42
lines changed

src/guidellm/__main__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
try:
3434
import uvloop
3535
except ImportError:
36-
uvloop = None # type: ignore[assignment] # Optional dependency
36+
uvloop = None # type: ignore[assignment] # Optional dependency
3737

3838
from guidellm.backends import BackendType
3939
from guidellm.benchmark import (
@@ -116,6 +116,7 @@ def benchmark():
116116
)
117117
@click.option(
118118
"--scenario",
119+
"-c",
119120
type=cli_tools.Union(
120121
click.Path(
121122
exists=True,
@@ -392,8 +393,10 @@ def run(**kwargs):
392393
disable_progress = kwargs.pop("disable_progress", False)
393394

394395
try:
396+
# Only set CLI args that differ from click defaults
397+
new_kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)
395398
args = BenchmarkGenerativeTextArgs.create(
396-
scenario=kwargs.pop("scenario", None), **kwargs
399+
scenario=new_kwargs.pop("scenario", None), **new_kwargs
397400
)
398401
except ValidationError as err:
399402
# Translate pydantic validation error to click argument error

src/guidellm/benchmark/benchmarker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import uuid
1414
from abc import ABC
1515
from collections.abc import AsyncIterator, Iterable
16-
from typing import Generic
16+
from typing import Any, Generic
1717

1818
from guidellm.benchmark.profile import Profile
1919
from guidellm.benchmark.progress import BenchmarkerProgress

src/guidellm/benchmark/schemas.py

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,17 @@
2323
from typing import Any, ClassVar, Literal, TypeVar, cast
2424

2525
import yaml
26-
from pydantic import ConfigDict, Field, computed_field, model_serializer
26+
from pydantic import (
27+
AliasChoices,
28+
AliasGenerator,
29+
ConfigDict,
30+
Field,
31+
ValidationError,
32+
ValidatorFunctionWrapHandler,
33+
computed_field,
34+
field_validator,
35+
model_serializer,
36+
)
2737
from torch.utils.data import Sampler
2838
from transformers import PreTrainedTokenizerBase
2939

@@ -1142,7 +1152,8 @@ def update_estimate(
11421152
)
11431153
request_duration = (
11441154
(request_end_time - request_start_time)
1145-
if request_end_time and request_start_time else None
1155+
if request_end_time and request_start_time
1156+
else None
11461157
)
11471158

11481159
# Always track concurrency
@@ -1669,7 +1680,7 @@ def compile(
16691680
estimated_state: EstimatedBenchmarkState,
16701681
scheduler_state: SchedulerState,
16711682
profile: Profile,
1672-
requests: Iterable,
1683+
requests: Iterable, # noqa: ARG003
16731684
backend: BackendInterface,
16741685
environment: Environment,
16751686
strategy: SchedulingStrategy,
@@ -1787,9 +1798,8 @@ def create(
17871798
scenario_data = scenario_data["args"]
17881799
constructor_kwargs.update(scenario_data)
17891800

1790-
for key, value in kwargs.items():
1791-
if value != cls.get_default(key):
1792-
constructor_kwargs[key] = value
1801+
# Apply overrides from kwargs
1802+
constructor_kwargs.update(kwargs)
17931803

17941804
return cls.model_validate(constructor_kwargs)
17951805

@@ -1818,13 +1828,19 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
18181828
else:
18191829
return factory({}) # type: ignore[call-arg] # Confirmed correct at runtime by code above
18201830

1821-
1822-
18231831
model_config = ConfigDict(
18241832
extra="ignore",
18251833
use_enum_values=True,
18261834
from_attributes=True,
18271835
arbitrary_types_allowed=True,
1836+
validate_by_alias=True,
1837+
validate_by_name=True,
1838+
alias_generator=AliasGenerator(
1839+
# Support field names with hyphens
1840+
validation_alias=lambda field_name: AliasChoices(
1841+
field_name, field_name.replace("_", "-")
1842+
),
1843+
),
18281844
)
18291845

18301846
# Required
@@ -1838,7 +1854,7 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
18381854
profile: StrategyType | ProfileType | Profile = Field(
18391855
default="sweep", description="Benchmark profile or scheduling strategy type"
18401856
)
1841-
rate: float | list[float] | None = Field(
1857+
rate: list[float] | None = Field(
18421858
default=None, description="Request rate(s) for rate-based scheduling"
18431859
)
18441860
# Backend configuration
@@ -1871,6 +1887,12 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
18711887
data_request_formatter: DatasetPreprocessor | dict[str, str] | str = Field(
18721888
default="chat_completions",
18731889
description="Request formatting preprocessor or template name",
1890+
validation_alias=AliasChoices(
1891+
"data_request_formatter",
1892+
"data-request-formatter",
1893+
"request_type",
1894+
"request-type",
1895+
),
18741896
)
18751897
data_collator: Callable | Literal["generative"] | None = Field(
18761898
default="generative", description="Data collator for batch processing"
@@ -1931,6 +1953,26 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
19311953
default=None, description="Maximum global error rate (0-1) before stopping"
19321954
)
19331955

1956+
@field_validator("data", "data_args", "rate", mode="wrap")
1957+
@classmethod
1958+
def single_to_list(
1959+
cls, value: Any, handler: ValidatorFunctionWrapHandler
1960+
) -> list[Any]:
1961+
"""
1962+
Ensures field is always a list.
1963+
1964+
:param value: Input value for the 'data' field
1965+
:return: List of data sources
1966+
"""
1967+
try:
1968+
return handler(value)
1969+
except ValidationError as err:
1970+
# If validation fails, try wrapping the value in a list
1971+
if err.errors()[0]["type"] == "list_type":
1972+
return handler([value])
1973+
else:
1974+
raise
1975+
19341976
@model_serializer
19351977
def serialize_model(self):
19361978
"""

src/guidellm/data/deserializers/deserializer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,10 @@ def _deserialize_with_registered_deserializers(
107107

108108
if len(errors) > 0:
109109
err_msgs = ""
110+
110111
def sort_key(item):
111112
return (isinstance(item[1], DataNotSupportedError), item[0])
113+
112114
for key, err in sorted(errors.items(), key=sort_key):
113115
err_msgs += f"\n - Deserializer '{key}': ({type(err).__name__}) {err}"
114116
raise ValueError(
@@ -141,4 +143,3 @@ def _deserialize_with_specified_deserializer(
141143
random_seed=random_seed,
142144
**data_kwargs,
143145
)
144-

src/guidellm/data/deserializers/synthetic.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import yaml
1010
from datasets import Features, IterableDataset, Value
1111
from faker import Faker
12-
from pydantic import ConfigDict, Field, model_validator
12+
from pydantic import ConfigDict, Field, ValidationError, model_validator
1313
from transformers import PreTrainedTokenizerBase
1414

1515
from guidellm.data.deserializers.deserializer import (
@@ -242,6 +242,10 @@ def __call__(
242242
if (config := self._load_config_str(data)) is not None:
243243
return self(config, processor_factory, random_seed, **data_kwargs)
244244

245+
# Try to parse dict-like data directly
246+
if (config := self._load_config_dict(data)) is not None:
247+
return self(config, processor_factory, random_seed, **data_kwargs)
248+
245249
if not isinstance(data, SyntheticTextDatasetConfig):
246250
raise DataNotSupportedError(
247251
"Unsupported data for SyntheticTextDatasetDeserializer, "
@@ -266,6 +270,15 @@ def __call__(
266270
),
267271
)
268272

273+
def _load_config_dict(self, data: Any) -> SyntheticTextDatasetConfig | None:
274+
if not isinstance(data, dict | list):
275+
return None
276+
277+
try:
278+
return SyntheticTextDatasetConfig.model_validate(data)
279+
except ValidationError:
280+
return None
281+
269282
def _load_config_file(self, data: Any) -> SyntheticTextDatasetConfig | None:
270283
if (not isinstance(data, str) and not isinstance(data, Path)) or (
271284
not Path(data).is_file()

src/guidellm/data/loaders.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
__all__ = ["DataLoader", "DatasetsIterator"]
1818

1919

20-
2120
class DatasetsIterator(TorchIterableDataset):
2221
def __init__(
2322
self,

src/guidellm/data/preprocessors/formatters.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,7 @@ def __init__(
5656
self.stream: bool = stream
5757
self.max_tokens: int | None = max_tokens or max_completion_tokens
5858

59-
def __call__(
60-
self, columns: dict[str, list[Any]]
61-
) -> GenerationRequest:
59+
def __call__(self, columns: dict[str, list[Any]]) -> GenerationRequest:
6260
"""
6361
:param columns: A dict of GenerativeDatasetColumnType to Any
6462
"""
@@ -396,9 +394,7 @@ def __call__( # noqa: C901
396394
class GenerativeAudioTranslationRequestFormatter(
397395
GenerativeAudioTranscriptionRequestFormatter
398396
):
399-
def __call__(
400-
self, columns: dict[str, list[Any]]
401-
) -> GenerationRequest:
397+
def __call__(self, columns: dict[str, list[Any]]) -> GenerationRequest:
402398
result = super().__call__(columns)
403399
result.request_type = "audio_translations"
404400
return result

src/guidellm/data/preprocessors/mappers.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,7 @@ def __init__(
167167
dict[GenerativeDatasetColumnType, list[tuple[int, str]]] | None
168168
)
169169

170-
def __call__(
171-
self, row: dict[str, Any]
172-
) -> dict[str, list[Any]]:
170+
def __call__(self, row: dict[str, Any]) -> dict[str, list[Any]]:
173171
if self.datasets_column_mappings is None:
174172
raise ValueError("DefaultGenerativeColumnMapper not setup with data.")
175173

src/guidellm/data/preprocessors/preprocessor.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212

1313
@runtime_checkable
1414
class DatasetPreprocessor(Protocol):
15-
def __call__(self, item: dict[str, Any]) -> (
16-
GenerationRequest | dict[str, Any]): ...
15+
def __call__(self, item: dict[str, Any]) -> GenerationRequest | dict[str, Any]: ...
1716

1817

1918
@runtime_checkable

src/guidellm/preprocess/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def process_dataset(
238238
prompt_tokens: str | Path,
239239
output_tokens: str | Path,
240240
processor_args: dict[str, Any] | None = None,
241-
data_args: dict[str, Any] | None = None,
241+
data_args: dict[str, Any] | None = None, # noqa: ARG001
242242
short_prompt_strategy: ShortPromptStrategy = ShortPromptStrategy.IGNORE,
243243
pad_char: str | None = None,
244244
concat_delimiter: str | None = None,

0 commit comments

Comments
 (0)