Skip to content

Commit ef36af1

Browse files
committed
Fixes from review
1 parent 24f2ca3 commit ef36af1

File tree

11 files changed

+109
-147
lines changed

11 files changed

+109
-147
lines changed

pyproject.toml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,8 @@ dependencies = [
7878

7979
[project.optional-dependencies]
8080
perf = ["orjson", "msgpack", "msgspec", "uvloop"]
81-
recommended = [
82-
"tiktoken>=0.11.0", # For OpenAI tokenizer
83-
"blobfile>=3.1.0", # For OpenAI tokenizer
84-
]
81+
openai = ["tiktoken>=0.11.0", "blobfile>=3.1.0"]
82+
recommended = ["guidellm[perf,openai]"]
8583
dev = [
8684
# build
8785
"build>=1.0.0",

src/guidellm/backends/response_handlers.py

Lines changed: 63 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,10 @@
1010

1111
from __future__ import annotations
1212

13-
import json
14-
from typing import Any, Protocol, cast
13+
from typing import Any, Protocol
1514

1615
from guidellm.schemas import GenerationRequest, GenerationResponse, UsageMetrics
17-
from guidellm.utils import RegistryMixin
18-
19-
try:
20-
import orjson
21-
except ImportError:
22-
orjson = None # type: ignore[assignment]
16+
from guidellm.utils import RegistryMixin, json
2317

2418
__all__ = [
2519
"AudioResponseHandler",
@@ -115,8 +109,7 @@ def compile_non_streaming(
115109
:param response: Complete API response containing choices and usage data
116110
:return: Standardized GenerationResponse with extracted text and metrics
117111
"""
118-
choices = cast("list[dict]", response.get("choices", []))
119-
usage = cast("dict[str, int | dict[str, int]]", response.get("usage", {}))
112+
choices, usage = self.extract_choices_and_usage(response)
120113
input_metrics, output_metrics = self.extract_metrics(usage)
121114

122115
return GenerationResponse(
@@ -139,26 +132,17 @@ def add_streaming_line(self, line: str) -> int | None:
139132
:param line: Raw SSE line from the streaming response
140133
:return: 1 if text content was extracted, 0 if line ignored, None if done
141134
"""
142-
if line == "data: [DONE]":
143-
return None
135+
if not (data := self.extract_line_data(line)):
136+
return None if data is None else 0
144137

145-
if not line or not (line := line.strip()) or not line.startswith("data:"):
146-
return 0
147-
148-
line = line[len("data:") :].strip()
149-
data = cast(
150-
"dict[str, Any]",
151-
json.loads(line) if orjson is None else orjson.loads(line),
152-
)
153138
updated = False
139+
choices, usage = self.extract_choices_and_usage(data)
154140

155-
if (choices := cast("list[dict]", data.get("choices"))) and (
156-
text := choices[0].get("text")
157-
):
141+
if text := choices[0].get("text"):
158142
self.streaming_texts.append(text)
159143
updated = True
160144

161-
if usage := cast("dict[str, int | dict[str, int]]", data.get("usage")):
145+
if usage:
162146
self.streaming_usage = usage
163147

164148
return 1 if updated else 0
@@ -182,6 +166,34 @@ def compile_streaming(self, request: GenerationRequest) -> GenerationResponse:
182166
output_metrics=output_metrics,
183167
)
184168

169+
def extract_line_data(self, line: str) -> dict[str, Any] | None:
170+
"""
171+
Extract JSON data from a streaming response line.
172+
173+
:param line: Raw line from the streaming response
174+
:return: Parsed JSON data as a dictionary, or None if line is invalid
175+
"""
176+
if line == "data: [DONE]":
177+
return None
178+
179+
if not line or not (line := line.strip()) or not line.startswith("data:"):
180+
return {}
181+
182+
line = line[len("data:") :].strip()
183+
184+
return json.loads(line)
185+
186+
def extract_choices_and_usage(
187+
self, response: dict
188+
) -> tuple[list[dict], dict[str, int | dict[str, int]]]:
189+
"""
190+
Extract choices and usage data from the API response.
191+
192+
:param response: Complete API response containing choices and usage data
193+
:return: Tuple of (choices list, usage dictionary)
194+
"""
195+
return response.get("choices", []), response.get("usage", {})
196+
185197
def extract_metrics(
186198
self, usage: dict[str, int | dict[str, int]] | None
187199
) -> tuple[UsageMetrics, UsageMetrics]:
@@ -194,15 +206,14 @@ def extract_metrics(
194206
if not usage:
195207
return UsageMetrics(), UsageMetrics()
196208

197-
input_details = cast("dict[str, int]", usage.get("prompt_tokens_details", {}))
198-
output_details = cast(
199-
"dict[str, int]", usage.get("completion_tokens_details", {})
209+
input_details: dict[str, int] = usage.get("prompt_tokens_details", {}) or {}
210+
output_details: dict[str, int] = (
211+
usage.get("completion_tokens_details", {}) or {}
200212
)
201213

202214
return UsageMetrics(
203215
text_tokens=(
204-
input_details.get("prompt_tokens")
205-
or cast("int", usage.get("prompt_tokens"))
216+
input_details.get("prompt_tokens") or usage.get("prompt_tokens")
206217
),
207218
image_tokens=input_details.get("image_tokens"),
208219
video_tokens=input_details.get("video_tokens"),
@@ -211,7 +222,7 @@ def extract_metrics(
211222
), UsageMetrics(
212223
text_tokens=(
213224
output_details.get("completion_tokens")
214-
or cast("int", usage.get("completion_tokens"))
225+
or usage.get("completion_tokens")
215226
),
216227
image_tokens=output_details.get("image_tokens"),
217228
video_tokens=output_details.get("video_tokens"),
@@ -243,18 +254,15 @@ def compile_non_streaming(
243254
:param response: Complete API response containing choices and usage data
244255
:return: Standardized GenerationResponse with extracted content and metrics
245256
"""
246-
choices = cast("list[dict]", response.get("choices", []))
247-
usage = cast("dict[str, int | dict[str, int]]", response.get("usage", {}))
257+
choices, usage = self.extract_choices_and_usage(response)
248258
input_metrics, output_metrics = self.extract_metrics(usage)
249259

250260
return GenerationResponse(
251261
request_id=request.request_id,
252262
request_args=str(
253263
request.arguments.model_dump() if request.arguments else None
254264
),
255-
text=cast("dict", choices[0].get("message", {})).get("content", "")
256-
if choices
257-
else "",
265+
text=(choices[0].get("message", {}).get("content", "") if choices else ""),
258266
input_metrics=input_metrics,
259267
output_metrics=output_metrics,
260268
)
@@ -269,27 +277,17 @@ def add_streaming_line(self, line: str) -> int | None:
269277
:param line: Raw SSE line from the streaming response
270278
:return: 1 if content was extracted, 0 if line ignored, None if done
271279
"""
272-
if line == "data: [DONE]":
273-
return None
280+
if not (data := self.extract_line_data(line)):
281+
return None if data is None else 0
274282

275-
if not line or not (line := line.strip()) or not line.startswith("data:"):
276-
return 0
277-
278-
line = line[len("data:") :].strip()
279-
data = cast(
280-
"dict[str, Any]",
281-
json.loads(line) if orjson is None else orjson.loads(line),
282-
)
283283
updated = False
284+
choices, usage = self.extract_choices_and_usage(data)
284285

285-
# Extract delta content for chat completion chunks
286-
if choices := cast("list[dict]", data.get("choices")):
287-
delta = choices[0].get("delta", {})
288-
if content := delta.get("content"):
289-
self.streaming_texts.append(content)
286+
if choices and (content := choices[0].get("delta", {}).get("content")):
287+
self.streaming_texts.append(content)
290288
updated = True
291289

292-
if usage := cast("dict[str, int | dict[str, int]]", data.get("usage")):
290+
if usage:
293291
self.streaming_usage = usage
294292

295293
return 1 if updated else 0
@@ -355,10 +353,10 @@ def compile_non_streaming(
355353
:param response: Complete API response containing text and usage data
356354
:return: Standardized GenerationResponse with extracted text and metrics
357355
"""
358-
usage = cast("dict[str, int]", response.get("usage", {}))
359-
input_details = cast("dict[str, int]", usage.get("input_token_details", {}))
360-
output_details = cast("dict[str, int]", usage.get("output_token_details", {}))
361-
text = response.get("text", "")
356+
usage: dict[str, int | dict[str, int]] = response.get("usage", {})
357+
input_details: dict[str, int] = usage.get("input_token_details", {}) or {}
358+
output_details: dict[str, int] = usage.get("output_token_details", {}) or {}
359+
text: str = response.get("text", "")
362360

363361
return GenerationResponse(
364362
request_id=request.request_id,
@@ -396,17 +394,16 @@ def add_streaming_line(self, line: str) -> int | None:
396394
if not line or not (line := line.strip()) or not line.startswith("{"):
397395
return 0
398396

399-
data = cast(
400-
"dict[str, Any]",
401-
json.loads(line) if orjson is None else orjson.loads(line),
402-
)
397+
data: dict[str, Any] = json.loads(line)
398+
text: str
399+
usage: dict[str, int | dict[str, int]]
403400
updated = False
404401

405402
if text := data.get("text"):
406403
self.streaming_texts.append(text)
407404
updated = True
408405

409-
if usage := cast("dict[str, int | dict[str, int]]", data.get("usage")):
406+
if usage := data.get("usage"):
410407
self.streaming_usage = usage
411408

412409
return 1 if updated else 0
@@ -445,22 +442,15 @@ def extract_metrics(
445442
if not usage:
446443
return UsageMetrics(), UsageMetrics()
447444

448-
input_details = cast("dict[str, int]", usage.get("input_token_details", {}))
449-
output_details = cast("dict[str, int]", usage.get("output_token_details", {}))
445+
input_details: dict[str, int] = usage.get("input_token_details", {}) or {}
446+
output_details: dict[str, int] = usage.get("output_token_details", {}) or {}
450447

451448
return UsageMetrics(
452-
text_tokens=(
453-
input_details.get("text_tokens")
454-
or cast("int", usage.get("input_tokens"))
455-
),
449+
text_tokens=(input_details.get("text_tokens") or usage.get("input_tokens")),
456450
audio_tokens=(
457-
input_details.get("audio_tokens")
458-
or cast("int", usage.get("audio_tokens"))
459-
),
460-
audio_seconds=(
461-
input_details.get("seconds") or cast("int", usage.get("seconds"))
451+
input_details.get("audio_tokens") or usage.get("audio_tokens")
462452
),
453+
audio_seconds=(input_details.get("seconds") or usage.get("seconds")),
463454
), UsageMetrics(
464-
text_tokens=output_details.get("text_tokens")
465-
or cast("int", usage.get("output_tokens")),
455+
text_tokens=output_details.get("text_tokens") or usage.get("output_tokens"),
466456
)

src/guidellm/benchmark/entrypoints.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@
55
from typing import Any, Literal
66

77
from torch.utils.data import Sampler
8-
from transformers import PreTrainedTokenizerBase
9-
from typing_extensions import TypeAliasType
108

119
from guidellm.backends import Backend, BackendType
1210
from guidellm.benchmark.benchmarker import Benchmarker
1311
from guidellm.benchmark.output import GenerativeBenchmarkerOutput
1412
from guidellm.benchmark.profile import Profile, ProfileType
15-
from guidellm.benchmark.progress import BenchmarkerProgress, BenchmarkerProgressGroup
13+
from guidellm.benchmark.progress import BenchmarkerProgressGroup
1614
from guidellm.benchmark.schemas import GenerativeBenchmark, GenerativeBenchmarksReport
15+
from guidellm.benchmark.types import OutputFormatT, ProcessorInputT, ProgressInputT
1716
from guidellm.data import (
1817
DataLoader,
1918
DatasetPreprocessor,
@@ -40,20 +39,6 @@
4039

4140
_CURRENT_WORKING_DIR = Path.cwd()
4241

43-
OutputFormatT = TypeAliasType(
44-
"OutputFormatT",
45-
tuple[str, ...]
46-
| list[str]
47-
| dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
48-
| None,
49-
)
50-
51-
ProcessorInputT = TypeAliasType("ProcessorInputT", str | Path | PreTrainedTokenizerBase)
52-
53-
ProgressInputT = TypeAliasType(
54-
"ProgressInputT", tuple[str, ...] | list[str] | list[BenchmarkerProgress]
55-
)
56-
5742

5843
# Helper Functions
5944

src/guidellm/benchmark/types.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,7 @@
99
from guidellm.benchmark.output import GenerativeBenchmarkerOutput
1010
from guidellm.benchmark.progress import BenchmarkerProgress
1111

12-
__all__ = [
13-
"AggregatorInputT",
14-
"DataInputT",
15-
"OutputFormatT",
16-
"ProcessorInputT",
17-
"ProgressInputT",
18-
]
12+
__all__ = ["OutputFormatT", "ProcessorInputT", "ProgressInputT"]
1913

2014

2115
OutputFormatT = TypeAliasType(

src/guidellm/data/deserializers/synthetic.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,9 @@ def _create_prefix_iter(self, faker: Faker, rand: Random) -> Iterator[str]:
209209

210210
# Create prefix list maintaining the correct distribution
211211
prefixes = []
212-
for bucket, weight in zip(self.config.prefix_buckets, unnorm_weights, strict=False):
212+
for bucket, weight in zip(
213+
self.config.prefix_buckets, unnorm_weights, strict=False
214+
):
213215
bucket_prefixes = [
214216
self._create_prompt(bucket.prefix_tokens, faker)
215217
for _ in range(bucket.prefix_count)

src/guidellm/data/preprocessors/mappers.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,16 @@ def datasets_mappings(
120120
for index, dataset in enumerate(datasets)
121121
}
122122

123+
# Parse out user mappings that were passed in and validate them
124+
# Must be in the format of:
125+
# {<column_type>: [<column_names>]}
126+
# where <column_names> can be a single string or list of strings
127+
# and each string can be any of:
128+
# - a column name (assumes the first dataset was intended)
129+
# - <int>.<column_name> where <int> is the dataset index
130+
# - <str>.<column_name> where <str> is the dataset name
123131
for column_type, names in input_mappings.items():
124132
mappings[column_type] = []
125-
126133
for name in names if isinstance(names, list) else [names]:
127134
if "." in name:
128135
dataset, column_name = name.split(".", 1)

src/guidellm/scheduler/worker.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -232,18 +232,18 @@ async def _processing_startup(self):
232232
self.backend_started = True
233233
await self.backend.validate()
234234

235-
# Wait for all processes to be ready
236-
await wait_for_sync_barrier(
237-
self.startup_barrier,
238-
poll_interval=self.messaging.poll_interval,
239-
)
240-
241235
# Get messaging system ready
242236
await self.messaging.start(
243237
receive_stop_criteria=[self.requests_generated_event]
244238
)
245239
self.messaging_started = True
246240

241+
# Wait for all processes to be ready
242+
await wait_for_sync_barrier(
243+
self.startup_barrier,
244+
poll_interval=self.messaging.poll_interval,
245+
)
246+
247247
self.startup_completed = True
248248

249249
async def _processing_shutdown(self):

src/guidellm/settings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class LoggingSettings(BaseModel):
4646

4747
disabled: bool = False
4848
clear_loggers: bool = True
49-
console_log_level: str = "DEBUG"
49+
console_log_level: str = "WARNING"
5050
log_file: str | None = None
5151
log_file_level: str | None = None
5252

@@ -145,7 +145,7 @@ class Settings(BaseSettings):
145145
mp_max_pending_buffer_percent: float = 0.5
146146
mp_max_worker_buffer_percent: float = 0.2
147147
max_concurrency: int = 512
148-
max_worker_processes: int = 2
148+
max_worker_processes: int = 10
149149
scheduler_start_delay_non_distributed: float = 1.0
150150
constraint_error_window_size: float = 30
151151
constraint_error_min_processed: float = 30

0 commit comments

Comments (0)