fix: address code review feedback from PR #193

viraatc · claude · viraatc · commit bc9cbbb08117 · 2026-03-20T17:00:42.000-07:00
- Add @pytest.mark.unit markers to new test_utils.py
- Fix resolver.py docstring: OfflineBenchmark/OnlineBenchmark -&gt; OfflineConfig/OnlineConfig
- Fix runner.py docstring: Typer -&gt; synchronous CLI
- Fix AGENTS.md CLI path: config/cli.py -&gt; cli.py, commands/benchmark/cli.py
- Fix schema.py standalone triple-quoted string -&gt; comment
- Add yaml.YAMLError to validate.py except clause
- Replace obscure SIGINT lambda with plain function in execute.py
- Fix CLI_QUICK_REFERENCE.md uppercase enum values
- Add SampleEventHandler.clear_hooks before register in execute.py

Co-Authored-By: Claude Opus 4.6 (1M context) &lt;noreply@anthropic.com&gt;
diff --git a/AGENTS.md b/AGENTS.md
@@ -46,16 +46,16 @@ Dataset Manager --> Load Generator --> Endpoint Client --> External Endpoint
 
 ### Key Components
 
-| Component           | Location                                    | Purpose                                                                                                                  |
-| ------------------- | ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------ |
-| **Load Generator**  | `src/inference_endpoint/load_generator/`    | Central orchestrator: `BenchmarkSession` owns the lifecycle, `Scheduler` controls timing, `LoadGenerator` issues queries |
-| **Endpoint Client** | `src/inference_endpoint/endpoint_client/`   | Multi-process HTTP workers communicating via ZMQ IPC. `HTTPEndpointClient` is the main entry point                       |
-| **Dataset Manager** | `src/inference_endpoint/dataset_manager/`   | Loads pickle, HuggingFace, JSONL datasets. `Dataset` base class with `load_sample()`/`num_samples()` interface           |
-| **Metrics**         | `src/inference_endpoint/metrics/`           | `EventRecorder` writes to SQLite, `MetricsReporter` reads and aggregates (QPS, latency, TTFT, TPOT)                      |
-| **Config**          | `src/inference_endpoint/config/`            | Pydantic-based YAML schema (`schema.py`), ruleset registry for MLCommons compliance, `RuntimeSettings` for runtime state |
-| **CLI**             | `src/inference_endpoint/config/cli.py`      | cyclopts-based, auto-generated from `schema.py` Pydantic models. Flat shorthands via `cyclopts.Parameter(name=...)`      |
-| **Async Utils**     | `src/inference_endpoint/async_utils/`       | `LoopManager` (uvloop + eager_task_factory), ZMQ transport layer, event publisher                                        |
-| **OpenAI/SGLang**   | `src/inference_endpoint/openai/`, `sglang/` | Protocol adapters and response accumulators for different API formats                                                    |
+| Component           | Location                                                     | Purpose                                                                                                                  |
+| ------------------- | ------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------ |
+| **Load Generator**  | `src/inference_endpoint/load_generator/`                     | Central orchestrator: `BenchmarkSession` owns the lifecycle, `Scheduler` controls timing, `LoadGenerator` issues queries |
+| **Endpoint Client** | `src/inference_endpoint/endpoint_client/`                    | Multi-process HTTP workers communicating via ZMQ IPC. `HTTPEndpointClient` is the main entry point                       |
+| **Dataset Manager** | `src/inference_endpoint/dataset_manager/`                    | Loads pickle, HuggingFace, JSONL datasets. `Dataset` base class with `load_sample()`/`num_samples()` interface           |
+| **Metrics**         | `src/inference_endpoint/metrics/`                            | `EventRecorder` writes to SQLite, `MetricsReporter` reads and aggregates (QPS, latency, TTFT, TPOT)                      |
+| **Config**          | `src/inference_endpoint/config/`                             | Pydantic-based YAML schema (`schema.py`), ruleset registry for MLCommons compliance, `RuntimeSettings` for runtime state |
+| **CLI**             | `src/inference_endpoint/cli.py`, `commands/benchmark/cli.py` | cyclopts-based, auto-generated from `schema.py` Pydantic models. Flat shorthands via `cyclopts.Parameter(alias=...)`     |
+| **Async Utils**     | `src/inference_endpoint/async_utils/`                        | `LoopManager` (uvloop + eager_task_factory), ZMQ transport layer, event publisher                                        |
+| **OpenAI/SGLang**   | `src/inference_endpoint/openai/`, `sglang/`                  | Protocol adapters and response accumulators for different API formats                                                    |
 
 ### Hot-Path Architecture
 
diff --git a/docs/CLI_QUICK_REFERENCE.md b/docs/CLI_QUICK_REFERENCE.md
@@ -303,8 +303,8 @@ Note: For submission configs, `model_params.name` is optional when `submission_r
 - Online mode requires `--load-pattern` (poisson or concurrency)
   - `poisson` requires `--target-qps`
   - `concurrency` requires `--concurrency`
-- Use `--mode BOTH` for combined perf + accuracy runs
-- Streaming: AUTO (default) resolves to OFF for offline, ON for online
+- Use `--mode both` for combined perf + accuracy runs
+- Streaming: auto (default) resolves to off for offline, on for online
 
 **Best Practices:**
 
diff --git a/src/inference_endpoint/async_utils/runner.py b/src/inference_endpoint/async_utils/runner.py
@@ -30,7 +30,7 @@ def run_async(coro: Coroutine[object, object, T]) -> T:
     """Run a coroutine with uvloop and eager_task_factory.
 
     Creates a fresh event loop per invocation. This is the standard way for
-    Typer command handlers (which are sync) to execute async logic.
+    synchronous CLI command handlers to execute async logic.
     """
     with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
         runner.get_loop().set_task_factory(asyncio.eager_task_factory)  # type: ignore[arg-type]
diff --git a/src/inference_endpoint/commands/benchmark/execute.py b/src/inference_endpoint/commands/benchmark/execute.py
@@ -322,6 +322,7 @@ def run_benchmark_threaded(ctx: BenchmarkContext) -> tuple[Any, ResponseCollecto
         smoothing=0,  # smoothing=0 shows average instead of EMA
     )
     collector = ResponseCollector(collect_responses=ctx.collect_responses, pbar=pbar)
+    SampleEventHandler.clear_hooks(SampleEvent.COMPLETE)
     SampleEventHandler.register_hook(SampleEvent.COMPLETE, collector.on_complete_hook)
 
     # Create endpoint client
@@ -369,9 +370,10 @@ def run_benchmark_threaded(ctx: BenchmarkContext) -> tuple[Any, ResponseCollecto
             )
 
             # Wait for test end with ability to interrupt
-            old_handler = signal.signal(
-                signal.SIGINT, lambda *_: (_ for _ in ()).throw(KeyboardInterrupt())
-            )
+            def _raise_keyboard_interrupt(*_: object) -> None:
+                raise KeyboardInterrupt
+
+            old_handler = signal.signal(signal.SIGINT, _raise_keyboard_interrupt)
             try:
                 sess.wait_for_test_end()
             finally:
diff --git a/src/inference_endpoint/commands/validate.py b/src/inference_endpoint/commands/validate.py
@@ -18,6 +18,7 @@
 import logging
 from pathlib import Path
 
+import yaml
 from pydantic import ValidationError
 
 from ..config.schema import BenchmarkConfig
@@ -42,6 +43,6 @@ def execute_validate(config_path: Path) -> None:
                 f"ruleset={config.submission_ref.ruleset}"
             )
 
-    except (ValidationError, ValueError, FileNotFoundError) as e:
+    except (ValidationError, ValueError, FileNotFoundError, yaml.YAMLError) as e:
         logger.error("Validation failed")
         raise InputValidationError(f"Config validation failed: {e}") from e
diff --git a/src/inference_endpoint/config/schema.py b/src/inference_endpoint/config/schema.py
@@ -386,7 +386,7 @@ class OfflineSettings(Settings):
     )
 
 
-"""Online mode default settings."""
+# Online mode default settings.
 OnlineSettings = Settings
 
 

Original file line number	Diff line number	Diff line change
`@@ -386,7 +386,7 @@ class OfflineSettings(Settings):`
`386`	`386`	`)`
`387`	`387`
`388`	`388`
`389`		`-"""Online mode default settings."""`
	`389`	`+# Online mode default settings.`
`390`	`390`	`OnlineSettings = Settings`
`391`	`391`
`392`	`392`