mlcommons
diff --git a/examples/07_GPT-OSS-120B_SGLang_Example/run.py b/examples/07_GPT-OSS-120B_SGLang_Example/run.py
Lines changed: 2 additions & 7 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 5 additions & 5 deletions
diff --git a/requirements/base.txt b/requirements/base.txt
Lines changed: 5 additions & 5 deletions
diff --git a/requirements/test.txt b/requirements/test.txt
Lines changed: 2 additions & 2 deletions
diff --git a/src/inference_endpoint/commands/benchmark.py b/src/inference_endpoint/commands/benchmark.py
Lines changed: 3 additions & 8 deletions
diff --git a/src/inference_endpoint/commands/probe.py b/src/inference_endpoint/commands/probe.py
Lines changed: 2 additions & 6 deletions
diff --git a/src/inference_endpoint/config/schema.py b/src/inference_endpoint/config/schema.py
Lines changed: 8 additions & 5 deletions
diff --git a/src/inference_endpoint/endpoint_client/README.md b/src/inference_endpoint/endpoint_client/README.md
Lines changed: 4 additions & 30 deletions
diff --git a/src/inference_endpoint/endpoint_client/__init__.py b/src/inference_endpoint/endpoint_client/__init__.py
Lines changed: 1 addition & 2 deletions
@@ -43,10 +43,7 @@
     AIME_GPTOSS_SGLang,
 )
 from inference_endpoint.dataset_manager.predefined.gpqa import GPQA, GPQA_GPTOSS_SGLang
-from inference_endpoint.endpoint_client.configs import (
-    AioHttpConfig,
-    HTTPClientConfig,
-)
+from inference_endpoint.endpoint_client.configs import HTTPClientConfig
 from inference_endpoint.endpoint_client.http_client import HTTPEndpointClient
 from inference_endpoint.endpoint_client.http_sample_issuer import HttpClientSampleIssuer
 from inference_endpoint.evaluation.extractor import ABCDExtractor, BoxedMathExtractor
@@ -95,9 +92,7 @@ def create_sglang_client(tmp_dir: Path) -> HTTPEndpointClient:
         api_type="sglang",
     )
 
-    aiohttp_config = AioHttpConfig()
-
-    client = HTTPEndpointClient(http_config, aiohttp_config)
+    client = HTTPEndpointClient(http_config)
     return client
 
 
 
@@ -28,11 +28,11 @@ requires-python = ">=3.12"
 dependencies = [
     "asyncio",
     "typing-extensions>=4.0.0",
-    "orjson==3.11.0",
-    "aiohttp==3.12.15",
-    "pyzmq==27.0.2",
-    "uvloop==0.21.0",
-    "msgspec==0.19.0",
+    "orjson==3.11.5",
+    "pyzmq==27.1.0",
+    "uvloop==0.22.1",
+    "msgspec==0.20.0",
+    "httptools==0.7.1",
 ]
 
 [project.optional-dependencies]
 
@@ -13,14 +13,14 @@ psutil==6.1.1
 
 # Note: asyncio is part of the Python standard library (since 3.4); no separate install is needed
 # Async and networking
-aiohttp==3.12.15
-pyzmq==27.0.2
-uvloop==0.21.0
+pyzmq==27.1.0
+uvloop==0.22.1
+httptools==0.7.1
 
 # Data handling
 duckdb==1.4.0
-orjson==3.11.0
-msgspec==0.19.0
+orjson==3.11.5
+msgspec==0.20.0
 pydantic==2.12.0
 pydantic_core==2.41.1
 
 
@@ -16,7 +16,7 @@ line-profiler==5.0.0
 Pympler==1.1
 scipy==1.16.3
 
-# HTTP server and client for mock server fixture
-aiohttp>=3.8.0
+# HTTP server and client for mock server fixture (test-only dependency)
+aiohttp==3.13.3
 pytest-timeout==2.4.0
 pytest-xdist==3.8.0
@@ -55,10 +55,7 @@
 from inference_endpoint.config.yaml_loader import ConfigError, ConfigLoader
 from inference_endpoint.core.types import QueryResult
 from inference_endpoint.dataset_manager.factory import DataLoaderFactory
-from inference_endpoint.endpoint_client.configs import (
-    AioHttpConfig,
-    HTTPClientConfig,
-)
+from inference_endpoint.endpoint_client.configs import HTTPClientConfig
 from inference_endpoint.endpoint_client.http_client import HTTPEndpointClient
 from inference_endpoint.endpoint_client.http_sample_issuer import HttpClientSampleIssuer
 from inference_endpoint.evaluation import Extractor
@@ -309,7 +306,7 @@ def _build_config_from_cli(
                 dataloader_random_seed=42,
             ),
             client=ClientSettings(
-                workers=args.workers if args.workers else 4,
+                workers=args.workers if args.workers else "auto",
                 log_level="DEBUG" if verbose_level >= 2 else "INFO",
             ),
         ),
@@ -622,9 +619,7 @@ def _run_benchmark(
             log_level=config.settings.client.log_level,
             cpu_affinity=config.settings.client.cpu_affinity,
         )
-        aiohttp_config = AioHttpConfig()
-
-        http_client = HTTPEndpointClient(http_config, aiohttp_config)
+        http_client = HTTPEndpointClient(http_config)
         sample_issuer = HttpClientSampleIssuer(http_client)
 
     except Exception as e:
 
@@ -23,10 +23,7 @@
 
 from inference_endpoint.config.schema import APIType
 from inference_endpoint.core.types import Query, QueryResult
-from inference_endpoint.endpoint_client.configs import (
-    AioHttpConfig,
-    HTTPClientConfig,
-)
+from inference_endpoint.endpoint_client.configs import HTTPClientConfig
 from inference_endpoint.endpoint_client.http_client import HTTPEndpointClient
 from inference_endpoint.exceptions import (
     ExecutionError,
@@ -70,9 +67,8 @@ async def run_probe_command(args: argparse.Namespace) -> None:
             api_type=api_type,
             num_workers=1,
         )
-        aiohttp_config = AioHttpConfig()
         # Client creates its own event loop in a separate thread
-        client = HTTPEndpointClient(http_config, aiohttp_config)
+        client = HTTPEndpointClient(http_config)
 
         logger.info(f"Sending {num_requests} requests...")
 
 
@@ -268,7 +268,10 @@ class ClientSettings(BaseModel):
 
     """
 
-    workers: int = 4
+    # Number of worker processes or "auto" for automatic detection
+    #   - "auto" uses max(4, loadgen_numa_domain_size - 1)
+    workers: int | str = "auto"
+
     record_worker_events: bool = False
     log_level: str = "INFO"
 
@@ -511,10 +514,10 @@ def validate_client_settings(self) -> None:
         Raises:
             ValueError: If settings are invalid
         """
-        if self.settings.client.workers < 1:
-            raise ValueError(
-                f"workers must be >= 1, got {self.settings.client.workers}"
-            )
+        workers = self.settings.client.workers
+        # "auto" is valid, integers must be >= 1
+        if isinstance(workers, int) and workers < 1:
+            raise ValueError(f"workers must be >= 1, got {workers}")
 
     def validate_runtime_settings(self) -> None:
         """Validate runtime settings are reasonable.
 
@@ -27,18 +27,11 @@ HTTP client for LLM inference with multiprocessing workers and ZMQ communication
 ## Usage
 
 ```python
-from inference_endpoint.endpoint_client import (
-    HTTPEndpointClient,
-    HTTPClientConfig,
-    AioHttpConfig,
-    ZMQConfig,
-)
+from inference_endpoint.endpoint_client import HTTPEndpointClient, HTTPClientConfig
 from inference_endpoint.core.types import Query
 
 client = HTTPEndpointClient(
-    HTTPClientConfig(endpoint_url="http://localhost:8000/v1/completions", num_workers=2),
-    AioHttpConfig(),
-    ZMQConfig(),
+    HTTPClientConfig(endpoint_url="http://localhost:8000/v1/completions")
 )
 
 # Sync issue (fire-and-forget)
@@ -64,19 +57,12 @@ if response:
 ### With HttpClientSampleIssuer
 
 ```python
-from inference_endpoint.endpoint_client import (
-    HTTPEndpointClient,
-    HTTPClientConfig,
-    AioHttpConfig,
-    ZMQConfig,
-)
+from inference_endpoint.endpoint_client import HTTPEndpointClient, HTTPClientConfig
 from inference_endpoint.endpoint_client.http_sample_issuer import HttpClientSampleIssuer
 from inference_endpoint.load_generator.sample import Sample
 
 client = HTTPEndpointClient(
-    HTTPClientConfig(endpoint_url="http://localhost:8000/v1/completions", num_workers=4),
-    AioHttpConfig(),
-    ZMQConfig(),
+    HTTPClientConfig(endpoint_url="http://localhost:8000/v1/completions", num_workers=4)
 )
 issuer = HttpClientSampleIssuer(client)
 
@@ -86,18 +72,6 @@ issuer.issue(Sample(
 ))
 ```
 
-## Configuration
-
-```python
-HTTPClientConfig(
-    endpoint_url="http://localhost:8000/v1/completions",
-    num_workers=4,  # Number of worker processes
-)
-
-AioHttpConfig()  # Socket, TCP, HTTP configs (use defaults)
-ZMQConfig()      # IPC configs (use defaults)
-```
-
 ## Shutdown
 
 Shutdown is optional. Workers and the event loop thread are daemons, so they terminate automatically with the main process.
 
@@ -19,12 +19,11 @@
 This module provides HTTP client implementation with multiprocessing and ZMQ.
 """
 
-from .configs import AioHttpConfig, HTTPClientConfig
+from .configs import HTTPClientConfig
 from .http_client import AsyncHttpEndpointClient, HTTPEndpointClient
 
 __all__ = [
     "AsyncHttpEndpointClient",
     "HTTPEndpointClient",
     "HTTPClientConfig",
-    "AioHttpConfig",
 ]