Skip to content

Commit ad77838

Browse files
committed
Fix cyclical dependencies for SSE msgspec structs, add initial test for SGLang adapter
1 parent 657a601 commit ad77838

File tree

13 files changed

+412
-179
lines changed

13 files changed

+412
-179
lines changed

src/inference_endpoint/commands/benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from inference_endpoint.commands.utils import get_default_report_path
3636
from inference_endpoint.config.runtime_settings import RuntimeSettings
3737
from inference_endpoint.config.schema import (
38+
APIType,
3839
BenchmarkConfig,
3940
ClientSettings,
4041
Dataset,
@@ -56,7 +57,6 @@
5657
from inference_endpoint.dataset_manager.factory import DataLoaderFactory
5758
from inference_endpoint.endpoint_client.configs import (
5859
AioHttpConfig,
59-
APIType,
6060
HTTPClientConfig,
6161
ZMQConfig,
6262
)

src/inference_endpoint/commands/probe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
import time
2323
from urllib.parse import urljoin
2424

25+
from inference_endpoint.config.schema import APIType
2526
from inference_endpoint.core.types import Query, QueryResult
2627
from inference_endpoint.endpoint_client.configs import (
2728
AioHttpConfig,
28-
APIType,
2929
HTTPClientConfig,
3030
ZMQConfig,
3131
)

src/inference_endpoint/config/schema.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,23 @@
2727
from pydantic import BaseModel, Field
2828

2929
from .. import metrics
30-
from ..endpoint_client.configs import APIType
3130
from .ruleset_base import BenchmarkSuiteRuleset
3231

3332

33+
class APIType(str, Enum):
    """Inference API flavor, keyed by its lowercase wire name."""

    OPENAI = "openai"
    SGLANG = "sglang"

    def default_route(self) -> str:
        """Return the default HTTP route served by this API type.

        Raises:
            ValueError: If no route is known for this member.
        """
        if self is APIType.OPENAI:
            return "/v1/chat/completions"
        if self is APIType.SGLANG:
            return "/generate"
        raise ValueError(f"Invalid API type: {self}")
45+
46+
3447
class LoadPatternType(str, Enum):
3548
"""Load pattern types."""
3649

src/inference_endpoint/endpoint_client/configs.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,14 @@
1818
import os
1919
import socket
2020
from dataclasses import dataclass, field
21-
from enum import Enum
2221
from pathlib import Path
2322
from typing import Any
2423

2524
import aiohttp
2625
import zmq
2726

28-
from inference_endpoint.endpoint_client.adapter_protocol import HttpRequestAdapter
29-
30-
31-
class APIType(Enum):
32-
OPENAI = "openai"
33-
SGLANG = "sglang"
34-
35-
def default_route(self) -> str:
36-
match self:
37-
case APIType.OPENAI:
38-
return "/v1/chat/completions"
39-
case APIType.SGLANG:
40-
return "/generate"
41-
case _:
42-
raise ValueError(f"Invalid API type: {self}")
27+
from ..config.schema import APIType
28+
from .adapter_protocol import HttpRequestAdapter
4329

4430

4531
@dataclass
@@ -80,6 +66,9 @@ class HTTPClientConfig:
8066

8167
def __post_init__(self):
8268
# set default adapter in __post_init__ to avoid circular dependency
69+
if isinstance(self.api_type, str):
70+
self.api_type = APIType(self.api_type)
71+
8372
if self.adapter is None:
8473
if self.api_type == APIType.OPENAI:
8574
from inference_endpoint.openai.openai_msgspec_adapter import (

src/inference_endpoint/endpoint_client/worker.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,24 +31,24 @@
3131
import zmq
3232
import zmq.asyncio
3333

34+
from inference_endpoint.config.schema import APIType
3435
from inference_endpoint.core.types import (
3536
Query,
3637
QueryResult,
3738
StreamChunk,
3839
)
3940
from inference_endpoint.endpoint_client.configs import (
4041
AioHttpConfig,
41-
APIType,
4242
HTTPClientConfig,
4343
ZMQConfig,
4444
)
4545
from inference_endpoint.endpoint_client.zmq_utils import ZMQPullSocket, ZMQPushSocket
4646
from inference_endpoint.load_generator.events import SampleEvent
4747
from inference_endpoint.metrics.recorder import EventRecorder
4848
from inference_endpoint.metrics.reporter import MetricsReporter
49-
from inference_endpoint.openai.openai_adapter import SSEDelta as OpenAISSEDelta
49+
from inference_endpoint.openai.types import SSEDelta as OpenAISSEDelta
5050
from inference_endpoint.profiling import profile
51-
from inference_endpoint.sglang.adapter import SGLangSSEDelta
51+
from inference_endpoint.sglang.types import SGLangSSEDelta
5252
from inference_endpoint.utils.logging import setup_logging
5353

5454
logger = logging.getLogger(__name__)

src/inference_endpoint/openai/openai_adapter.py

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -34,27 +34,7 @@
3434
Role6,
3535
ServiceTier,
3636
)
37-
38-
39-
# msgspec structs for typed SSE message parsing (OpenAI streaming format)
40-
class SSEDelta(msgspec.Struct):
41-
"""SSE delta object containing content."""
42-
43-
content: str = ""
44-
reasoning: str = ""
45-
46-
47-
class SSEChoice(msgspec.Struct):
48-
"""SSE choice object containing delta."""
49-
50-
delta: SSEDelta = msgspec.field(default_factory=SSEDelta)
51-
finish_reason: str | None = None
52-
53-
54-
class SSEMessage(msgspec.Struct):
55-
"""SSE message structure for OpenAI streaming responses."""
56-
57-
choices: list[SSEChoice] = msgspec.field(default_factory=list)
37+
from .types import SSEMessage
5838

5939

6040
class OpenAIAdapter(HttpRequestAdapter):

src/inference_endpoint/openai/openai_msgspec_adapter.py

Lines changed: 8 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -25,75 +25,14 @@
2525
# Import base class and shared SSE types
2626
from inference_endpoint.endpoint_client.adapter_protocol import HttpRequestAdapter
2727

28-
from .openai_adapter import SSEMessage
29-
30-
# ============================================================================
31-
# msgspec Structs for OpenAI API Types
32-
# ============================================================================
33-
34-
35-
class ChatMessage(msgspec.Struct, kw_only=True, omit_defaults=True):
36-
"""Chat message in OpenAI format."""
37-
38-
role: str
39-
content: str
40-
name: str | None = None
41-
42-
43-
class ChatCompletionRequest(msgspec.Struct, kw_only=True, omit_defaults=True):
44-
"""OpenAI chat completion request."""
45-
46-
model: str
47-
messages: list[ChatMessage]
48-
temperature: float | None = None
49-
max_completion_tokens: int | None = None
50-
stream: bool | None = None
51-
top_p: float | None = None
52-
top_k: int | None = None
53-
repetition_penalty: float | None = None
54-
n: int | None = None
55-
stop: str | list[str] | None = None
56-
presence_penalty: float | None = None
57-
frequency_penalty: float | None = None
58-
logit_bias: dict[str, float] | None = None
59-
user: str | None = None
60-
61-
62-
class ChatCompletionResponseMessage(msgspec.Struct, kw_only=True, omit_defaults=True):
63-
"""Response message from OpenAI."""
64-
65-
role: str
66-
content: str | None
67-
refusal: str | None
68-
69-
70-
class ChatCompletionChoice(msgspec.Struct, kw_only=True, omit_defaults=True):
71-
"""A single choice in the completion response."""
72-
73-
index: int
74-
message: ChatCompletionResponseMessage
75-
finish_reason: str | None
76-
77-
78-
class CompletionUsage(msgspec.Struct, kw_only=True, omit_defaults=True):
79-
"""Token usage statistics."""
80-
81-
prompt_tokens: int
82-
completion_tokens: int
83-
total_tokens: int
84-
85-
86-
class ChatCompletionResponse(msgspec.Struct, kw_only=True, omit_defaults=True):
87-
"""OpenAI chat completion response (msgspec version)."""
88-
89-
id: str
90-
object: str = "chat.completion"
91-
created: int
92-
model: str
93-
choices: list[ChatCompletionChoice]
94-
usage: CompletionUsage | None
95-
system_fingerprint: str | None
96-
28+
from .types import (
29+
ChatCompletionChoice,
30+
ChatCompletionRequest,
31+
ChatCompletionResponse,
32+
ChatCompletionResponseMessage,
33+
ChatMessage,
34+
SSEMessage,
35+
)
9736

9837
# ============================================================================
9938
# msgspec-based OpenAI Adapter
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""
17+
msgspec types for OpenAI API serialization/deserialization.
18+
"""
19+
20+
import msgspec
21+
22+
# ============================================================================
23+
# SSE (Server-Sent Events) Types for OpenAI streaming format
24+
# ============================================================================
25+
26+
27+
class SSEDelta(msgspec.Struct):
    """SSE delta object containing content.

    Decoded from the ``delta`` field of an OpenAI-style streaming chunk.
    """

    # Incremental completion text for this chunk; defaults to "" so chunks
    # that omit the field (e.g. metadata-only chunks) decode cleanly.
    content: str = ""
    # Incremental reasoning text — presumably a separate "thinking" stream
    # emitted by some servers; TODO(review) confirm against target backends.
    reasoning: str = ""
32+
33+
34+
class SSEChoice(msgspec.Struct):
    """SSE choice object containing delta.

    One entry of the ``choices`` array in a streaming chunk.
    """

    # Chunk payload; default_factory yields an empty delta when the server
    # omits the field entirely.
    delta: SSEDelta = msgspec.field(default_factory=SSEDelta)
    # NOTE(review): assumed non-None only on the stream's final chunk
    # (e.g. "stop") — confirm against server output.
    finish_reason: str | None = None
39+
40+
41+
class SSEMessage(msgspec.Struct):
    """SSE message structure for OpenAI streaming responses."""

    # Streaming choices; defaults to an empty list so keep-alive or
    # metadata-only events decode without error.
    choices: list[SSEChoice] = msgspec.field(default_factory=list)
45+
46+
47+
# ============================================================================
48+
# OpenAI Chat Completion Types (msgspec-based)
49+
# ============================================================================
50+
51+
52+
class ChatMessage(msgspec.Struct, kw_only=True, omit_defaults=True):
    """Chat message in OpenAI format.

    ``kw_only`` forces keyword construction; ``omit_defaults`` keeps unset
    optional fields out of the encoded request body.
    """

    # Author role, e.g. "system", "user", or "assistant".
    role: str
    # Message text.
    content: str
    # Optional participant name; omitted from the wire format when None.
    name: str | None = None
58+
59+
60+
class ChatCompletionRequest(msgspec.Struct, kw_only=True, omit_defaults=True):
    """OpenAI chat completion request.

    ``omit_defaults`` ensures only explicitly-set sampling parameters are
    serialized, so the server applies its own defaults for the rest.
    """

    # Target model identifier.
    model: str
    # Conversation history, oldest first.
    messages: list[ChatMessage]
    temperature: float | None = None
    max_completion_tokens: int | None = None
    # When True the server responds with an SSE stream (see SSEMessage).
    stream: bool | None = None
    top_p: float | None = None
    # NOTE(review): top_k and repetition_penalty are not part of the upstream
    # OpenAI schema — presumably extensions accepted by the inference servers
    # this tool targets; confirm before relying on them.
    top_k: int | None = None
    repetition_penalty: float | None = None
    # Number of completions to generate per request.
    n: int | None = None
    # Stop sequence(s) that terminate generation.
    stop: str | list[str] | None = None
    presence_penalty: float | None = None
    frequency_penalty: float | None = None
    # Per-token-id logit adjustments, keyed by token id as a string.
    logit_bias: dict[str, float] | None = None
    # End-user identifier forwarded to the server.
    user: str | None = None
77+
78+
79+
class ChatCompletionResponseMessage(msgspec.Struct, kw_only=True, omit_defaults=True):
    """Response message from OpenAI.

    Required (no defaults): decoding fails loudly if the server omits any
    of these fields.
    """

    # Author role of the generated message.
    role: str
    # Generated text; None when the model produced no content.
    content: str | None
    # Refusal message, when the model declined to answer; None otherwise.
    refusal: str | None
85+
86+
87+
class ChatCompletionChoice(msgspec.Struct, kw_only=True, omit_defaults=True):
    """A single choice in the completion response."""

    # Position of this choice within the response's choices array.
    index: int
    # The generated message for this choice.
    message: ChatCompletionResponseMessage
    # Why generation stopped (e.g. "stop", "length"); may be None.
    finish_reason: str | None
93+
94+
95+
class CompletionUsage(msgspec.Struct, kw_only=True, omit_defaults=True):
    """Token usage statistics."""

    # Tokens consumed by the prompt/messages.
    prompt_tokens: int
    # Tokens generated in the completion.
    completion_tokens: int
    # prompt_tokens + completion_tokens, as reported by the server.
    total_tokens: int
101+
102+
103+
class ChatCompletionResponse(msgspec.Struct, kw_only=True, omit_defaults=True):
    """OpenAI chat completion response (msgspec version)."""

    # Server-assigned response identifier.
    id: str
    # Object type discriminator; only the default is expected here.
    object: str = "chat.completion"
    # Creation time as a Unix timestamp (seconds).
    created: int
    # Model that produced the response.
    model: str
    # One entry per requested completion (request parameter ``n``).
    choices: list[ChatCompletionChoice]
    # Token accounting; None if the server does not report usage.
    usage: CompletionUsage | None
    # Opaque backend fingerprint; None when not provided.
    system_fingerprint: str | None

0 commit comments

Comments
 (0)