Commit 60b77e5

Revert "Configurable max_tokens/max_completion_tokens key (#399)"
This reverts commit 121dcdc.
1 parent 121dcdc · commit 60b77e5

3 files changed: 9 additions, 14 deletions


src/guidellm/backend/openai.py

Lines changed: 6 additions & 9 deletions
@@ -31,11 +31,10 @@
 TEXT_COMPLETIONS_PATH = "/v1/completions"
 CHAT_COMPLETIONS_PATH = "/v1/chat/completions"
 
-CompletionEndpointType = Literal["text_completions", "chat_completions"]
-EndpointType = Union[Literal["models"], CompletionEndpointType]
-CHAT_COMPLETIONS: CompletionEndpointType = "chat_completions"
+EndpointType = Literal["chat_completions", "models", "text_completions"]
+CHAT_COMPLETIONS: EndpointType = "chat_completions"
 MODELS: EndpointType = "models"
-TEXT_COMPLETIONS: CompletionEndpointType = "text_completions"
+TEXT_COMPLETIONS: EndpointType = "text_completions"
 
 
 @Backend.register("openai_http")
@@ -448,7 +447,7 @@ def _extra_body(self, endpoint_type: EndpointType) -> dict[str, Any]:
 
     def _completions_payload(
         self,
-        endpoint_type: CompletionEndpointType,
+        endpoint_type: EndpointType,
         orig_kwargs: Optional[dict],
         max_output_tokens: Optional[int],
         **kwargs,
@@ -468,10 +467,8 @@ def _completions_payload(
                 self.__class__.__name__,
                 max_output_tokens or self.max_output_tokens,
             )
-            max_output_key = settings.openai.max_output_key.get(
-                endpoint_type, "max_tokens"
-            )
-            payload[max_output_key] = max_output_tokens or self.max_output_tokens
+            payload["max_tokens"] = max_output_tokens or self.max_output_tokens
+            payload["max_completion_tokens"] = payload["max_tokens"]
 
             if max_output_tokens:
                 # only set stop and ignore_eos if max_output_tokens set at request level
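
The net effect of the revert in _completions_payload is that both token-limit keys are always sent with the same value, instead of one key being selected per endpoint type via settings. A minimal sketch of the resulting behavior; the function name build_token_limit_args and the DEFAULT_MAX_OUTPUT_TOKENS constant are illustrative, not guidellm's actual API:

from typing import Literal, Optional

EndpointType = Literal["chat_completions", "models", "text_completions"]

# Illustrative stand-in for settings.openai.max_output_tokens.
DEFAULT_MAX_OUTPUT_TOKENS = 16384


def build_token_limit_args(
    endpoint_type: EndpointType,
    max_output_tokens: Optional[int] = None,
) -> dict:
    # After the revert, endpoint_type no longer selects the key name;
    # both keys are sent with the same value on every completions request.
    limit = max_output_tokens or DEFAULT_MAX_OUTPUT_TOKENS
    return {"max_tokens": limit, "max_completion_tokens": limit}


assert build_token_limit_args("chat_completions", 128) == {
    "max_tokens": 128,
    "max_completion_tokens": 128,
}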

src/guidellm/config.py

Lines changed: 0 additions & 4 deletions
@@ -88,10 +88,6 @@ class OpenAISettings(BaseModel):
     base_url: str = "http://localhost:8000"
     max_output_tokens: int = 16384
     verify: bool = True
-    max_output_key: dict[Literal["text_completions", "chat_completions"], str] = {
-        "text_completions": "max_tokens",
-        "chat_completions": "max_completion_tokens",
-    }
 
 
 class ReportGenerationSettings(BaseModel):
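
With the max_output_key mapping removed, OpenAISettings no longer exposes a per-endpoint key choice, so the payload key names are fixed in the backend. Roughly what remains of the settings class, sketched from the diff context only (the real class may define additional fields):

from pydantic import BaseModel


class OpenAISettings(BaseModel):
    # Fields visible in the diff context; the actual class may define more.
    base_url: str = "http://localhost:8000"
    max_output_tokens: int = 16384
    verify: bool = True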

tests/unit/conftest.py

Lines changed: 3 additions & 1 deletion
@@ -132,6 +132,7 @@ async def _mock_completions_response(request) -> AsyncIterable[str]:
     assert payload["stream_options"] == {"include_usage": True}
     assert payload["prompt"] is not None
     assert len(payload["prompt"]) > 0
+    assert payload["max_completion_tokens"] > 0
     assert payload["max_tokens"] > 0
 
     return httpx.Response(  # type: ignore
@@ -140,7 +141,7 @@ async def _mock_completions_response(request) -> AsyncIterable[str]:
         type_="text",
         prompt=payload["prompt"],
         output_token_count=(
-            payload["max_tokens"]
+            payload["max_completion_tokens"]
             if payload.get("ignore_eos", False)
             else None
         ),
@@ -161,6 +162,7 @@ async def _mock_chat_completions_response(request):
     assert payload["messages"] is not None
     assert len(payload["messages"]) > 0
     assert payload["max_completion_tokens"] > 0
+    assert payload["max_tokens"] > 0
 
     return httpx.Response(  # type: ignore
         200,
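
The updated fixtures assert that every mocked request carries both token-limit keys with positive values. A self-contained sketch of the same assertion pattern using httpx.MockTransport; the handler and the request below are illustrative stand-ins, not guidellm's actual test fixtures:

import json

import httpx


def handler(request: httpx.Request) -> httpx.Response:
    payload = json.loads(request.content)
    # Mirrors the reverted fixtures: both token-limit keys must be present
    # and positive, regardless of which completions endpoint was called.
    assert payload["max_completion_tokens"] > 0
    assert payload["max_tokens"] > 0
    return httpx.Response(200, json={"ok": True})


client = httpx.Client(transport=httpx.MockTransport(handler))
response = client.post(
    "http://testserver/v1/completions",
    json={"prompt": "hi", "max_tokens": 128, "max_completion_tokens": 128},
)
assert response.status_code == 200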
