31
31
TEXT_COMPLETIONS_PATH = "/v1/completions"
32
32
CHAT_COMPLETIONS_PATH = "/v1/chat/completions"
33
33
34
- EndpointType = Literal ["chat_completions" , "models" , "text_completions" ]
35
- CHAT_COMPLETIONS : EndpointType = "chat_completions"
34
+ CompletionEndpointType = Literal ["text_completions" , "chat_completions" ]
35
+ EndpointType = Union [Literal ["models" ], CompletionEndpointType ]
36
+ CHAT_COMPLETIONS : CompletionEndpointType = "chat_completions"
36
37
MODELS : EndpointType = "models"
37
- TEXT_COMPLETIONS : EndpointType = "text_completions"
38
+ TEXT_COMPLETIONS : CompletionEndpointType = "text_completions"
38
39
39
40
40
41
@Backend .register ("openai_http" )
@@ -447,7 +448,7 @@ def _extra_body(self, endpoint_type: EndpointType) -> dict[str, Any]:
447
448
448
449
def _completions_payload (
449
450
self ,
450
- endpoint_type : EndpointType ,
451
+ endpoint_type : CompletionEndpointType ,
451
452
orig_kwargs : Optional [dict ],
452
453
max_output_tokens : Optional [int ],
453
454
** kwargs ,
@@ -467,7 +468,10 @@ def _completions_payload(
467
468
self .__class__ .__name__ ,
468
469
max_output_tokens or self .max_output_tokens ,
469
470
)
470
- payload ["max_tokens" ] = max_output_tokens or self .max_output_tokens
471
+ max_output_key = settings .openai .max_output_key .get (
472
+ endpoint_type , "max_tokens"
473
+ )
474
+ payload [max_output_key ] = max_output_tokens or self .max_output_tokens
471
475
472
476
if max_output_tokens :
473
477
# only set stop and ignore_eos if max_output_tokens set at request level
0 commit comments