Skip to content

Commit 8a414bd

Browse files
Fix max token input (#478)
Signed-off-by: Onur Yilmaz <[email protected]>
1 parent 0d8d86b commit 8a414bd

File tree

1 file changed: +3 additions, -3 deletions

nemo_export/tensorrt_llm_deployable_ray.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -123,7 +123,7 @@ async def completions(self, request: Dict[Any, Any]):
         # Prepare inference inputs with proper parameter mapping
         inference_inputs = {
             "prompts": request.get("prompts", []),
-            "max_length": request.get("max_tokens", 256),
+            "max_output_len": request.get("max_tokens", 256),
             "temperature": request.get("temperature", 1.0),
             "top_k": request.get("top_k", 0),
             "top_p": request.get("top_p", 0.0),
@@ -197,7 +197,7 @@ async def chat_completions(self, request: Dict[Any, Any]):

         inference_inputs = {
             "prompts": [messages],  # Wrap messages in a list so apply_chat_template gets the full conversation
-            "max_length": request.get("max_tokens", 256),
+            "max_output_len": request.get("max_tokens", 256),
             "temperature": request.get("temperature", 1.0),
             "top_k": request.get("top_k", 0),
             "top_p": request.get("top_p", 0.0),
@@ -248,7 +248,7 @@ async def chat_completions(self, request: Dict[Any, Any]):
             ),
             "finish_reason": (
                 "length"
-                if generated_texts and len(str(generated_texts[0])) >= inference_inputs["max_length"]
+                if generated_texts and len(str(generated_texts[0])) >= inference_inputs["max_output_len"]
                 else "stop"
             ),
         }
```

0 commit comments

Comments
 (0)