We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
2 parents b749aa5 + e705c94 — commit 1606cff (Copy full SHA for 1606cff)
src/engine_args.py
@@ -288,6 +288,13 @@ def get_engine_args():
288
289
# Set max_num_batched_tokens to max_model_len for unlimited batching.
290
# vLLM defaults max_num_batched_tokens to 2048 when None, which is too low.
291
+
292
+ if args.get("max_model_len") == 0:
293
+ args["max_model_len"] = None
294
295
+ if args.get("max_num_batched_tokens") == 0:
296
+ args["max_num_batched_tokens"] = None
297
298
if args.get("max_num_batched_tokens") is None:
299
max_model_len = args.get("max_model_len")
300
if max_model_len is None:
0 commit comments