Clip input shape to the max tunable token count

rosenrodt · rosenrodt · commit 41901d13880c · 2026-01-02T12:18:10.000+08:00
Signed-off-by: Anthony Chang <27950904+rosenrodt@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/autotuner.py b/tensorrt_llm/_torch/autotuner.py
@@ -1141,7 +1141,10 @@ def _optimization_profiles(
             # Add the current input value as one of the opt values
             opt_shapes = set(opt_shapes)
             opt_shapes.add(
-                base_profile.shapes[spec.input_idx][spec.dim_idx].val)
+                min(
+                    tuning_config.tune_max_num_tokens,
+                    base_profile.shapes[spec.input_idx][spec.dim_idx].val,
+                ))
             opt_shapes = sorted(list(opt_shapes))
             opt_shapes_max = tuple(opt_shapes[1:]) + (float('inf'), )
             opt_shapes_max = {