Skip to content

Commit 2a4eb96

Browse files
committed
clip input shape to max tunable token count
Signed-off-by: Anthony Chang <[email protected]>
1 parent 4121b67 commit 2a4eb96

File tree

1 file changed: +9 -2 lines changed

1 file changed: +9 -2 lines changed

tensorrt_llm/_torch/autotuner.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1140,8 +1140,15 @@ def _optimization_profiles(
1140 1140
opt_shapes = spec.gen_tuning_buckets
1141 1141
# Add the current input value as one of the opt values
1142 1142
opt_shapes = set(opt_shapes)
1143-
opt_shapes.add(
1144-
base_profile.shapes[spec.input_idx][spec.dim_idx].val)
1143+
if tuning_config.tune_max_num_tokens is not None:
1144+
opt_shapes.add(
1145+
min(
1146+
tuning_config.tune_max_num_tokens,
1147+
base_profile.shapes[spec.input_idx][spec.dim_idx].val,
1148+
))
1149+
else:
1150+
opt_shapes.add(
1151+
base_profile.shapes[spec.input_idx][spec.dim_idx].val)
1145 1152
opt_shapes = sorted(list(opt_shapes))
1146 1153
opt_shapes_max = tuple(opt_shapes[1:]) + (float('inf'), )
1147 1154
opt_shapes_max = {

0 commit comments

Comments
 (0)