We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4121b67 · commit 2a4eb96 (Copy full SHA for 2a4eb96)
tensorrt_llm/_torch/autotuner.py
@@ -1140,8 +1140,15 @@ def _optimization_profiles(
             opt_shapes = spec.gen_tuning_buckets
             # Add the current input value as one of the opt values
             opt_shapes = set(opt_shapes)
-            opt_shapes.add(
-                base_profile.shapes[spec.input_idx][spec.dim_idx].val)
+            if tuning_config.tune_max_num_tokens is not None:
+                opt_shapes.add(
+                    min(
+                        tuning_config.tune_max_num_tokens,
+                        base_profile.shapes[spec.input_idx][spec.dim_idx].val,
+                    ))
+            else:
+                opt_shapes.add(
+                    base_profile.shapes[spec.input_idx][spec.dim_idx].val)
             opt_shapes = sorted(list(opt_shapes))
             opt_shapes_max = tuple(opt_shapes[1:]) + (float('inf'), )
             opt_shapes_max = {
0 commit comments