Skip to content

Commit bc25fff

Browse files
[#9496][fix] AutoDeploy: remove auto-tuner from nvfp4_gemm forward (#9497)
Signed-off-by: Neta Zmora <[email protected]>
1 parent d69bf9f commit bc25fff

File tree

1 file changed

+3
-6
lines changed
  • tensorrt_llm/_torch/auto_deploy/custom_ops

1 file changed

+3
-6
lines changed

tensorrt_llm/_torch/auto_deploy/custom_ops/quant.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
 from flashinfer import bmm_fp8
 from torch import nn

-from tensorrt_llm._torch.autotuner import autotune
-
 from ..distributed import common as dist
 from ..distributed import trtllm as trtllm_dist
 from .torch_libs.float8_python_api import addmm_float8_unwrapped
@@ -336,10 +334,9 @@ def nvfp4_linear(
     x_fp4, x_sf_block = torch.ops.trtllm.fp4_quantize(
         input, input_scale, TRTLLM_NVFP4_SCALING_VECTOR_SIZE, False
     )
-    with autotune():
-        output = torch.ops.trtllm.nvfp4_gemm(
-            x_fp4, weight_fp4, x_sf_block, weight_scale, alpha, input.dtype
-        )
+    output = torch.ops.trtllm.nvfp4_gemm(
+        x_fp4, weight_fp4, x_sf_block, weight_scale, alpha, input.dtype
+    )
343340

344341
if bias is not None:
345342
output = output + bias

0 commit comments

Comments
 (0)