Skip to content

Commit b7bfd00

Browse files
authored
Decouple cutlass config version from flashinfer version (flashinfer-ai#1441)
1 parent 37cfb57 commit b7bfd00

File tree

3 files changed

+7
-238
lines changed

3 files changed

+7
-238
lines changed

flashinfer/autotuner.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,19 +11,22 @@
1111

1212
import torch
1313

14-
from flashinfer import __version__ as flashinfer_version
15-
1614
# from tensorrt_llm.bindings.internal.runtime import delay_kernel
1715
# from tensorrt_llm.logger import logger
1816
from flashinfer.tllm_utils import delay_kernel
1917

2018
from .jit.core import logger
2119

20+
# This version should be updated whenever the nvfp4_cutlass backend is changed,
21+
# such as when new kernels or configs are added. In such cases, the tuning configs
22+
# should also be updated. Currently, this process is manual, but it should be automated in the future.
23+
_nvfp4_cutlass_version = "0.1"
24+
2225

2326
def get_config_path(is_module: bool):
2427
dev_name = torch.cuda.get_device_name(0).replace(" ", "_")
25-
fi_ver = flashinfer_version.replace(".", "_")
26-
config_name = f"v{fi_ver}_trtllm_fused_moe_{dev_name}"
28+
cutlass_ver = _nvfp4_cutlass_version.replace(".", "_")
29+
config_name = f"v{cutlass_ver}_trtllm_fused_moe_{dev_name}"
2730
if is_module:
2831
return f"flashinfer.tuning_configs.{config_name}"
2932
else:

flashinfer/tuning_configs/v0_2_8_trtllm_fused_moe_NVIDIA_B200.py

Lines changed: 0 additions & 234 deletions
This file was deleted.

0 commit comments

Comments
 (0)