yongwww
diff --git a/‎flashinfer/autotuner.py
Lines changed: 7 additions & 4 deletions b/‎flashinfer/autotuner.py
Lines changed: 7 additions & 4 deletions
diff --git a/‎flashinfer/tuning_configs/v0_2_9_trtllm_fused_moe_NVIDIA_B200.py renamed to ‎flashinfer/tuning_configs/v0_1_trtllm_fused_moe_NVIDIA_B200.py b/‎flashinfer/tuning_configs/v0_2_9_trtllm_fused_moe_NVIDIA_B200.py renamed to ‎flashinfer/tuning_configs/v0_1_trtllm_fused_moe_NVIDIA_B200.py
diff --git a/‎flashinfer/tuning_configs/v0_2_8_trtllm_fused_moe_NVIDIA_B200.py
Lines changed: 0 additions & 234 deletions b/‎flashinfer/tuning_configs/v0_2_8_trtllm_fused_moe_NVIDIA_B200.py
Lines changed: 0 additions & 234 deletions
@@ -11,19 +11,22 @@
 
 import torch
 
-from flashinfer import __version__ as flashinfer_version
-
 # from tensorrt_llm.bindings.internal.runtime import delay_kernel
 # from tensorrt_llm.logger import logger
 from flashinfer.tllm_utils import delay_kernel
 
 from .jit.core import logger
 
+# Version tag embedded in the tuning-config name by get_config_path(). Bump it whenever
+# the nvfp4_cutlass backend changes (e.g. new kernels or configs are added) and update the
+# tuning configs to match. This is currently a manual step; it should be automated in the future.
+_nvfp4_cutlass_version = "0.1"
+
 
 def get_config_path(is_module: bool):
     dev_name = torch.cuda.get_device_name(0).replace(" ", "_")
-    fi_ver = flashinfer_version.replace(".", "_")
-    config_name = f"v{fi_ver}_trtllm_fused_moe_{dev_name}"
+    cutlass_ver = _nvfp4_cutlass_version.replace(".", "_")
+    config_name = f"v{cutlass_ver}_trtllm_fused_moe_{dev_name}"
     if is_module:
         return f"flashinfer.tuning_configs.{config_name}"
     else: