bugfix: Fix "FusedMoeRunner does not exist" error (flashinfer-ai#1424)

nvpohanh · yzh119 · web-flow · commit 9c3b746de2a3 · 2025-08-08T16:57:22.000-07:00
Fix the `AttributeError: FusedMoeRunner does not exist` error raised when running the CUTLASS MoE path. This regression was introduced by flashinfer-ai#1201. The fix constructs the runner via `torch.classes.fused_moe_sm100.FusedMoeRunner` instead of accessing `FusedMoeRunner` as an attribute of the object returned by `build_and_load`.  ## 📌 Description  ## 🔍 Related Issues  ## 🚀 Pull Request Checklist Thank you for contributing to FlashInfer! Before we review your pull request, please make sure the following items are complete. ### ✅ Pre-commit Checks - [x] I have installed `pre-commit` by running `pip install pre-commit` (or used your preferred method). - [x] I have installed the hooks with `pre-commit install`. - [x] I have run the hooks manually with `pre-commit run --all-files` and fixed any reported issues. > If you are unsure about how to set up `pre-commit`, see [the pre-commit documentation](https://pre-commit.com/). ## 🧪 Tests - [x] Tests have been added or updated as needed. - [x] All tests are passing (`unittest`, etc.). ## Reviewer Notes  --------- Signed-off-by: Po-Han Huang <pohanh@nvidia.com> Co-authored-by: Zihao Ye <expye@outlook.com>
diff --git a/flashinfer/fused_moe/core.py b/flashinfer/fused_moe/core.py
@@ -266,7 +266,7 @@ def gen_cutlass_fused_moe_sm100_module(use_fast_build: bool = False) -> JitSpec:
 
 @functools.cache
 def get_cutlass_fused_moe_sm100_module(use_fast_build: bool = False):
-    module = gen_cutlass_fused_moe_sm100_module(use_fast_build).build_and_load(
+    gen_cutlass_fused_moe_sm100_module(use_fast_build).build_and_load(
         class_name="FusedMoeRunner"
     )
 
@@ -329,14 +329,17 @@ def __init__(
             )
 
             if instance_key not in MoERunner.runner_dict:
-                MoERunner.runner_dict[instance_key] = module.FusedMoeRunner(
-                    x_dtype,
-                    weight_dtype,
-                    output_dtype,
-                    use_deepseek_fp8_block_scale,
-                    use_w4a8_group_scaling,
-                    use_mxfp8_act_scaling,
+                MoERunner.runner_dict[instance_key] = (
+                    torch.classes.fused_moe_sm100.FusedMoeRunner(
+                        x_dtype,
+                        weight_dtype,
+                        output_dtype,
+                        use_deepseek_fp8_block_scale,
+                        use_w4a8_group_scaling,
+                        use_mxfp8_act_scaling,
+                    )
                 )
+
             self.fused_moe_runner = MoERunner.runner_dict[instance_key]
 
         def get_valid_tactics(