Commit e8dca1f

Fix redundant kernels in moe (#1428)
## 📌 Description

Replace the `torch.zeros` allocations for the trtllm-gen workspace buffers (`topk_ids`, `expert_weights`, `output`) with `torch.empty`, removing the redundant zero-fill kernels.

## 🔍 Related Issues

## 🚀 Pull Request Checklist

Thank you for contributing to FlashInfer! Before we review your pull request, please make sure the following items are complete.

### ✅ Pre-commit Checks

- [ ] I have installed `pre-commit` by running `pip install pre-commit` (or used your preferred method).
- [ ] I have installed the hooks with `pre-commit install`.
- [ ] I have run the hooks manually with `pre-commit run --all-files` and fixed any reported issues.

> If you are unsure about how to set up `pre-commit`, see [the pre-commit documentation](https://pre-commit.com/).

## 🧪 Tests

- [ ] Tests have been added or updated as needed.
- [ ] All tests are passing (`unittest`, etc.).

## Reviewer Notes
1 parent 9756433 commit e8dca1f

File tree

1 file changed: +3 −3 lines changed

flashinfer/fused_moe/core.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -1006,15 +1006,15 @@ def trtllm_fp4_block_scale_moe_op(

     # workspace buffers required by trtllm-gen
     if topk_ids is None:
-        topk_ids = torch.zeros(
+        topk_ids = torch.empty(
             num_tokens, top_k, dtype=torch.int32, device=hidden_states.device
         )
     if expert_weights is None:
-        expert_weights = torch.zeros(
+        expert_weights = torch.empty(
             num_tokens, top_k, dtype=routing_dtype, device=hidden_states.device
         )
     if output is None:
-        output = torch.zeros(
+        output = torch.empty(
             num_tokens,
             hidden_size,
             dtype=torch.bfloat16,
```
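The difference the diff relies on: `torch.zeros` allocates memory *and* launches an extra fill kernel to zero it, while `torch.empty` only allocates, leaving the contents uninitialized. That is safe when the downstream kernel fully overwrites the buffer before reading it, which is the premise of this commit. A minimal sketch of the idea (illustrative only, not FlashInfer code; the shapes and the `copy_` consumer are stand-ins for the real MoE kernel):

```python
import torch

# Stand-in sizes for the workspace buffers in the diff.
num_tokens, top_k = 4, 2

# torch.zeros = allocate + zero-fill kernel; torch.empty = allocate only.
zeroed = torch.zeros(num_tokens, top_k, dtype=torch.int32)
scratch = torch.empty(num_tokens, top_k, dtype=torch.int32)

# A consumer that writes every element before any read (here, a plain copy
# standing in for the trtllm-gen kernel) makes the two buffers equivalent,
# so the zero-fill is pure overhead.
scratch.copy_(zeroed)
assert torch.equal(scratch, zeroed)
```

The trade-off: `torch.empty` must never be used for a buffer the kernel reads or only partially writes, since its initial contents are arbitrary.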
