
Commit 1bb7259

add test case for trtllm gen fused moe with kimi k2 problem sizes (#1768)
## 📌 Description

This adds the Kimi K2 problem size to the unit test: the number of experts is 384 and top_k is 8. Support for the Kimi K2 MoE was added in [PR 1696](#1696).

## 🔍 Related Issues

<!-- Link any related issues here -->

## 🚀 Pull Request Checklist

Thank you for contributing to FlashInfer! Before we review your pull request, please make sure the following items are complete.

### ✅ Pre-commit Checks

- [x] I have installed `pre-commit` by running `pip install pre-commit` (or used your preferred method).
- [x] I have installed the hooks with `pre-commit install`.
- [x] I have run the hooks manually with `pre-commit run --all-files` and fixed any reported issues.

> If you are unsure about how to set up `pre-commit`, see [the pre-commit documentation](https://pre-commit.com/).

## 🧪 Tests

- [x] Tests have been added or updated as needed.
- [ ] All tests are passing (`unittest`, etc.).

## Reviewer Notes

<!-- Optional: anything you'd like reviewers to focus on, concerns, etc. -->
1 parent b7be894 · commit 1bb7259

File tree

1 file changed: +17 −0 lines

tests/test_trtllm_gen_fused_moe.py

Lines changed: 17 additions & 0 deletions
@@ -1853,6 +1853,23 @@ def cache_permute_indices():
 @pytest.mark.parametrize(
     "routing_config",
     [
+        pytest.param(
+            {
+                "num_experts": 384,
+                "top_k": 8,
+                "padding": 8,
+                "n_groups": 12,
+                "top_k_groups": 4,
+                "routed_scaling": 2.5,
+                "has_routing_bias": True,
+                "routing_method_type": RoutingMethodType.DeepSeekV3,
+                "compatible_moe_impls": [
+                    FP4Moe,
+                    FP8BlockScaleMoe,
+                ],
+            },
+            id="kimi_k2",
+        ),
         pytest.param(
             {
                 "num_experts": 256,
