@@ -73,7 +73,6 @@ def test_initialization(self, dispatcher, config):
class TestTokenDispatcherWithMC2(unittest.TestCase):

    def setUp(self):
-        # Mock get_mc2_group() to return a fixed value
        self.mc2_group = mock.MagicMock()
        self.mc2_group.device_group.return_value._get_backend.return_value.get_hccl_comm_name.return_value = "hccl_123"
        self.mc2_group.rank_in_group = 0
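An aside on the mocking style above: MagicMock auto-creates child mocks on attribute access, which is what lets a single return_value assignment configure the whole device_group/_get_backend/get_hccl_comm_name chain. A self-contained sketch of the mechanism:

from unittest import mock

# MagicMock builds child mocks on attribute access, so one assignment
# configures the entire call chain end to end.
mc2_group = mock.MagicMock()
mc2_group.device_group.return_value._get_backend.return_value \
    .get_hccl_comm_name.return_value = "hccl_123"

backend = mc2_group.device_group()._get_backend()
assert backend.get_hccl_comm_name() == "hccl_123"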
@@ -110,7 +109,6 @@ def setUp(self):
        self.ascend_config_patch.start()

        kwargs = {"with_quant": False, "top_k": 8, "num_experts": 128}
-        # Initialize the TokenDispatcherWithMC2 instance
        self.dispatcher = TokenDispatcherWithMC2(**kwargs)

    def tearDown(self):
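setUp and tearDown follow the standard patcher lifecycle: every patcher started in setUp must be stopped in tearDown so the patched attribute is restored between tests. A minimal sketch of the pattern, using os.getcwd as a stand-in target since the real patch target is project-specific:

import unittest
from unittest import mock

class PatcherLifecycleExample(unittest.TestCase):
    def setUp(self):
        # Stand-in target; the real tests patch project internals.
        self.config_patch = mock.patch("os.getcwd", return_value="/fake")
        self.config_patch.start()

    def tearDown(self):
        # stop() restores the original attribute, preventing leakage
        # into later tests.
        self.config_patch.stop()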
@@ -120,7 +118,6 @@ def tearDown(self):
        self.ascend_config_patch.stop()

    def test_init(self):
-        """Test __init__ initialization behavior"""
        # self.assertEqual(self.dispatcher.moe_all_to_all_group_name, "hccl_123")
        self.assertEqual(self.dispatcher.ep_rank_id, 0)
        self.assertEqual(self.dispatcher.ep_world_size, 8)
@@ -131,7 +128,6 @@ def test_init(self):
        self.assertTrue(self.dispatcher.a3_need_extra_args)

    def test_get_permute_mc2_kwargs_without_quant(self):
-        """Test get_permute_mc2_kwargs (without quantization)"""
        hidden_states = torch.randn(10, 128)
        topk_ids = torch.randint(0, 8, (10, 1))
        topk_weights = torch.randn(10, 1)
@@ -144,7 +140,6 @@ def test_get_permute_mc2_kwargs_without_quant(self):
        self.assertEqual(kwargs["moe_expert_num"], 8)

    def test_token_permutation_dispatch(self):
-        """Test token_permutation (using dispatch)"""
        hidden_states = torch.randn(10, 128)
        topk_weights = torch.randn(10, 1)
        topk_ids = torch.randint(0, 8, (10, 1))
@@ -160,7 +155,6 @@ def test_token_permutation_dispatch(self):
        self.assertEqual(output[0], 1)  # group_list_type == 1

    def test_token_permutation_with_shared_experts_and_quant(self):
-        """Test token_permutation (with shared_experts and with_quant=True)"""
        self.shared_experts = mock.MagicMock()
        self.shared_experts.gate_up_proj.return_value = (torch.randn(10, 128),
                                                         torch.tensor(1.0))
@@ -189,7 +183,6 @@ def test_token_permutation_with_shared_experts_and_quant(self):
            self.topk_weights)

    def test_get_unpermute_mc_kwargs_with_quant(self):
-        """Test get_unpermute_mc_kwargs (with_quant=True)"""
        self.dispatcher.with_quant = True
        hidden_states = torch.randn(10, 128)
        self.dispatcher.topk_ids = torch.randint(0, 8, (10, 1))
@@ -198,6 +191,7 @@ def test_get_unpermute_mc_kwargs_with_quant(self):
        self.dispatcher.ep_recv_counts = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7])
        self.dispatcher.need_extra_args = True
        self.dispatcher.enable_dispatch_v2 = True
+        self.dispatcher.output = torch.randint(0, 8, (10, 1))

        kwargs = self.dispatcher.get_unpermute_mc_kwargs(hidden_states)
        self.assertIn("tp_send_counts", kwargs)
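The next hunk patches torch_npu.npu_moe_distribute_combine_v2 by dotted path. The same mock.patch mechanism works for any module-level callable; a tiny sketch using json.dumps as a stand-in target, since torch_npu may not be importable off-device:

import json
from unittest import mock

# Patching by dotted path swaps the callable only inside the with-block.
with mock.patch("json.dumps", return_value="stubbed"):
    assert json.dumps({"a": 1}) == "stubbed"

# The original function is restored on exit.
assert json.dumps({"a": 1}) == '{"a": 1}'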
@@ -215,6 +209,7 @@ def test_token_unpermutation_with_shared_experts(self):
        self.dispatcher.need_extra_args = True
        self.dispatcher.enable_dispatch_v2 = True
        self.dispatcher.swiglu_out_scale = torch.randint(0, 8, (10, 1))
+        self.dispatcher.output = torch.randint(0, 8, (10, 1))
        self.hidden_states = torch.randn(10, 128)

        with mock.patch("torch_npu.npu_moe_distribute_combine_v2",
@@ -270,23 +265,6 @@ def tearDown(self):
        self.patcher_moe_compute_expert_tokens.stop()
        self.patcher_moe_finalize_routing.stop()

-    def test_token_permutation_with_expert_map(self):
-        self.dispatcher.expert_map = torch.tensor([0, 1, 2, 3])
-        hidden_states = torch.randn(3, 128)
-        topk_weights = torch.tensor([[0.7, 0.3], [0.6, 0.4], [0.5, 0.5]])
-        topk_ids = torch.tensor([[0, 1], [1, 2], [2, 3]])
-
-        group_list_type, sorted_hidden_states, expert_tokens = self.dispatcher.token_permutation(
-            hidden_states, topk_weights, topk_ids, self.dispatcher.expert_map)
-
-        # Verify expert_map logic is used
-        self.assertEqual(group_list_type, 0)
-        self.assertTrue(sorted_hidden_states.shape, (6, 128))
-
-        # Check if sorting and filtering were applied
-        self.assertIsNotNone(self.dispatcher.sorted_token_indices)
-        self.assertIsNotNone(self.dispatcher.sorted_weights)
-
    def test_token_permutation_without_expert_map(self):
        hidden_states = torch.randn(3, 128)
        topk_weights = torch.tensor([[0.7, 0.3], [0.6, 0.4], [0.5, 0.5]])
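The removed test exercised expert-map permutation: each token is replicated once per selected expert, and rows are sorted by expert id so every expert receives a contiguous slab. A framework-only sketch of that idea with the same shapes as the test; this is illustrative, not the dispatcher's actual implementation:

import torch

hidden_states = torch.randn(3, 128)
topk_ids = torch.tensor([[0, 1], [1, 2], [2, 3]])  # 3 tokens, top-2 experts

# Replicate each token once per chosen expert, then sort by expert id.
num_tokens, top_k = topk_ids.shape
flat_expert_ids = topk_ids.reshape(-1)                        # (6,)
token_index = torch.arange(num_tokens).repeat_interleave(top_k)
order = torch.argsort(flat_expert_ids, stable=True)
sorted_hidden_states = hidden_states[token_index[order]]      # (6, 128)

# Per-expert token counts feed the grouped matmul downstream.
expert_tokens = torch.bincount(flat_expert_ids, minlength=4)
assert sorted_hidden_states.shape == (6, 128)
assert expert_tokens.tolist() == [1, 2, 2, 1]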
@@ -341,7 +319,11 @@ def test_token_unpermutation_without_expert_map(self):
        self.dispatcher.with_quant = False
        self.dispatcher.expanded_row_idx = torch.tensor([0, 1, 1, 1, 1, 1])
        self.dispatcher.topk_ids = torch.tensor([[0, 1], [1, 2], [2, 3]])
+        self.dispatcher.sorted_token_indices = torch.tensor([0, 1, 1, 1, 1, 1])
+        self.dispatcher.sorted_weights = torch.tensor(
+            [0.5, 0.5, 0.5, 0.5, 0.5, 0.5])
        self.dispatcher.original_shape = (3, 128)
+        self.dispatcher.mask = torch.tensor([0, 1, 1, 0])
        hidden_states = torch.randn(6, 128)

        final_hidden_states = self.dispatcher.token_unpermutation(
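For symmetry with the permutation sketch above, the unpermute step scatters expert outputs back into token order and applies the routing weights. A minimal sketch under the same illustrative assumptions (the variable names mirror the test's attributes, but the semantics here are assumed, not the dispatcher's real code path):

import torch

expert_output = torch.randn(6, 128)        # rows in expert-sorted order
sorted_token_indices = torch.tensor([0, 0, 1, 1, 2, 2])
sorted_weights = torch.tensor([0.7, 0.3, 0.6, 0.4, 0.5, 0.5])

# Weight each expanded row, then accumulate rows belonging to the same
# original token with index_add_.
final_hidden_states = torch.zeros(3, 128)
final_hidden_states.index_add_(0, sorted_token_indices,
                               expert_output * sorted_weights.unsqueeze(1))
assert final_hidden_states.shape == (3, 128)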