diff --git a/tests/ut/ops/test_fused_ops.py b/tests/ut/ops/test_fused_ops.py
index a5bdfe225d..ad95403d42 100644
--- a/tests/ut/ops/test_fused_ops.py
+++ b/tests/ut/ops/test_fused_ops.py
@@ -258,7 +258,9 @@ def get_fused_moe_quant_config(self, layer: torch.nn.Module):
 
 
 class TestAscendFusedMoe:
-    def test_init_no_quant(self, mock_dist_env, default_moe_config):
+    @patch('torch.npu.is_available')
+    @patch("torch.npu.current_device", return_value=MagicMock())
+    def test_init_no_quant(self, mock_dist_env, default_moe_config, mocker):
         layer = AscendFusedMoE(**default_moe_config)
 
         layer.w13_weight = nn.Parameter(
diff --git a/vllm_ascend/ops/common_fused_moe.py b/vllm_ascend/ops/common_fused_moe.py
index ac22b69bcc..f3bdb15bd0 100644
--- a/vllm_ascend/ops/common_fused_moe.py
+++ b/vllm_ascend/ops/common_fused_moe.py
@@ -173,7 +173,7 @@ def __init__(self, *args, **kwargs):
                 self.global_redundant_expert_num)
             self.log2phy = determine_default_log2phy_map(
                 self.global_num_experts, self.ep_size, self.ep_rank,
-                self.global_redundant_expert_num)
+                self.global_redundant_expert_num).npu()
         local_num_experts = (torch.sum(
             self.expert_map != -1) if self.expert_map is not None else
                              self.global_num_experts)
diff --git a/vllm_ascend/ops/fused_moe.py b/vllm_ascend/ops/fused_moe.py
index 97489f9ac3..3d87c5eb8b 100644
--- a/vllm_ascend/ops/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe.py
@@ -264,7 +264,7 @@ def __init__(
                 self.global_redundant_expert_num)
             self.log2phy = determine_default_log2phy_map(
                 self.global_num_experts, self.ep_size, self.ep_rank,
-                self.global_redundant_expert_num)
+                self.global_redundant_expert_num).npu()
         local_num_experts = (torch.sum(self.expert_map != -1)
                              if self.expert_map is not None else num_experts)
         if self.dynamic_eplb:
diff --git a/vllm_ascend/torchair/ops/torchair_fused_moe.py b/vllm_ascend/torchair/ops/torchair_fused_moe.py
index bd25a79562..1b0d939f7a 100644
--- a/vllm_ascend/torchair/ops/torchair_fused_moe.py
+++ b/vllm_ascend/torchair/ops/torchair_fused_moe.py
@@ -1046,7 +1046,7 @@ def __init__(
                 self.global_redundant_expert_num)
             self.log2phy = determine_default_log2phy_map(
                 self.global_num_experts, self.ep_size, self.ep_rank,
-                self.global_redundant_expert_num)
+                self.global_redundant_expert_num).npu()
         local_num_experts = (torch.sum(self.expert_map != -1)
                              if self.expert_map is not None else num_experts)
         if self.dynamic_eplb: