Commit 263b53a ("fix")

1 parent: e3a44a1

5 files changed: +9 additions, -17 deletions

lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep.py
Lines changed: 3 additions & 2 deletions

@@ -21,8 +21,9 @@
 )
 from lightllm.common.fused_moe.deepep_scatter_gather import ep_scatter, ep_gather
 from lightllm.common.basemodel.triton_kernel.redundancy_topk_ids_repair import redundancy_topk_ids_repair
-from lightllm.utils.envs_utils import enable_triton_autotune
 from lightllm.utils.log_utils import init_logger
+from lightllm.common.triton_utils.autotuner import Autotuner
+

 logger = init_logger(__name__)

@@ -355,7 +356,7 @@ def prefilled_group_gemm(
 ######################################## warning ##################################################
 # here is used to match autotune feature, make moe model run same triton kernel in different rank.
 # in some special case, one rank will recv 0 token, so add a token to make it run triton kernel.
-if enable_triton_autotune():
+if Autotuner.is_autotune_warmup():
     _gemm_out_a = torch.zeros((1, N), device=device, dtype=hidden_dtype)
     _silu_out = torch.zeros((1, N // 2), device=device, dtype=hidden_dtype)
     silu_and_mul_fwd(_gemm_out_a.view(-1, N), _silu_out)
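
The context lines above document the trick itself: under expert parallelism a rank may receive zero tokens for a step, so during autotune warmup every rank still launches the kernel on a one-token placeholder to keep tuning in lockstep across ranks. A minimal sketch of that pattern, assuming the fused SiLU-and-mul Triton kernel is passed in as a callable (the helper name below is illustrative, not part of the repository):

    import torch
    from typing import Callable

    def warmup_zero_token_rank(
        silu_and_mul_kernel: Callable[[torch.Tensor, torch.Tensor], None],
        N: int,
        device: torch.device,
        dtype: torch.dtype,
    ) -> None:
        # Illustrative helper: launch the fused SiLU-and-mul kernel on a single
        # dummy token so a rank that received zero tokens during autotune warmup
        # still runs (and tunes) the same kernel as its peers.
        gemm_out = torch.zeros((1, N), device=device, dtype=dtype)       # fake gate/up GEMM output
        silu_out = torch.zeros((1, N // 2), device=device, dtype=dtype)  # destination buffer
        silu_and_mul_kernel(gemm_out.view(-1, N), silu_out)

In the diff itself this branch is now gated on Autotuner.is_autotune_warmup() instead of the removed enable_triton_autotune() helper.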

lightllm/common/fused_moe/grouped_fused_moe_ep.py
Lines changed: 2 additions & 2 deletions

@@ -14,7 +14,7 @@
 )
 from lightllm.common.fused_moe.deepep_scatter_gather import ep_scatter, ep_gather
 from lightllm.utils.envs_utils import get_deepep_num_max_dispatch_tokens_per_rank
-from lightllm.utils.envs_utils import enable_triton_autotune
+from lightllm.common.triton_utils.autotuner import Autotuner
 import numpy as np

 logger = init_logger(__name__)

@@ -189,7 +189,7 @@ def fused_experts_impl(
 ######################################## warning ##################################################
 # here is used to match autotune feature, make moe model run same triton kernel in different rank.
 # in some special case, one rank will recv 0 token, so add a token to make it run triton kernel.
-if enable_triton_autotune():
+if Autotuner.is_autotune_warmup():
     _gemm_out_a = torch.zeros((1, N), device=hidden_states.device, dtype=hidden_states.dtype)
     _silu_out = torch.zeros((1, N // 2), device=hidden_states.device, dtype=hidden_states.dtype)
     silu_and_mul_fwd(_gemm_out_a.view(-1, N), _silu_out)

lightllm/common/fused_moe/topk_select.py
Lines changed: 2 additions & 2 deletions

@@ -23,7 +23,7 @@
 from lightllm.utils.light_utils import light_ops
 from typing import Callable, List, Optional, Tuple
 from lightllm.common.fused_moe.softmax_topk import softmax_topk
-from lightllm.utils.envs_utils import enable_triton_autotune
+from lightllm.common.triton_utils.autotuner import Autotuner

 use_cuda_grouped_topk = os.getenv("LIGHTLLM_CUDA_GROUPED_TOPK", "False").upper() in ["ON", "TRUE", "1"]

@@ -224,7 +224,7 @@ def select_experts(

 ######################################## warning ##################################################
 # here is used to match autotune feature, make topk_ids more random
-if enable_triton_autotune():
+if Autotuner.is_autotune_warmup():
     rand_gen = torch.Generator(device="cuda")
     rand_gen.manual_seed(router_logits.shape[0])
     router_logits = torch.randn(size=router_logits.shape, generator=rand_gen, dtype=torch.float32, device="cuda")
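
Seeding the generator with router_logits.shape[0] makes the replacement logits deterministic for a given token count while still spreading topk_ids across experts during warmup. A standalone sketch of the same idea, assuming only a router_logits tensor as input:

    import torch

    def randomize_router_logits_for_warmup(router_logits: torch.Tensor) -> torch.Tensor:
        # Sketch of the warmup branch in select_experts: derive the seed from the
        # number of tokens, then draw fresh logits so the selected experts vary
        # more widely while the autotuner warms up.
        rand_gen = torch.Generator(device=router_logits.device)
        rand_gen.manual_seed(router_logits.shape[0])
        return torch.randn(
            size=router_logits.shape,
            generator=rand_gen,
            dtype=torch.float32,
            device=router_logits.device,
        )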

lightllm/common/triton_utils/autotuner.py
Lines changed: 2 additions & 2 deletions

@@ -12,7 +12,7 @@
 from lightllm.utils.device_utils import get_current_device_name
 from lightllm.utils.log_utils import init_logger
 from typing import Callable, Optional, Union, List
-from lightllm.utils.envs_utils import enable_triton_autotune, get_triton_autotune_level
+from lightllm.utils.envs_utils import get_triton_autotune_level
 from lightllm.common.kernel_config import KernelConfigs
 from lightllm.utils.dist_utils import get_global_world_size, get_global_rank, get_current_rank_in_node

@@ -172,7 +172,7 @@ def __call__(self, *args, **kwargs):
 )
 self.cached_configs[static_key] = {}

-if enable_triton_autotune():
+if Autotuner.is_autotune_warmup():
     need_tuning = (autotune_level == AutotuneLevel.FORCE_AUTOTUNE) or (
         run_key not in self.cached_configs.get(static_key, {})
     )
lightllm/utils/envs_utils.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,6 @@ def get_triton_autotune_level():
154154
return int(os.getenv("LIGHTLLM_TRITON_AUTOTUNE_LEVEL", 0))
155155

156156

157-
def enable_triton_autotune():
158-
from lightllm.common.triton_utils.autotuner import AutotuneLevel, Autotuner
159-
160-
return (
161-
get_triton_autotune_level() in [AutotuneLevel.ADAPTIVE_AUTOTUNE, AutotuneLevel.FORCE_AUTOTUNE]
162-
and Autotuner.is_autotune_warmup()
163-
)
164-
165-
166157
g_model_init_done = False
167158

168159

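Taken together, the commit replaces the env-utils helper with a direct check on the Autotuner class at every call site; the before/after pattern, with the call-site body elided:

    # Before this commit:
    # from lightllm.utils.envs_utils import enable_triton_autotune
    # if enable_triton_autotune():
    #     ...  # warmup-only branch

    # After this commit:
    from lightllm.common.triton_utils.autotuner import Autotuner

    if Autotuner.is_autotune_warmup():
        ...  # warmup-only branch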