
Commit e3a44a1

clean code
1 parent 07838f1 commit e3a44a1

5 files changed: +25 -17 lines

lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_ep.py

Lines changed: 2 additions & 3 deletions
@@ -21,8 +21,7 @@
 )
 from lightllm.common.fused_moe.deepep_scatter_gather import ep_scatter, ep_gather
 from lightllm.common.basemodel.triton_kernel.redundancy_topk_ids_repair import redundancy_topk_ids_repair
-from lightllm.utils.envs_utils import get_triton_autotune_level
-from lightllm.common.triton_utils.autotuner import AutotuneLevel
+from lightllm.utils.envs_utils import enable_triton_autotune
 from lightllm.utils.log_utils import init_logger
 
 logger = init_logger(__name__)
@@ -356,7 +355,7 @@ def prefilled_group_gemm(
     ######################################## warning ##################################################
     # here is used to match autotune feature, make moe model run same triton kernel in different rank.
     # in some special case, one rank will recv 0 token, so add a token to make it run triton kernel.
-    if get_triton_autotune_level() in [AutotuneLevel.ADAPTIVE_AUTOTUNE, AutotuneLevel.FORCE_AUTOTUNE]:
+    if enable_triton_autotune():
         _gemm_out_a = torch.zeros((1, N), device=device, dtype=hidden_dtype)
         _silu_out = torch.zeros((1, N // 2), device=device, dtype=hidden_dtype)
         silu_and_mul_fwd(_gemm_out_a.view(-1, N), _silu_out)

lightllm/common/fused_moe/grouped_fused_moe_ep.py

Lines changed: 2 additions & 3 deletions
@@ -14,8 +14,7 @@
 )
 from lightllm.common.fused_moe.deepep_scatter_gather import ep_scatter, ep_gather
 from lightllm.utils.envs_utils import get_deepep_num_max_dispatch_tokens_per_rank
-from lightllm.utils.envs_utils import get_triton_autotune_level
-from lightllm.common.triton_utils.autotuner import AutotuneLevel
+from lightllm.utils.envs_utils import enable_triton_autotune
 import numpy as np
 
 logger = init_logger(__name__)
@@ -190,7 +189,7 @@ def fused_experts_impl(
     ######################################## warning ##################################################
     # here is used to match autotune feature, make moe model run same triton kernel in different rank.
     # in some special case, one rank will recv 0 token, so add a token to make it run triton kernel.
-    if get_triton_autotune_level() in [AutotuneLevel.ADAPTIVE_AUTOTUNE, AutotuneLevel.FORCE_AUTOTUNE]:
+    if enable_triton_autotune():
         _gemm_out_a = torch.zeros((1, N), device=hidden_states.device, dtype=hidden_states.dtype)
         _silu_out = torch.zeros((1, N // 2), device=hidden_states.device, dtype=hidden_states.dtype)
         silu_and_mul_fwd(_gemm_out_a.view(-1, N), _silu_out)
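
Both MoE paths above carry the same workaround: when Triton autotune warmup is active, every rank has to launch the same kernels, even a rank that received zero tokens from the DeepEP dispatch, so one zero row is pushed through silu_and_mul_fwd. A minimal sketch of the idea, with a plain-PyTorch stand-in for the Triton kernel (silu_and_mul and warmup_dummy_row are illustrative names, not lightllm APIs):

import torch
import torch.nn.functional as F


def silu_and_mul(gemm_out: torch.Tensor) -> torch.Tensor:
    # Stand-in for lightllm's silu_and_mul_fwd Triton kernel: split the last
    # dim in half, apply SiLU to the first half, multiply by the second half.
    gate, up = gemm_out.chunk(2, dim=-1)
    return F.silu(gate) * up


def warmup_dummy_row(N: int, device: torch.device, dtype: torch.dtype, warmup_active: bool) -> None:
    # During autotune warmup every rank must run the same kernel, so a rank
    # that received zero tokens still feeds one dummy row through it.
    if warmup_active:
        dummy = torch.zeros((1, N), device=device, dtype=dtype)
        silu_and_mul(dummy)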

lightllm/common/fused_moe/topk_select.py

Lines changed: 2 additions & 3 deletions
@@ -23,8 +23,7 @@
 from lightllm.utils.light_utils import light_ops
 from typing import Callable, List, Optional, Tuple
 from lightllm.common.fused_moe.softmax_topk import softmax_topk
-from lightllm.common.triton_utils.autotuner import AutotuneLevel
-from lightllm.utils.envs_utils import get_triton_autotune_level
+from lightllm.utils.envs_utils import enable_triton_autotune
 
 use_cuda_grouped_topk = os.getenv("LIGHTLLM_CUDA_GROUPED_TOPK", "False").upper() in ["ON", "TRUE", "1"]
 
@@ -225,7 +224,7 @@ def select_experts(
 
     ######################################## warning ##################################################
     # here is used to match autotune feature, make topk_ids more random
-    if get_triton_autotune_level() in [AutotuneLevel.ADAPTIVE_AUTOTUNE, AutotuneLevel.FORCE_AUTOTUNE]:
+    if enable_triton_autotune():
         rand_gen = torch.Generator(device="cuda")
         rand_gen.manual_seed(router_logits.shape[0])
         router_logits = torch.randn(size=router_logits.shape, generator=rand_gen, dtype=torch.float32, device="cuda")
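
In select_experts the same warmup guard also replaces the router logits with values drawn from a generator seeded by the token count, so the substitution is deterministic for a given batch size. A small standalone sketch of that seeding behaviour (the shapes and CPU device are illustrative; the real code draws on CUDA with router_logits.shape):

import torch


def randomize_router_logits(num_tokens: int, num_experts: int, device: str = "cpu") -> torch.Tensor:
    # Seeding the generator with the token count makes the "random" logits a
    # pure function of the batch size, so repeated calls with the same shape
    # (e.g. on different ranks) yield identical expert assignments.
    gen = torch.Generator(device=device)
    gen.manual_seed(num_tokens)
    return torch.randn((num_tokens, num_experts), generator=gen, dtype=torch.float32, device=device)


a = randomize_router_logits(8, 4)
b = randomize_router_logits(8, 4)
assert torch.equal(a, b)  # same token count -> same seed -> same logits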

lightllm/common/triton_utils/autotuner.py

Lines changed: 10 additions & 8 deletions
@@ -12,7 +12,7 @@
 from lightllm.utils.device_utils import get_current_device_name
 from lightllm.utils.log_utils import init_logger
 from typing import Callable, Optional, Union, List
-from lightllm.utils.envs_utils import get_triton_autotune_level
+from lightllm.utils.envs_utils import enable_triton_autotune, get_triton_autotune_level
 from lightllm.common.kernel_config import KernelConfigs
 from lightllm.utils.dist_utils import get_global_world_size, get_global_rank, get_current_rank_in_node
 
@@ -120,6 +120,7 @@ def __init__(
         self.run_key_distance_func = run_key_distance_func
         self.cached_configs = {}
         self.fast_match_configs = collections.defaultdict(dict)
+        self.warmuped_configs_set = set()
         self.arg_names = [param.name for param in inspect.signature(self.fn).parameters.values()]
         self._argname_to_pos = {name: idx for idx, name in enumerate(self.arg_names)}
         self._pos_to_argname = {idx: name for idx, name in enumerate(self.arg_names)}
@@ -160,7 +161,7 @@ def __call__(self, *args, **kwargs):
             for run_config in all_configs.values():
                 # warmup all configs
                 kwargs["run_config"] = run_config
-                self.kernel_warmup(*args, **kwargs)
+                self.kernel_warmup(static_key, *args, **kwargs)
 
         if static_key not in self.cached_configs and autotune_level == AutotuneLevel.USE_AUTOTUNE_HIS_CONFIG:
             if (dist.is_initialized() and get_current_rank_in_node() == 0) or not dist.is_initialized():
@@ -171,10 +172,7 @@
                 )
             self.cached_configs[static_key] = {}
 
-        if (
-            autotune_level in [AutotuneLevel.ADAPTIVE_AUTOTUNE, AutotuneLevel.FORCE_AUTOTUNE]
-            and Autotuner.is_autotune_warmup()
-        ):
+        if enable_triton_autotune():
             need_tuning = (autotune_level == AutotuneLevel.FORCE_AUTOTUNE) or (
                 run_key not in self.cached_configs.get(static_key, {})
             )
@@ -218,11 +216,15 @@ def _try_load_cache(self, static_key):
                 self.cached_configs[static_key] = orjson.loads(f.read())
             return True
 
-    def kernel_warmup(self, *args, **kwargs):
+    def kernel_warmup(self, static_key, *args, **kwargs):
         new_args, new_kwargs, origin_list, new_list = self._mutate_args_clone(args, kwargs)
-
+        run_config = kwargs.get("run_config", {})
+        hash_key = str(frozendict(run_config)) + str(static_key)
+        if hash_key in self.warmuped_configs_set:
+            return
         try:
             self.fn(*new_args, **new_kwargs)
+            self.warmuped_configs_set.add(hash_key)
         except:
             pass
         finally:
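
The other change here threads static_key into kernel_warmup and records each (run_config, static_key) pair in warmuped_configs_set, so a config that has already been warmed up is not run again. A minimal standalone sketch of that memoization (WarmupCache and run_fn are illustrative; lightllm builds the key with frozendict and also clones mutable arguments before running):

class WarmupCache:
    def __init__(self):
        # Keys of (run_config, static_key) pairs whose warmup already ran.
        self.warmuped_configs_set = set()

    def kernel_warmup(self, static_key, run_fn, run_config: dict) -> None:
        # Sorted items stand in for frozendict to keep the sketch dependency-free.
        hash_key = str(sorted(run_config.items())) + str(static_key)
        if hash_key in self.warmuped_configs_set:
            return  # skip configs that were already warmed up
        try:
            run_fn(run_config)
            # Only remember keys whose warmup actually succeeded.
            self.warmuped_configs_set.add(hash_key)
        except Exception:
            pass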

lightllm/utils/envs_utils.py

Lines changed: 9 additions & 0 deletions
@@ -154,6 +154,15 @@ def get_triton_autotune_level():
     return int(os.getenv("LIGHTLLM_TRITON_AUTOTUNE_LEVEL", 0))
 
 
+def enable_triton_autotune():
+    from lightllm.common.triton_utils.autotuner import AutotuneLevel, Autotuner
+
+    return (
+        get_triton_autotune_level() in [AutotuneLevel.ADAPTIVE_AUTOTUNE, AutotuneLevel.FORCE_AUTOTUNE]
+        and Autotuner.is_autotune_warmup()
+    )
+
+
 g_model_init_done = False
 
 
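
The new helper folds the repeated "level is ADAPTIVE or FORCE, and warmup is running" check into a single call. The import of AutotuneLevel and Autotuner sits inside the function body, presumably because autotuner.py itself imports from envs_utils.py and a module-level import would be circular. A call site then reduces to the pattern below (sketch; do_warmup_only_work is a hypothetical caller):

from lightllm.utils.envs_utils import enable_triton_autotune


def do_warmup_only_work():
    # Replaces the old two-part check spelled out at every call site.
    if enable_triton_autotune():
        ...  # e.g. launch dummy kernels or randomize router logits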
