Skip to content

Commit 98deac3

Browse files
authored
[FEATURE] support custom vllm tuned config path for fused moe triton kernels (#22791)
Signed-off-by: Chi Zhang <[email protected]>
1 parent 653124b commit 98deac3

File tree

2 files changed

+26
-8
lines changed

2 files changed

+26
-8
lines changed

vllm/envs.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@
158158
VLLM_USE_TRTLLM_ATTENTION: Optional[str] = None
159159
VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8: bool = False
160160
VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False
161+
VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None
161162

162163

163164
def get_default_cache_root():
@@ -1120,6 +1121,11 @@ def get_vllm_port() -> Optional[int]:
11201121
# never removed from memory until the server terminates.
11211122
"VLLM_ENABLE_RESPONSES_API_STORE":
11221123
lambda: bool(int(os.getenv("VLLM_ENABLE_RESPONSES_API_STORE", "0"))),
1124+
1125+
# Optional folder that vLLM searches for tuned fused-MoE kernel config files before falling back to its bundled configs
1126+
"VLLM_TUNED_CONFIG_FOLDER":
1127+
lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None),
1128+
11231129
}
11241130

11251131
# --8<-- [end:env-vars-definition]

vllm/model_executor/layers/fused_moe/fused_moe.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -701,20 +701,32 @@ def get_moe_configs(
701701
block_shape = [block_n, block_k] if block_n and block_k else None
702702
json_file_name = get_config_file_name(E, N, dtype, block_shape)
703703

704-
config_file_path = os.path.join(
704+
config_file_paths = []
705+
706+
# Note: the user-defined config folder is checked first, so a matching file there takes priority over the bundled default config
707+
user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER
708+
if user_defined_config_folder is not None:
709+
user_defined_config_file_path = os.path.join(
710+
user_defined_config_folder, json_file_name)
711+
config_file_paths.append(user_defined_config_file_path)
712+
713+
default_config_file_path = os.path.join(
705714
os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name)
706-
if os.path.exists(config_file_path):
707-
with open(config_file_path) as f:
708-
logger.info("Using configuration from %s for MoE layer.",
709-
config_file_path)
710-
# If a configuration has been found, return it
711-
return {int(key): val for key, val in json.load(f).items()}
715+
config_file_paths.append(default_config_file_path)
716+
717+
for config_file_path in config_file_paths:
718+
if os.path.exists(config_file_path):
719+
with open(config_file_path) as f:
720+
logger.info("Using configuration from %s for MoE layer.",
721+
config_file_path)
722+
# If a configuration has been found, return it
723+
return {int(key): val for key, val in json.load(f).items()}
712724

713725
# If no optimized configuration is available, we will use the default
714726
# configuration
715727
logger.warning(
716728
("Using default MoE config. Performance might be sub-optimal! "
717-
"Config file not found at %s"), config_file_path)
729+
"Config file not found at %s"), config_file_paths)
718730
return None
719731

720732

0 commit comments

Comments
 (0)