Skip to content

Commit 1eee250

Browse files
committed
fix_A3_ACLgraph_sizes_capture_bug_and_add_new_ut
Signed-off-by: lilinsiman <[email protected]>
1 parent 992271b commit 1eee250

File tree

2 files changed

+56
-11
lines changed

2 files changed

+56
-11
lines changed

tests/e2e/multicard/test_qwen3_moe.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"""
2323

2424
from modelscope import snapshot_download # type: ignore
25-
25+
import os
2626
from tests.e2e.conftest import VllmRunner
2727

2828

@@ -72,3 +72,36 @@ def test_models_distributed_Qwen3_MOE_W8A8():
7272
enforce_eager=False,
7373
) as vllm_model:
7474
vllm_model.generate_greedy(example_prompts, max_tokens)
75+
76+
77+
def test_models_distributed_Qwen3_MOE_TP2_WITH_ACLGRAPH_AIV():
    """Run Qwen3-30B-A3B (TP=2, ACL graph enabled) with
    HCCL_OP_EXPANSION_MODE='AIV'.

    Exercises the AIV branch of the ACL-graph max-batch-size calculation
    (see ``update_aclgraph_sizes`` in ``vllm_ascend/utils.py``).
    ``enforce_eager=False`` keeps graph capture on.
    """
    # Save any pre-existing value so this test does not leak environment
    # state into tests that run after it (the non-AIV companion test
    # depends on this variable being unset).
    prior = os.environ.get('HCCL_OP_EXPANSION_MODE')
    os.environ['HCCL_OP_EXPANSION_MODE'] = 'AIV'
    try:
        example_prompts = [
            "Hello, my name is",
        ]
        dtype = "auto"
        max_tokens = 5
        with VllmRunner(
                "Qwen/Qwen3-30B-A3B",
                dtype=dtype,
                tensor_parallel_size=2,
                enforce_eager=False,
        ) as vllm_model:
            vllm_model.generate_greedy(example_prompts, max_tokens)
    finally:
        # Restore the environment exactly as we found it.
        if prior is None:
            os.environ.pop('HCCL_OP_EXPANSION_MODE', None)
        else:
            os.environ['HCCL_OP_EXPANSION_MODE'] = prior
91+
92+
93+
def test_models_distributed_Qwen3_MOE_TP2_WITH_ACLGRAPH():
    """Run Qwen3-30B-A3B (TP=2, ACL graph enabled) with
    HCCL_OP_EXPANSION_MODE unset.

    Exercises the default (non-AIV) branch of the ACL-graph
    max-batch-size calculation in ``vllm_ascend/utils.py``.
    """
    # pop(..., None) removes the variable if present and remembers the
    # prior value so it can be restored afterwards, instead of deleting
    # it unconditionally and leaking the change to later tests.
    prior = os.environ.pop('HCCL_OP_EXPANSION_MODE', None)
    try:
        example_prompts = [
            "Hello, my name is",
        ]
        dtype = "auto"
        max_tokens = 5
        with VllmRunner(
                "Qwen/Qwen3-30B-A3B",
                dtype=dtype,
                tensor_parallel_size=2,
                enforce_eager=False,
        ) as vllm_model:
            vllm_model.generate_greedy(example_prompts, max_tokens)
    finally:
        # Restore the environment exactly as we found it.
        if prior is not None:
            os.environ['HCCL_OP_EXPANSION_MODE'] = prior

vllm_ascend/utils.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# Adapted from vllm-project/vllm/vllm/worker/worker.py
1818
#
1919

20+
import os
2021
import atexit
2122
import functools
2223
import math
@@ -325,17 +326,28 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
325326
num_hidden_layers = get_max_hidden_layers(hf_config)
326327
parallel_config = vllm_config.parallel_config
327328

329+
if os.getenv("HCCL_OP_EXPANSION_MODE")=='AIV':
328330
# TODO: Find out whether we need to take into account the pp_size
329-
parallel_factor = 1 + sum(size > 1 for size in [
330-
parallel_config.data_parallel_size_local,
331-
parallel_config.tensor_parallel_size,
332-
])
333-
334-
# Calculate maximum supported batch sizes considering model architecture
335-
max_num_batch_sizes = math.floor(MAX_CAPTURE_SIZE /
336-
(num_hidden_layers + 1) / parallel_factor)
337-
logger.info("Calculated maximum supported batch sizes for ACL graph: %s",
338-
max_num_batch_sizes)
331+
parallel_factor = 1 + sum(size > 1 for size in [
332+
parallel_config.data_parallel_size,
333+
parallel_config.tensor_parallel_size,
334+
])
335+
336+
# Calculate maximum supported batch sizes considering model architecture
337+
max_num_batch_sizes = math.floor(MAX_CAPTURE_SIZE /
338+
(num_hidden_layers + 1) / parallel_factor)
339+
logger.info("Calculated maximum supported batch sizes for ACL graph: %s",
340+
max_num_batch_sizes)
341+
else:
342+
num_comm_groups = sum(size > 1 for size in [
343+
parallel_config.data_parallel_size,
344+
parallel_config.tensor_parallel_size,
345+
])
346+
347+
max_num_batch_sizes = math.floor((MAX_CAPTURE_SIZE - num_comm_groups * 40) / (num_hidden_layers + 1) / (1 + num_comm_groups * 2))
348+
logger.info("Calculated maximum supported batch sizes for ACL graph: %s",
349+
max_num_batch_sizes)
350+
logger.warning("Unset HCCL_OP_EXPANSION_MODE prevents max size capture. Setting HCCL_OP_EXPANSION_MODE=AIV captures max sizes and boosts ACL graph performance.")
339351

340352
# If original sizes exceed maximum, sample a representative subset
341353
if max_num_batch_sizes < len(original_sizes):

0 commit comments

Comments
 (0)