@@ -166,8 +166,13 @@ def test_e2e_pangu_with_torchair():
166
166
167
167
def _qwen_torchair_test_fixture (
168
168
model ,
169
+ tp ,
169
170
enable_expert_parallel ,
170
171
):
172
+ # The current access control does not support 16 cards,
173
+ # so the MC2 operator in Qwen's graph mode cannot run.
174
+ # Once 16-card support is available,
175
+ # this e2e can be switched to graph mode.
171
176
example_prompts = [
172
177
"Hello, my name is" ,
173
178
"The president of the United States is" ,
@@ -177,7 +182,7 @@ def _qwen_torchair_test_fixture(
177
182
178
183
additional_config = {
179
184
"torchair_graph_config" : {
180
- "enabled" : True ,
185
+ "enabled" : False ,
181
186
},
182
187
"ascend_scheduler_config" : {
183
188
"enabled" : True ,
@@ -188,9 +193,9 @@ def _qwen_torchair_test_fixture(
188
193
with VllmRunner (
189
194
model ,
190
195
dtype = "half" ,
191
- tensor_parallel_size = 2 ,
196
+ tensor_parallel_size = tp ,
192
197
distributed_executor_backend = "mp" ,
193
- enforce_eager = False ,
198
+ enforce_eager = True ,
194
199
additional_config = additional_config ,
195
200
enable_expert_parallel = enable_expert_parallel ,
196
201
) as vllm_model :
@@ -214,8 +219,8 @@ def _qwen_torchair_test_fixture(
214
219
215
220
216
221
def test_e2e_qwen2_with_torchair ():
217
- _qwen_torchair_test_fixture ("Qwen/Qwen2.5-0.5B -Instruct" , False )
222
+ _qwen_torchair_test_fixture ("Qwen/Qwen2.5-32B -Instruct" , 2 , False )
218
223
219
224
220
225
def test_e2e_qwen3_moe_with_torchair ():
221
- _qwen_torchair_test_fixture ("Qwen/Qwen3-30B-A3B" , True )
226
+ _qwen_torchair_test_fixture ("Qwen/Qwen3-30B-A3B" , 2 , True )
0 commit comments