Skip to content

Commit d4a3fbe

Browse files
author
p00465316
committed
fix e2e
Signed-off-by: p00465316 <[email protected]>
1 parent d2374c0 commit d4a3fbe

File tree

3 files changed: +6 / -6 lines changed

3 files changed: +6 / -6 lines changed

vllm_ascend/attention/attention_v1_torchair.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,8 +378,9 @@ def forward(
378378
shape = [batch_size * seq_len, num_heads, head_size]
379379
"""
380380
num_tokens = query.shape[0]
381-
use_kv_cache_quant = len(
382-
kv_cache) > 0 and kv_cache[0].dtype == torch.int8
381+
use_kv_cache_quant = (kv_cache is not None and len(kv_cache) > 0
382+
and kv_cache[0].numel() > 0
383+
and kv_cache[0].dtype == torch.int8)
383384
if output is None:
384385
output = torch.empty(num_tokens,
385386
self.num_heads,

vllm_ascend/models/qwen3_moe.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,7 @@ def forward(
373373

374374
if not self.use_aclgraph:
375375
hidden_states = self.mlp(
376-
hidden_states,
377-
attn_metadata,
378-
_metadata_for_padding=_metadata_for_padding)
376+
hidden_states, _metadata_for_padding=_metadata_for_padding)
379377
else:
380378
hidden_states = self.mlp(hidden_states)
381379

vllm_ascend/ops/rotary_embedding.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,8 @@ def rope_forward(
305305
is_prefill: Optional[bool] = True,
306306
is_qwen_torchair: Optional[bool] = False,
307307
):
308-
if not get_ascend_config().torchair_graph_config.enabled or is_prefill:
308+
if (not get_ascend_config().torchair_graph_config.enabled
309+
or not is_qwen_torchair or is_prefill):
309310
return rope_forward_oot(self, positions, query, key, offsets,
310311
is_neox_style_override,
311312
is_qwen_torchair) # type: ignore

0 commit comments

Comments (0)