We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 2a99f68 commit 349a945 — Copy full SHA for 349a945
vllm_ascend/attention/context_parallel/sfa_cp.py
@@ -258,7 +258,7 @@ def _execute_sparse_flash_attention_process(
258
259
def _align_to_graph_bucket_tokens(self, attn_output: torch.Tensor | None, attn_metadata: M) -> torch.Tensor | None:
260
if attn_output is None or self.pcp_size == 1:
261
- return None
+ return attn_output
262
# In graph/piecewise mode, output buffer uses graph bucket token size
263
# (forward_context.num_tokens), while PCP path may compute only valid
264
# tokens. Align to the larger one to avoid later write-back mismatch.
0 commit comments