We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 2a99f68 commit 349a945 — Copy full SHA for 349a945
vllm_ascend/attention/context_parallel/sfa_cp.py
@@ -258,7 +258,7 @@ def _execute_sparse_flash_attention_process(
258
259
def _align_to_graph_bucket_tokens(self, attn_output: torch.Tensor | None, attn_metadata: M) -> torch.Tensor | None:
260
if attn_output is None or self.pcp_size == 1:
261
- return None
+ return attn_output
262
# In graph/piecewise mode, output buffer uses graph bucket token size
263
# (forward_context.num_tokens), while PCP path may compute only valid
264
# tokens. Align to the larger one to avoid later write-back mismatch.
0 commit comments