1 file changed: +1 −3
paddleformers/transformers/llama

```diff
@@ -248,16 +248,14 @@ def fusion_flash_attention(
     else:
         if attn_mask_startend_row_indices is not None:
             assert alibi is None, "flashmask_attention or flash_attention_with_sparse_mask not support alibi"
-            if len(attn_mask_startend_row_indices.shape) == 2:
-                attn_mask_startend_row_indices = paddle.unsqueeze(attn_mask_startend_row_indices, axis=1)
 
             if hasattr(F, "flashmask_attention"):
                 attn_output = no_recompute(
                     F.flashmask_attention,
                     query_states,
                     key_states,
                     value_states,
-                    startend_row_indices=attn_mask_startend_row_indices.unsqueeze(-1),
+                    startend_row_indices=attn_mask_startend_row_indices,
                     causal=True,
                     enable=skip_recompute,
                 )
```
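In short, the function no longer reshapes `attn_mask_startend_row_indices` on the caller's behalf: both the 2-D-to-3-D unsqueeze and the trailing `.unsqueeze(-1)` are removed, so the tensor is forwarded to `F.flashmask_attention` unchanged and callers must supply the final shape themselves. A minimal caller-side sketch of what that implies, assuming the expected layout is a 4-D `[batch, num_heads, seq_len, 1]` int32 tensor (the shapes and variable names here are illustrative, not taken from this diff):

```python
import paddle

# Hypothetical caller-side preparation. After this change, any reshaping must
# happen before fusion_flash_attention is called; the function passes
# attn_mask_startend_row_indices through to F.flashmask_attention as-is.
batch_size, seq_len = 2, 128

# A 2-D [batch, seq_len] indices tensor, as a pre-change caller might pass it
# (each entry marks where attention for that column ends; value is illustrative).
row_indices = paddle.full([batch_size, seq_len], seq_len, dtype="int32")

# Reproduce the removed in-function reshapes on the caller side:
# axis 1 adds the head dimension, axis -1 the trailing indices dimension.
row_indices = paddle.unsqueeze(row_indices, axis=1)   # -> [batch, 1, seq_len]
row_indices = paddle.unsqueeze(row_indices, axis=-1)  # -> [batch, 1, seq_len, 1]

print(row_indices.shape)  # [2, 1, 128, 1]
```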