Skip to content

Commit 39db419

Browse files
authored
Merge pull request #171 from foundation-model-stack/fp8_paged_prefill_opt
feat: Change paged FP8 prefill back to regular attention
2 parents f9ca98a + f86feed commit 39db419

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

fms_mo/aiu_addons/fp8/fp8_attn.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
 # Third Party
 from fms.modules.attention import (
     AttentionKwargs,
+    _sdpa_compute_op,
     _sdpa_update_attn_kwargs,
     register_attention_op,
 )
@@ -340,7 +341,7 @@ def __spyre_scaled_paged_validate_attn_kwargs_op(
 register_attention_op(
     "spyre_paged_attn_fp8",
     _spyre_scaled_paged_store_op,
-    compute_op=_math_fp8_compute_op,
+    compute_op=_sdpa_compute_op,
     is_prefill_op=lambda **attn_kwargs: attn_kwargs.get("block_table", None)
     is None,
     compute_decode_op=_spyre_scaled_paged_compute_op,

0 commit comments

Comments
 (0)