1 parent 9b131ae commit e34fd18
vllm/model_executor/models/llama.py
@@ -215,6 +215,8 @@ def __init__(
                    and quant_config.is_fp8_w8a8())
         self.attn_fp8_out = (envs.VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT
                              and current_platform.is_fp8_fnuz() and use_fp8)
+        if envs.VLLM_USE_V1 and not envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION:
+            self.attn_fp8_out = False

         self.attn = Attention(
             self.num_heads,
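
For readers skimming the hunk, the following is a minimal, self-contained sketch of the condition that `self.attn_fp8_out` encodes after this change. The helper function and its boolean parameters are hypothetical stand-ins for vLLM's `envs` flags, `current_platform.is_fp8_fnuz()`, and the quant-config-derived `use_fp8`; they are not the actual vLLM API.

```python
# Sketch only: plain booleans stand in for vLLM's envs / platform / quant-config checks.
def resolve_attn_fp8_out(
    rocm_custom_paged_attn_fp8_out: bool,  # stands in for envs.VLLM_USE_ROCM_CUSTOM_PAGED_ATTN_FP8_OUT
    is_fp8_fnuz: bool,                     # stands in for current_platform.is_fp8_fnuz()
    use_fp8: bool,                         # stands in for the fp8 w8a8 flag derived from quant_config
    use_v1: bool,                          # stands in for envs.VLLM_USE_V1
    v1_prefill_decode_attn: bool,          # stands in for envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION
) -> bool:
    attn_fp8_out = rocm_custom_paged_attn_fp8_out and is_fp8_fnuz and use_fp8
    # Behavior added by this commit: on the V1 engine, fp8 attention output is
    # kept only when the prefill/decode attention path is enabled.
    if use_v1 and not v1_prefill_decode_attn:
        attn_fp8_out = False
    return attn_fp8_out


# Example: V1 engine without the prefill/decode attention path disables fp8 output
# even when the ROCm custom paged-attention fp8 conditions are all met.
assert resolve_attn_fp8_out(True, True, True, use_v1=True, v1_prefill_decode_attn=False) is False
```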