[ROCm][Bugfix] Use platform specific FP8 dtype (#15717)

gshtras · web-flow · commit 40a36ccfeb49 · 2025-04-04T09:40:20.000-07:00
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
diff --git a/vllm/attention/ops/prefix_prefill.py b/vllm/attention/ops/prefix_prefill.py
@@ -753,7 +753,7 @@ def context_attention_fwd(q,
             assert (v_cache.dtype == torch.uint8)
 
             if kv_cache_dtype in ("fp8", "fp8_e4m3"):
-                target_dtype = torch.float8_e4m3fn
+                target_dtype = current_platform.fp8_dtype()
             elif kv_cache_dtype == "fp8_e5m2":
                 target_dtype = torch.float8_e5m2
             else: