Skip to content

Commit 1473a26

Browse files
authored
[BACKEND] Enable vectorized fp8 cast on Ada GPUs (#6156)
```
cvt with .e4m3x2/.e5m2x2 requires sm_89 or higher.
cvt.satfinite.{e4m3x2, e5m2x2}.{f32, f16x2} requires sm_89 or higher.
```
1 parent 2ca510f commit 1473a26

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -422,19 +422,19 @@ struct FpToFpOpConversion
422422
// F8 -> F16
423423
{{F8E4M3TyID, F16TyID, undefRounding}, Fp8E4M3Nv_to_Fp16},
424424
{{F8E5M2TyID, F16TyID, undefRounding},
425-
Fp8E5M2_to_Fp16(computeCapability >= 90)},
425+
Fp8E5M2_to_Fp16(computeCapability >= 89)},
426426
{{F16TyID, F8E4M3TyID, RoundingMode::RTNE}, Fp16_to_Fp8E4M3Nv},
427427
{{F16TyID, F8E5M2TyID, RoundingMode::RTNE},
428-
Fp16_to_Fp8E5M2_RTNE(computeCapability >= 90)},
428+
Fp16_to_Fp8E5M2_RTNE(computeCapability >= 89)},
429429
{{F16TyID, F8E5M2TyID, RoundingMode::RTZ}, Fp16_to_Fp8E5M2_RTZ},
430430
// F8 -> BF16
431431
{{F8E5M2TyID, BF16TyID, undefRounding},
432-
Fp8E5M2_to_Bf16(computeCapability >= 90)},
432+
Fp8E5M2_to_Bf16(computeCapability >= 89)},
433433
{{F8E4M3TyID, BF16TyID, undefRounding},
434-
Fp8E4M3Nv_to_Bf16(computeCapability >= 90)},
434+
Fp8E4M3Nv_to_Bf16(computeCapability >= 89)},
435435
// BF16 -> F8
436436
{{BF16TyID, F8E5M2TyID, RoundingMode::RTNE},
437-
Bf16_to_Fp8E5M2(computeCapability >= 90)},
437+
Bf16_to_Fp8E5M2(computeCapability >= 89)},
438438
{{BF16TyID, F8E4M3TyID, RoundingMode::RTNE}, Bf16_to_Fp8E4M3Nv},
439439
// F32 -> F8
440440
{{F32TyID, F8E4M3TyID, RoundingMode::RTNE}, Fp32_to_Fp8E4M3Nv},

0 commit comments

Comments
 (0)