diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 7f0a9f7983ecf..01866fbd9da6e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -588,8 +588,10 @@ bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) { Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 || Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 || Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 || - Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 || - Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12; + Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 || + Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 || + Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 || + Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12; } bool isGenericAtomic(unsigned Opc) { diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 4d550644504a7..c743eb43e3465 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -634,17 +634,16 @@ let SubtargetPredicate = HasFP8ConversionInsts, OtherPredicates = [HasSDWA] in { } } - -// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions. -def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfile<[v2f32, i32, untyped, untyped]> { - let HasOpSel = 1; - let HasClamp = 0; - let HasOMod = 0; - let HasExtDPP = 0; - let HasExtVOP3DPP = 0; - let AsmVOP3Base = getAsmVOP3Base.ret; +let HasClamp = 0, HasOMod = 0, HasExtDPP = 0, HasExtVOP3DPP = 0, + HasOpSel = 1 in { + // Input modifiers are not supported + // NB: fake16 VOP1 does not support op_sel. + def VOPProfile_Base_CVT_PK_F32_F8_fake16 : VOPProfile_Fake16> { + let Src0Mod = IntT16InputMods<1/*IsFake16*/>; + } + def VOPProfile_Base_CVT_PK_F32_F8_t16 : VOPProfile_True16> { + let Src0Mod = IntT16InputMods<0/*IsFake16*/>; + } } class VOPProfile_Base_CVT_F_F8_ByteSel : VOPProfile<[DstVT, i32, untyped, untyped]> { @@ -673,8 +672,15 @@ let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in { defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F_F8_ByteSel>; defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F_F8_ByteSel>; - defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>; - defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>; + + let True16Predicate = UseFakeTrue16Insts in { + defm V_CVT_PK_F32_FP8_fake16 : VOP1Inst<"v_cvt_pk_f32_fp8_fake16", VOPProfile_Base_CVT_PK_F32_F8_fake16>; + defm V_CVT_PK_F32_BF8_fake16 : VOP1Inst<"v_cvt_pk_f32_bf8_fake16", VOPProfile_Base_CVT_PK_F32_F8_fake16>; + } + let True16Predicate = UseRealTrue16Insts in { + defm V_CVT_PK_F32_FP8_t16 : VOP1Inst<"v_cvt_pk_f32_fp8_t16", VOPProfile_Base_CVT_PK_F32_F8_t16>; + defm V_CVT_PK_F32_BF8_t16 : VOP1Inst<"v_cvt_pk_f32_bf8_t16", VOPProfile_Base_CVT_PK_F32_F8_t16>; + } } class Cvt_F_F8_Pat_ByteSel : GCNPat< @@ -698,9 +704,9 @@ class Cvt_PK_F32_F8_Pat_OpSel; + V_CVT_PK_F32_FP8_fake16_e32, V_CVT_PK_F32_FP8_fake16_e64>; def : Cvt_PK_F32_F8_Pat_OpSel; + V_CVT_PK_F32_BF8_fake16_e32, V_CVT_PK_F32_BF8_fake16_e64>; } } @@ -954,13 +960,14 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name op, defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name; defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name; -// Define VOP1 instructions using the pseudo instruction with its old profile and -// VOP3 using the OpSel profile for the pseudo instruction. -defm V_CVT_PK_F32_FP8 : VOP1_Real_e32_with_name; -defm V_CVT_PK_F32_FP8 : VOP3_Real_with_name; - -defm V_CVT_PK_F32_BF8 : VOP1_Real_e32_with_name; -defm V_CVT_PK_F32_BF8 : VOP3_Real_with_name; +defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name; +defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name; +defm V_CVT_PK_F32_FP8_fake16 : VOP3_Real_with_name; +defm V_CVT_PK_F32_FP8_t16 : VOP3_Real_with_name; +defm V_CVT_PK_F32_BF8_fake16 : VOP1_Real_e32_with_name; +defm V_CVT_PK_F32_BF8_t16 : VOP1_Real_e32_with_name; +defm V_CVT_PK_F32_BF8_fake16 : VOP3_Real_with_name; +defm V_CVT_PK_F32_BF8_t16 : VOP3_Real_with_name; defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c, "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">; diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s index b9ee13dcad6e7..59d1030fb8a96 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s @@ -429,11 +429,14 @@ v_cvt_pk_f32_bf8_e32 v[2:3], 3 v_cvt_pk_f32_bf8_e32 v[3:4], 3 // GFX12: v_cvt_pk_f32_bf8_e32 v[3:4], 3 ; encoding: [0x83,0xde,0x06,0x7e] -v_cvt_pk_f32_bf8_e32 v[2:3], v3 -// GFX12: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xdf,0x04,0x7e] +v_cvt_pk_f32_bf8_e32 v[2:3], v3.l +// GFX12: v_cvt_pk_f32_bf8_e32 v[2:3], v3.l ; encoding: [0x03,0xdf,0x04,0x7e] -v_cvt_pk_f32_bf8_e32 v[3:4], v3 -// GFX12: v_cvt_pk_f32_bf8_e32 v[3:4], v3 ; encoding: [0x03,0xdf,0x06,0x7e] +v_cvt_pk_f32_bf8_e32 v[3:4], v3.l +// GFX12: v_cvt_pk_f32_bf8_e32 v[3:4], v3.l ; encoding: [0x03,0xdf,0x06,0x7e] + +v_cvt_pk_f32_bf8_e32 v[3:4], v3.h +// GFX12: v_cvt_pk_f32_bf8_e32 v[3:4], v3.h ; encoding: [0x83,0xdf,0x06,0x7e] v_cvt_pk_f32_fp8_e32 v[2:3], s3 // GFX12: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xdc,0x04,0x7e] @@ -441,8 +444,11 @@ v_cvt_pk_f32_fp8_e32 v[2:3], s3 v_cvt_pk_f32_fp8_e32 v[2:3], 3 // GFX12: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xdc,0x04,0x7e] -v_cvt_pk_f32_fp8_e32 v[2:3], v3 -// GFX12: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xdd,0x04,0x7e] +v_cvt_pk_f32_fp8_e32 v[2:3], v3.l +// GFX12: v_cvt_pk_f32_fp8_e32 v[2:3], v3.l ; encoding: [0x03,0xdd,0x04,0x7e] + +v_cvt_pk_f32_fp8_e32 v[2:3], v3.h +// GFX12: v_cvt_pk_f32_fp8_e32 v[2:3], v3.h ; encoding: [0x83,0xdd,0x04,0x7e] v_cvt_f16_f32 v5.l, v1 // GFX12: v_cvt_f16_f32_e32 v5.l, v1 ; encoding: [0x01,0x15,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s index d7cca435abc24..6e1708dd879cc 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s @@ -486,6 +486,12 @@ v_cvt_pk_f32_bf8_e64 v[2:3], v3 v_cvt_pk_f32_bf8_e64 v[2:3], v3 op_sel:[1,0] // GFX12: encoding: [0x02,0x08,0xef,0xd5,0x03,0x01,0x00,0x00] +v_cvt_pk_f32_bf8_e64 v[2:3], v3.h +// GFX12: encoding: [0x02,0x08,0xef,0xd5,0x03,0x01,0x00,0x00] + +v_cvt_pk_f32_bf8_e64 v[2:3], v255.h +// GFX12: encoding: [0x02,0x08,0xef,0xd5,0xff,0x01,0x00,0x00] + v_cvt_pk_f32_fp8_e64 v[2:3], s3 // GFX12: encoding: [0x02,0x00,0xee,0xd5,0x03,0x00,0x00,0x00] @@ -534,6 +540,12 @@ v_cvt_pk_f32_fp8_e64 v[3:4], v3 v_cvt_pk_f32_fp8_e64 v[3:4], v3 op_sel:[1,0] // GFX12: encoding: [0x03,0x08,0xee,0xd5,0x03,0x01,0x00,0x00] +v_cvt_pk_f32_fp8_e64 v[3:4], v3.h +// GFX12: encoding: [0x03,0x08,0xee,0xd5,0x03,0x01,0x00,0x00] + +v_cvt_pk_f32_fp8_e64 v[3:4], v255.h +// GFX12: encoding: [0x03,0x08,0xee,0xd5,0xff,0x01,0x00,0x00] + v_cvt_f16_f32_e64 v5.l, v1 // GFX12: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt index 9a181350f5b8c..67618f45c31ca 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt @@ -436,18 +436,32 @@ # GFX12: v_cvt_pk_f32_bf8_e64 v[2:3], 3 ; encoding: [0x02,0x00,0xef,0xd5,0x83,0x00,0x00,0x00] 0x02,0x00,0xef,0xd5,0x83,0x00,0x00,0x00 -# GFX12: v_cvt_pk_f32_bf8_e64 v[2:3], v3 ; encoding: [0x02,0x00,0xef,0xd5,0x03,0x01,0x00,0x00] +# GFX12-REAL16: v_cvt_pk_f32_bf8_e64 v[2:3], v3.l ; encoding: [0x02,0x00,0xef,0xd5,0x03,0x01,0x00,0x00] +# GFX12-FAKE16: v_cvt_pk_f32_bf8_e64 v[2:3], v3 ; encoding: [0x02,0x00,0xef,0xd5,0x03,0x01,0x00,0x00] 0x02,0x00,0xef,0xd5,0x03,0x01,0x00,0x00 +# GFX12-REAL16: v_cvt_pk_f32_bf8_e64 v[2:3], v3.h op_sel:[1,0] ; encoding: [0x02,0x08,0xef,0xd5,0x03,0x01,0x00,0x00] +0x02,0x08,0xef,0xd5,0x03,0x01,0x00,0x00 + +# GFX12-REAL16: v_cvt_pk_f32_bf8_e64 v[2:3], v255.h op_sel:[1,0] ; encoding: [0x02,0x08,0xef,0xd5,0xff,0x01,0x00,0x00] +0x02,0x08,0xef,0xd5,0xff,0x01,0x00,0x00 + # GFX12: v_cvt_pk_f32_fp8_e64 v[2:3], s3 ; encoding: [0x02,0x00,0xee,0xd5,0x03,0x00,0x00,0x00] 0x02,0x00,0xee,0xd5,0x03,0x00,0x00,0x00 # GFX12: v_cvt_pk_f32_fp8_e64 v[2:3], 3 ; encoding: [0x02,0x00,0xee,0xd5,0x83,0x00,0x00,0x00] 0x02,0x00,0xee,0xd5,0x83,0x00,0x00,0x00 -# GFX12: v_cvt_pk_f32_fp8_e64 v[2:3], v3 ; encoding: [0x02,0x00,0xee,0xd5,0x03,0x01,0x00,0x00] +# GFX12-REAL16: v_cvt_pk_f32_fp8_e64 v[2:3], v3.l ; encoding: [0x02,0x00,0xee,0xd5,0x03,0x01,0x00,0x00] +# GFX12-FAKE16: v_cvt_pk_f32_fp8_e64 v[2:3], v3 ; encoding: [0x02,0x00,0xee,0xd5,0x03,0x01,0x00,0x00] 0x02,0x00,0xee,0xd5,0x03,0x01,0x00,0x00 +# GFX12-REAL16: v_cvt_pk_f32_fp8_e64 v[2:3], v3.h op_sel:[1,0] ; encoding: [0x02,0x08,0xee,0xd5,0x03,0x01,0x00,0x00] +0x02,0x08,0xee,0xd5,0x03,0x01,0x00,0x00 + +# GFX12-REAL16: v_cvt_pk_f32_fp8_e64 v[2:3], v255.h op_sel:[1,0] ; encoding: [0x02,0x08,0xee,0xd5,0xff,0x01,0x00,0x00] +0x02,0x08,0xee,0xd5,0xff,0x01,0x00,0x00 + # GFX12-REAL16: v_cvt_f16_f32_e64 v5.l, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] # GFX12-FAKE16: v_cvt_f16_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] 0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00