Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1818,6 +1818,18 @@ class getVOP3VRegSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
1 : RegisterOperand<VGPR_32>);
}

// Picks the VGPR-only VOP3 source operand (9-bit source encoding) whose
// register width matches VT.Size. Widths are tested from widest to narrowest;
// any size not listed explicitly falls through to the 32-bit operand.
class getVOP3VSrcReg9ForVT<ValueType VT> {
  RegisterOperand ret =
    !if(!eq(VT.Size, 1024), VRegSrc_1024,
    !if(!eq(VT.Size, 512),  VRegSrc_512,
    !if(!eq(VT.Size, 256),  VRegSrc_256,
    !if(!eq(VT.Size, 192),  VRegSrc_192,
    !if(!eq(VT.Size, 128),  VRegSrc_128,
    !if(!eq(VT.Size, 96),   VRegSrc_96,
    !if(!eq(VT.Size, 64),   VRegSrc_64,
                            VRegSrc_32)))))));
}

// Src2 of VOP3 DPP instructions cannot be a literal
class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
RegisterOperand ret =
Expand Down Expand Up @@ -2852,6 +2864,7 @@ def VOP_V2I16_F32_F32_F32 : VOPProfile<[v2i16, f32, f32, f32]>;
// Naming convention for the profiles below: the def name spells out the
// entries of the VOPProfile value-type list in order, and a trailing
// `untyped` entry is left out of the name.
def VOP_V2I16_V2F16_F32 : VOPProfile<[v2i16, v2f16, f32, untyped]>;
def VOP_V2I16_V2BF16_F32 : VOPProfile<[v2i16, v2bf16, f32, untyped]>;
def VOP_I32_F32_F32_F32 : VOPProfile<[i32, f32, f32, f32]>;
def VOP_I32_V2F32_I32_F32 : VOPProfile<[i32, v2f32, i32, f32]>;
def VOP_I32_V2F16_F32_F32 : VOPProfile<[i32, v2f16, f32, f32]>;
def VOP_I32_V2BF16_F32_F32: VOPProfile<[i32, v2bf16, f32, f32]>;
def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, untyped]>;
Expand Down
16 changes: 14 additions & 2 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1052,7 +1052,11 @@ class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
let HasFP4DstByteSel = 1;
}

def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
class VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<VOPProfile P>
: VOP3_Profile<P, VOP3_OPSEL> {

let Src0RC64 = !if(!gt(P.Src0VT.Size, 32), getVOP3VSrcReg9ForVT<P.Src0VT>.ret,
getVOP3SrcForVT<P.Src0VT>.ret);
let InsVOP3OpSel = (ins PackedF32InputMods: $src0_modifiers, Src0RC64:$src0,
Int32InputMods: $src1_modifiers, Src1RC64:$src1,
FP32InputMods: $src2_modifiers, Src2RC64:$src2,
Expand Down Expand Up @@ -1100,6 +1104,11 @@ class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
let HasExt32BitDPP = 0;
let HasExtVOP3DPP = 0;
let HasExt64BitDPP = 0;

// All convert opcodes operating on FP6/BF6/FP4 data must use VGPR sources for
// every operand slot wider than 32 bits.
let Src0RC64 = !if(!gt(P.Src0VT.Size, 32), getVOP3VSrcReg9ForVT<P.Src0VT>.ret,
getVOP3SrcForVT<P.Src0VT>.ret);
}

let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
Expand Down Expand Up @@ -1141,7 +1150,10 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in
let Constraints = "@earlyclobber $vdst" in {
defm V_CVT_SCALEF32_SR_PK_FP4_F16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
defm V_CVT_SCALEF32_SR_PK_FP4_F32: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32", VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile>;
defm V_CVT_SCALEF32_SR_PK_FP4_F32
: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32",
VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<
VOP_I32_V2F32_I32_F32>>;
}
}
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -434,3 +434,66 @@ v_cvt_scalef32_sr_pk32_bf6_f32 v[0:5], v[6:37], v38, v39 clamp

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_sr_pk32_fp6_f32 v[0:5], v[6:37], v38, v39 clamp

// The cases below pass an SGPR range (s[...]) in a source operand slot that is
// wider than 32 bits of a scale-convert instruction, and check the diagnostic
// reported for the following line.

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_f32_fp6 v[0:31], s[32:37], v6

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_f32_bf6 v[0:31], s[32:37], v6

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_f16_fp6 v[0:15], s[20:25], v8

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_bf16_fp6 v[0:15], s[20:25], v8

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_f16_bf6 v[0:15], s[20:25], v8

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_bf16_bf6 v[0:15], s[20:25], v8

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_fp6_f16 v[18:23], s[0:15], v16

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_bf6_f16 v[18:23], s[0:15], v16

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_fp6_bf16 v[18:23], s[0:15], v16

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_pk32_bf6_bf16 v[18:23], s[0:15], v16

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_sr_pk32_bf6_bf16 v[20:25], s[0:15], v16, v17

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_sr_pk32_bf6_f16 v[20:25], s[0:15], v16, v17

// The f32 variants take a 32-register source. For s[0:31] the expected
// message is a register-size error rather than an operand error, presumably
// because no SGPR tuple of that width exists (TODO confirm).
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid or unsupported register size
v_cvt_scalef32_sr_pk32_bf6_f32 v[36:41], s[0:31], v32, v33

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_sr_pk32_fp6_bf16 v[20:25], s[0:15], v16, v17

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_sr_pk32_fp6_f16 v[20:25], s[0:15], v16, v17

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid or unsupported register size
v_cvt_scalef32_sr_pk32_fp6_f32 v[36:41], s[0:31], v32, v33

// The 2xpk16 forms have two wide sources, so the SGPR range is tried in the
// first and then in the second source slot.
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_2xpk16_fp6_f32 v[0:5], s[0:15], v[6:21], v16

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_2xpk16_fp6_f32 v[0:5], v[6:21], s[0:15], v16

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_2xpk16_bf6_f32 v[0:5], s[0:15], v[6:21], v16

// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_cvt_scalef32_2xpk16_bf6_f32 v[0:5], v[6:21], s[0:15], v16

// Unlike the cases above, this check matches the printed instruction rather
// than an error line. The SGPR pair in src0 is echoed with an inline note
// naming the expected VReg_64 register class. Whether a separate diagnostic is
// also emitted for this case is not visible from this part of the test.
// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, s[2:3]/*Invalid register, operand has 'VReg_64' register class*/, v4, v5
v_cvt_scalef32_sr_pk_fp4_f32 v0, s[2:3], v4, v5