Skip to content

Commit 9590b73

Browse files
Pravin Jagtap authored and vikramRH committed
AMDGPU: Restrict src0 to VGPRs only for certain cvt scale opcodes. (llvm#127464)
Src0 operands wider than 32 bits on cvt_scale opcodes operating on FP6/BF6/FP4 need to be restricted to take only VGPRs.
1 parent 9412f74 commit 9590b73

File tree

5 files changed

+1964
-2
lines changed

5 files changed

+1964
-2
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1662,6 +1662,18 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
16621662
1 : VSrc_b32);
16631663
}
16641664

1665+
// VGPR-only VOP3 src with 9-bit encoding
1666+
class getVOP3VRegSrcForVT<ValueType VT> { // Maps VT.Size to the VRegSrc_<N> operand of the same bit width.
1667+
RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VRegSrc_1024,
1668+
!eq(VT.Size, 512) : VRegSrc_512,
1669+
!eq(VT.Size, 256) : VRegSrc_256,
1670+
!eq(VT.Size, 192) : VRegSrc_192,
1671+
!eq(VT.Size, 128) : VRegSrc_128,
1672+
!eq(VT.Size, 96) : VRegSrc_96,
1673+
!eq(VT.Size, 64) : VRegSrc_64,
1674+
1 : VRegSrc_32); // Catch-all: any size not matched above selects VRegSrc_32.
1675+
}
1676+
16651677
// Src2 of VOP3 DPP instructions cannot be a literal
16661678
class getVOP3DPPSrcForVT<ValueType VT> {
16671679
RegisterOperand ret =
@@ -2657,6 +2669,7 @@ def VOP_V2I16_F32_F32_F32 : VOPProfile<[v2i16, f32, f32, f32]>;
26572669
def VOP_V2I16_V2F16_F32 : VOPProfile<[v2i16, v2f16, f32, untyped]>;
26582670
def VOP_V2I16_V2BF16_F32 : VOPProfile<[v2i16, v2bf16, f32, untyped]>;
26592671
def VOP_I32_F32_F32_F32 : VOPProfile<[i32, f32, f32, f32]>;
2672+
def VOP_I32_V2F32_I32_F32 : VOPProfile<[i32, v2f32, i32, f32]>;
26602673
def VOP_I32_V2F16_F32_F32 : VOPProfile<[i32, v2f16, f32, f32]>;
26612674
def VOP_I32_V2BF16_F32_F32: VOPProfile<[i32, v2bf16, f32, f32]>;
26622675
def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, untyped]>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,7 +1016,11 @@ class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
10161016
let HasFP8DstByteSel = 1;
10171017
}
10181018

1019-
def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
1019+
class VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<VOPProfile P>
1020+
: VOP3_Profile<P, VOP3_OPSEL> {
1021+
1022+
let Src0RC64 = !if(!gt(P.Src0VT.Size, 32), getVOP3VRegSrcForVT<P.Src0VT>.ret,
1023+
getVOP3SrcForVT<P.Src0VT>.ret);
10201024
let InsVOP3OpSel = (ins PackedF32InputMods: $src0_modifiers, Src0RC64:$src0,
10211025
Int32InputMods: $src1_modifiers, Src1RC64:$src1,
10221026
FP32InputMods: $src2_modifiers, Src2RC64:$src2,
@@ -1064,6 +1068,11 @@ class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
10641068
let HasExt32BitDPP = 0;
10651069
let HasExtVOP3DPP = 0;
10661070
let HasExt64BitDPP = 0;
1071+
1072+
// All convert opcodes operating on FP6/BF6/FP4 data must use VGPR sources for
1073+
// any operand slot wider than 32 bits.
1074+
let Src0RC64 = !if(!gt(P.Src0VT.Size, 32), getVOP3VRegSrcForVT<P.Src0VT>.ret,
1075+
getVOP3SrcForVT<P.Src0VT>.ret);
10671076
}
10681077

10691078
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
@@ -1105,7 +1114,10 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in
11051114
let Constraints = "@earlyclobber $vdst" in {
11061115
defm V_CVT_SCALEF32_SR_PK_FP4_F16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
11071116
defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
1108-
defm V_CVT_SCALEF32_SR_PK_FP4_F32: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32", VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile>;
1117+
defm V_CVT_SCALEF32_SR_PK_FP4_F32
1118+
: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32",
1119+
VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile<
1120+
VOP_I32_V2F32_I32_F32>>;
11091121
}
11101122
}
11111123
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;

0 commit comments

Comments
 (0)