Skip to content

Commit 5dd48c4

Browse files
arsenm and Pravin Jagtap authored
AMDGPU: MC support for v_cvt_scalef32_pk32_f32_[fp|bf]6 of gfx950 (#117590)
Co-authored-by: Pravin Jagtap <[email protected]>
1 parent 6657d4b commit 5dd48c4

File tree

10 files changed: 87 additions and 5 deletions.

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -402,11 +402,17 @@ def FeatureFP4ConversionScaleInsts : SubtargetFeature<"fp4-cvt-scale-insts",
402402
"Has fp4 conversion scale instructions"
403403
>;
404404

405+
def FeatureFP6BF6ConversionScaleInsts : SubtargetFeature<"fp6bf6-cvt-scale-insts",
406+
"HasFP6BF6ConversionScaleInsts",
407+
"true",
408+
"Has fp6 and bf6 conversion scale instructions"
409+
>;
410+
405411
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
406412
"GFX950Insts",
407413
"true",
408414
"Additional instructions for GFX950+",
409-
[FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts]
415+
[FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts, FeatureFP6BF6ConversionScaleInsts]
410416
>;
411417

412418
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
@@ -1552,7 +1558,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
15521558
FeatureBitOp3Insts,
15531559
FeatureFP8ConversionScaleInsts,
15541560
FeatureBF8ConversionScaleInsts,
1555-
FeatureFP4ConversionScaleInsts
1561+
FeatureFP4ConversionScaleInsts,
1562+
FeatureFP6BF6ConversionScaleInsts
15561563
])>;
15571564

15581565
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2435,6 +2442,9 @@ def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInst
24352442
def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">,
24362443
AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>;
24372444

2445+
def HasFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasFP6BF6ConversionScaleInsts()">,
2446+
AssemblerPredicate<(all_of FeatureFP6BF6ConversionScaleInsts)>;
2447+
24382448
def HasGDS : Predicate<"Subtarget->hasGDS()">;
24392449

24402450
def HasGWS : Predicate<"Subtarget->hasGWS()">;

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class AMDGPUSubtarget {
5353
bool HasFP8ConversionScaleInsts = false;
5454
bool HasBF8ConversionScaleInsts = false;
5555
bool HasFP4ConversionScaleInsts = false;
56+
bool HasFP6BF6ConversionScaleInsts = false;
5657
bool EnableRealTrue16Insts = false;
5758
bool HasBF16ConversionInsts = false;
5859
bool HasMadMixInsts = false;
@@ -184,6 +185,8 @@ class AMDGPUSubtarget {
184185

185186
bool hasFP4ConversionScaleInsts() const { return HasFP4ConversionScaleInsts; }
186187

188+
bool hasFP6BF6ConversionScaleInsts() const { return HasFP6BF6ConversionScaleInsts; }
189+
187190
bool hasMadMacF32Insts() const {
188191
return HasMadMacF32Insts || !isGCN();
189192
}

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,6 +1530,7 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
15301530
case OPWV232: return VReg_64RegClassID;
15311531
case OPW96: return VReg_96RegClassID;
15321532
case OPW128: return VReg_128RegClassID;
1533+
case OPW192: return VReg_192RegClassID;
15331534
case OPW160: return VReg_160RegClassID;
15341535
case OPW256: return VReg_256RegClassID;
15351536
case OPW288: return VReg_288RegClassID;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ class AMDGPUDisassembler : public MCDisassembler {
219219
OPW96,
220220
OPW128,
221221
OPW160,
222+
OPW192,
222223
OPW256,
223224
OPW288,
224225
OPW320,

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,7 +1696,8 @@ class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
16961696
defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
16971697
VOPDstOperand_t16Lo128),
16981698
VOPDstOperand<VGPR_32>);
1699-
RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
1699+
RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VOPDstOperand<VReg_1024>,
1700+
!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
17001701
!eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
17011702
!eq(VT.Size, 64) : VOPDstOperand<VReg_64>,
17021703
!eq(VT.Size, 32) : VOPDstOperand<VGPR_32>,
@@ -1752,7 +1753,8 @@ class getSOPSrcForVT<ValueType VT> {
17521753
// Returns the vreg register class to use for source operand given VT
17531754
class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> {
17541755
RegisterOperand ret =
1755-
!cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
1756+
!cond(!eq(VT.Size, 192) : RegisterOperand<VReg_192>,
1757+
!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
17561758
!eq(VT.Size, 96) : RegisterOperand<VReg_96>,
17571759
!eq(VT.Size, 64) : RegisterOperand<VReg_64>,
17581760
!eq(VT.Size, 48) : RegisterOperand<VReg_64>,
@@ -1785,6 +1787,7 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
17851787
!eq(VT, v2i16) : VSrc_v2b16,
17861788
!eq(VT, v4f16) : AVSrc_64,
17871789
!eq(VT, v4bf16) : AVSrc_64,
1790+
!eq(VT.Size, 192) : VRegSrc_192,
17881791
!eq(VT.Size, 128) : VRegSrc_128,
17891792
!eq(VT.Size, 96) : VRegSrc_96,
17901793
!eq(VT.Size, 64) : VSrc_b64,
@@ -2828,6 +2831,9 @@ def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=
28282831
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
28292832
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
28302833
def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>;
2834+
def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>;
2835+
def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>;
2836+
def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>;
28312837

28322838
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
28332839
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,7 @@ def VRegSrc_32 : SrcReg9<VGPR_32, "OPW32">;
12491249
def VRegSrc_64 : SrcReg9<VReg_64, "OPW64">;
12501250
def VRegSrc_96 : SrcReg9<VReg_96, "OPW96">;
12511251
def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
1252+
def VRegSrc_192: SrcReg9<VReg_192, "OPW192">;
12521253
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
12531254
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;
12541255

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,19 @@ def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f
925925
let HasOMod = 0;
926926
}
927927

928+
class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
929+
let HasModifiers = 0;
930+
let HasSrc0IntMods = 0;
931+
let HasSrc1IntMods = 0;
932+
let HasOMod = 0;
933+
let HasOpSel = 0;
934+
let HasClamp = 0;
935+
let HasExtDPP = 0;
936+
let HasExt32BitDPP = 0;
937+
let HasExtVOP3DPP = 0;
938+
let HasExt64BitDPP = 0;
939+
}
940+
928941
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
929942
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
930943
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
@@ -950,6 +963,11 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in
950963
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
951964
}
952965

966+
let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 0 in {
967+
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_fp6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>>;
968+
defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_bf6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>>;
969+
}
970+
953971
let SubtargetPredicate = isGFX10Plus in {
954972
let isCommutable = 1, isReMaterializable = 1 in {
955973
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -1894,3 +1912,7 @@ defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
18941912
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
18951913
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
18961914
}
1915+
let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
1916+
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">;
1917+
defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3_Real_gfx9<0x257, "v_cvt_scalef32_pk32_f32_bf6">;
1918+
}

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -884,4 +884,12 @@ v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0]
884884

885885
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
886886
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
887-
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0]
887+
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0]
888+
889+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
890+
// GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00]
891+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6
892+
893+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
894+
// GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
895+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,27 @@ v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 div:2
125125

126126
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
127127
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp div:2
128+
129+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
130+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp
131+
132+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
133+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 mul:2
134+
135+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
136+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 div:2
137+
138+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
139+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp div:2
140+
141+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
142+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp
143+
144+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
145+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 mul:2
146+
147+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
148+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 div:2
149+
150+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
151+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp div:2

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,3 +611,9 @@
611611

612612
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
613613
0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00
614+
615+
# GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00]
616+
0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00
617+
618+
# GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
619+
0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00

0 commit comments

Comments
 (0)