Skip to content

Commit 8a0c2e7

Browse files
authored
[AMDGPU][True16][MC][CodeGen] true16 for v_cndmask_b16 (llvm#119736)
Support the true16 format for `v_cndmask_b16` in MC and CodeGen, in both the true16 and fake16 flows. Because this change replaces `v_cndmask_b16` with `v_cndmask_b16_t16`/`v_cndmask_b16_fake16`, the fake16 CodeGen must be updated as well for the CodeGen tests to pass. The true16 and fake16 changes therefore have to land together; otherwise some of the true16 tests will fail.
1 parent a98df67 commit 8a0c2e7

18 files changed

+1828
-946
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3007,8 +3007,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
30073007
switch (I.getOpcode()) {
30083008
case AMDGPU::V_ADDC_U32_e32:
30093009
case AMDGPU::V_ADDC_U32_dpp:
3010-
case AMDGPU::V_CNDMASK_B16_e32:
3011-
case AMDGPU::V_CNDMASK_B16_dpp:
3010+
case AMDGPU::V_CNDMASK_B16_fake16_e32:
3011+
case AMDGPU::V_CNDMASK_B16_fake16_dpp:
30123012
case AMDGPU::V_CNDMASK_B32_e32:
30133013
case AMDGPU::V_CNDMASK_B32_dpp:
30143014
case AMDGPU::V_DIV_FMAS_F32_e64:
@@ -3023,8 +3023,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
30233023
HazardReg == AMDGPU::VCC_HI;
30243024
case AMDGPU::V_ADDC_U32_e64:
30253025
case AMDGPU::V_ADDC_U32_e64_dpp:
3026-
case AMDGPU::V_CNDMASK_B16_e64:
3027-
case AMDGPU::V_CNDMASK_B16_e64_dpp:
3026+
case AMDGPU::V_CNDMASK_B16_fake16_e64:
3027+
case AMDGPU::V_CNDMASK_B16_fake16_e64_dpp:
30283028
case AMDGPU::V_CNDMASK_B32_e64:
30293029
case AMDGPU::V_CNDMASK_B32_e64_dpp:
30303030
case AMDGPU::V_SUBB_U32_e64:

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,11 +1245,22 @@ class VOPSelectPat <ValueType vt> : GCNPat <
12451245
(vt (select i1:$src0, vt:$src1, vt:$src2)),
12461246
(V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0)
12471247
>;
1248+
class VOPSelectPat_t16 <ValueType vt> : GCNPat <
1249+
(vt (select i1:$src0, vt:$src1, vt:$src2)),
1250+
(V_CNDMASK_B16_t16_e64 0, VSrcT_b16:$src2, 0, VSrcT_b16:$src1, SSrc_i1:$src0)
1251+
>;
12481252

12491253
def : VOPSelectModsPat <i32>;
12501254
def : VOPSelectModsPat <f32>;
1251-
def : VOPSelectPat <f16>;
1252-
def : VOPSelectPat <i16>;
1255+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1256+
let True16Predicate = p in {
1257+
def : VOPSelectPat <f16>;
1258+
def : VOPSelectPat <i16>;
1259+
} // End True16Predicate = p
1260+
let True16Predicate = UseRealTrue16Insts in {
1261+
def : VOPSelectPat_t16 <f16>;
1262+
def : VOPSelectPat_t16 <i16>;
1263+
} // End True16Predicate = UseRealTrue16Insts
12531264

12541265
let AddedComplexity = 1 in {
12551266
def : GCNPat <

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,26 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
714714
def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
715715
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
716716
// V_CNDMASK_B16 is VOP3 only
717+
def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
718+
let IsTrue16 = 1;
719+
let IsRealTrue16 = 1;
720+
let HasOpSel = 1;
721+
let DstRC64 = getVALUDstForVT<DstVT, 1, 1>.ret;
722+
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
723+
let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
724+
let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
725+
let Src0Mod = getSrc0Mod<f16, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
726+
let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
727+
let HasSrc2Mods = 0;
728+
let InsVOP3OpSel = getInsVOP3Base<Src0RC64, Src1RC64,
729+
Src2RC64, NumSrcArgs,
730+
HasClamp, 1/*HasModifiers*/, 0/*HasSrc2Mods*/, HasOMod,
731+
Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/>.ret;
732+
let Src0VOP3DPP = VGPRSrc_16;
733+
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
734+
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 0/*IsFake16*/>.ret;
735+
let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 0/*IsFake16*/>.ret;
736+
}
717737
def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
718738
let IsTrue16 = 1;
719739
let DstRC64 = getVALUDstForVT<DstVT>.ret;
@@ -765,8 +785,10 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
765785
// VOP2 Instructions
766786
//===----------------------------------------------------------------------===//
767787

768-
let SubtargetPredicate = isGFX11Plus in
769-
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>;
788+
let SubtargetPredicate = isGFX11Plus, True16Predicate = UseRealTrue16Insts in
789+
defm V_CNDMASK_B16_t16 : VOP2eInst <"v_cndmask_b16_t16", VOP2e_I16_I16_I16_I1_true16>;
790+
let SubtargetPredicate = isGFX11Plus, True16Predicate = UseFakeTrue16Insts in
791+
defm V_CNDMASK_B16_fake16 : VOP2eInst <"v_cndmask_b16_fake16", VOP2e_I16_I16_I16_I1_fake16>;
770792
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
771793
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
772794
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
@@ -1846,7 +1868,7 @@ defm V_FMAMK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037
18461868
defm V_FMAAK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">;
18471869

18481870
// VOP3 only.
1849-
defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11_gfx12<0x25d>;
1871+
defm V_CNDMASK_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x25d, "v_cndmask_b16">;
18501872
defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11_gfx12<0x31c>;
18511873
defm V_BFM_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31d>;
18521874
defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31e>;

0 commit comments

Comments
 (0)