Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3007,8 +3007,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
switch (I.getOpcode()) {
case AMDGPU::V_ADDC_U32_e32:
case AMDGPU::V_ADDC_U32_dpp:
case AMDGPU::V_CNDMASK_B16_e32:
case AMDGPU::V_CNDMASK_B16_dpp:
case AMDGPU::V_CNDMASK_B16_fake16_e32:
case AMDGPU::V_CNDMASK_B16_fake16_dpp:
case AMDGPU::V_CNDMASK_B32_e32:
case AMDGPU::V_CNDMASK_B32_dpp:
case AMDGPU::V_DIV_FMAS_F32_e64:
Expand All @@ -3023,8 +3023,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
HazardReg == AMDGPU::VCC_HI;
case AMDGPU::V_ADDC_U32_e64:
case AMDGPU::V_ADDC_U32_e64_dpp:
case AMDGPU::V_CNDMASK_B16_e64:
case AMDGPU::V_CNDMASK_B16_e64_dpp:
case AMDGPU::V_CNDMASK_B16_fake16_e64:
case AMDGPU::V_CNDMASK_B16_fake16_e64_dpp:
case AMDGPU::V_CNDMASK_B32_e64:
case AMDGPU::V_CNDMASK_B32_e64_dpp:
case AMDGPU::V_SUBB_U32_e64:
Expand Down
15 changes: 13 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1245,11 +1245,22 @@ class VOPSelectPat <ValueType vt> : GCNPat <
(vt (select i1:$src0, vt:$src1, vt:$src2)),
(V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0)
>;
class VOPSelectPat_t16 <ValueType vt> : GCNPat <
(vt (select i1:$src0, vt:$src1, vt:$src2)),
(V_CNDMASK_B16_t16_e64 0, VSrcT_b16:$src2, 0, VSrcT_b16:$src1, SSrc_i1:$src0)
>;

def : VOPSelectModsPat <i32>;
def : VOPSelectModsPat <f32>;
def : VOPSelectPat <f16>;
def : VOPSelectPat <i16>;
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
let True16Predicate = p in {
def : VOPSelectPat <f16>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do source modifiers work with V_CNDMASK_B16? We should have the source modifier patterns here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Matt. V_CNDMASK_B16 has a operand type of I16_I16_I16_I1 thus it does not has source modifier in Pre-GFX11. Do it make sense to you?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wasn't a question of how the instruction is encoded in tablegen, but does the underlying instruction support them. i.e. is tablegen wrong and it should have them

Copy link
Contributor Author

@broxigarchen broxigarchen Jan 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Sorry I was confused at the beginning.

In Pre-GFX11 i16/f16 are matched to v_cndmask_b32 and in post-gfx11 they are matched to v_cndmask_b16. Both instruction support omod/abs/neg/opsel srcmod. So yes I think we should have srcmod pattern for f16/i16 both pre-GFX11 and post-GFX11.

When I checked the old commit here d2a9739#diff-bfd398f33980e2dad30c39c83b325066633fe6c107982e6d1207d070580940d3R905 It seems set the srcmod to be zero for 16bits type. Is there a reason behind it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed it looks like cndmask_b32 and cndmask_b16 can support those modifiers. I'd recommend filing an issue and adding the support in a follow up patch if it is useful.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filed an issue for this and will address it in later patch

def : VOPSelectPat <i16>;
} // End True16Predicate = p
let True16Predicate = UseRealTrue16Insts in {
def : VOPSelectPat_t16 <f16>;
def : VOPSelectPat_t16 <i16>;
} // End True16Predicate = UseRealTrue16Insts

let AddedComplexity = 1 in {
def : GCNPat <
Expand Down
28 changes: 25 additions & 3 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,26 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
// V_CNDMASK_B16 is VOP3 only
def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
let IsTrue16 = 1;
let IsRealTrue16 = 1;
let HasOpSel = 1;
let DstRC64 = getVALUDstForVT<DstVT, 1, 1>.ret;
let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret;
let Src0Mod = getSrc0Mod<f16, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
let HasSrc2Mods = 0;
let InsVOP3OpSel = getInsVOP3Base<Src0RC64, Src1RC64,
Src2RC64, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 0/*HasSrc2Mods*/, HasOMod,
Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/>.ret;
let Src0VOP3DPP = VGPRSrc_16;
let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 0/*IsFake16*/>.ret;
let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 0/*IsFake16*/>.ret;
}
def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
let IsTrue16 = 1;
let DstRC64 = getVALUDstForVT<DstVT>.ret;
Expand Down Expand Up @@ -765,8 +785,10 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
// VOP2 Instructions
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isGFX11Plus in
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>;
let SubtargetPredicate = isGFX11Plus, True16Predicate = UseRealTrue16Insts in
defm V_CNDMASK_B16_t16 : VOP2eInst <"v_cndmask_b16_t16", VOP2e_I16_I16_I16_I1_true16>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should have SubtargetPredicate and True16Predicate on these instructions. I guess the codegen works since the selection patterns have the predicates, but MC probably needs them.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

let SubtargetPredicate = isGFX11Plus, True16Predicate = UseFakeTrue16Insts in
defm V_CNDMASK_B16_fake16 : VOP2eInst <"v_cndmask_b16_fake16", VOP2e_I16_I16_I16_I1_fake16>;
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
Expand Down Expand Up @@ -1830,7 +1852,7 @@ defm V_FMAMK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037
defm V_FMAAK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">;

// VOP3 only.
defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11_gfx12<0x25d>;
defm V_CNDMASK_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x25d, "v_cndmask_b16">;
defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11_gfx12<0x31c>;
defm V_BFM_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31d>;
defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31e>;
Expand Down
Loading
Loading