Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4004,6 +4004,9 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
}

unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
if (!IsB32 && STI.hasTrue16BitInsts())
Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
: AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
unsigned CBL = STI.getConstantBusLimit(Opc);
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
Expand Down
67 changes: 54 additions & 13 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1447,34 +1447,72 @@ let SubtargetPredicate = isGFX12Plus in {

} // End SubtargetPredicate = isGFX12Plus

let SubtargetPredicate = HasBitOp3Insts in {
let HasClamp = 0, HasModifiers = 1 in {
def BitOp3_B16_Profile : VOP3_BITOP3_Profile<VOPProfile <[i16, i16, i16, i16, i32]>, VOP3_OPSEL>;
def BitOp3_B16_t16_Profile : VOP3_Profile_True16<BitOp3_B16_Profile>;
def BitOp3_B16_fake16_Profile : VOP3_Profile_Fake16<BitOp3_B16_Profile>;
}

let OtherPredicates = [HasBitOp3Insts] in {
let isReMaterializable = 1 in {
defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
VOP3_BITOP3_Profile<VOPProfile <[i16, i16, i16, i16, i32]>, VOP3_OPSEL>>;
let SubtargetPredicate = isGFX940Plus in
defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16", BitOp3_B16_Profile>;
let SubtargetPredicate = isGFX1250Plus in
defm V_BITOP3_B16_gfx1250 : VOP3Inst_t16_with_profiles <"v_bitop3_b16_gfx1250", BitOp3_B16_Profile,
BitOp3_B16_t16_Profile, BitOp3_B16_fake16_Profile>;
defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>,
VOPD_Component<0x12, "v_bitop2_b32">;
}

def : GCNPat<
(i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
>;

def : GCNPat<
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;

def : GCNPat<
(i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
>;

def : GCNPat<
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;
} // End SubtargetPredicate = HasBitOp3Insts
let SubtargetPredicate = isGFX940Plus in {
def : GCNPat<
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;

def : GCNPat<
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;
} // End SubtargetPredicate = isGFX940Plus

let SubtargetPredicate = isGFX1250Plus in {
let True16Predicate = UseFakeTrue16Insts in {
def : GCNPat<
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_gfx1250_fake16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;

def : GCNPat<
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_gfx1250_fake16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;
}
let True16Predicate = UseRealTrue16Insts in {
def : GCNPat<
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_gfx1250_t16_e64 0, VSrcT_b16:$src0, 0, VSrcT_b16:$src1, 0, VSrcT_b16:$src2, timm:$bitop3, 0))
>;

def : GCNPat<
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_gfx1250_t16_e64 0, VSrcT_b16:$src0, 0, VSrcT_b16:$src1, 0, VSrcT_b16:$src2, timm:$bitop3, 0))
>;
}
} // End SubtargetPredicate = isGFX1250Plus

} // End OtherPredicates = [HasBitOp3Insts]

class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
(AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
Expand Down Expand Up @@ -1766,6 +1804,9 @@ defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_m
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;

// Real encodings for the BITOP3 instructions: opcode 0x233 for the 16-bit
// form (instantiated for both the _t16 and _fake16 variants, with the asm name
// given explicitly as "v_bitop3_b16") and opcode 0x234 for the 32-bit form
// (asm name left to the multiclass default).
defm V_BITOP3_B16_gfx1250 : VOP3_Real_BITOP3_t16_and_fake16_gfx1250<0x233, "v_bitop3_b16">;
defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx1250<0x234>;

defm V_MAD_U32 : VOP3Only_Realtriple_gfx1250<0x235>;
defm V_MAD_NC_U64_U32 : VOP3Only_Realtriple_gfx1250<0x2fa>;
defm V_MAD_NC_I64_I32 : VOP3Only_Realtriple_gfx1250<0x2fb>;
Expand Down
77 changes: 77 additions & 0 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,19 @@ class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
let Inst{49-41} = src0;
}

// Encoding class for the non-DPP (_e64) BITOP3 real instruction. It inherits
// from VOP3e_gfx11_gfx12 and then overrides the encoding bits listed below.
//   op - 10-bit opcode forwarded to the base encoding.
//   p  - instruction profile; only p.HasOpSel is consulted here.
class VOP3a_BITOP3_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx11_gfx12<op, p> {
// 8-bit bitop3 operand; it is not stored contiguously in the encoding.
bits<8> bitop3;

// bitop3 placement: [7:6] -> Inst[60:59], [5:3] -> Inst[10:8],
// [2:0] -> Inst[63:61].
let Inst{60-59} = bitop3{7-6};
let Inst{10-8} = bitop3{5-3};
let Inst{63-61} = bitop3{2-0};

// Inst[11..13] take bit 2 of the src0/src1/src2 modifiers and Inst[14] takes
// bit 3 of the src0 modifiers, but only when the profile has op_sel; otherwise
// all four bits are encoded as zero.
// NOTE(review): presumably modifier bit 2 is the per-source op_sel and src0
// modifier bit 3 is the destination op_sel - confirm against SISrcMods.
let Inst{11} = !if(p.HasOpSel, src0_modifiers{2}, 0);
let Inst{12} = !if(p.HasOpSel, src1_modifiers{2}, 0);
let Inst{13} = !if(p.HasOpSel, src2_modifiers{2}, 0);
let Inst{14} = !if(p.HasOpSel, src0_modifiers{3}, 0);
}

class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
bits<6> attr;
bits<2> attrchan;
Expand Down Expand Up @@ -1506,6 +1519,7 @@ class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VO
let HasFP8SrcByteSel = P.HasFP8SrcByteSel;
let HasFP8DstByteSel = P.HasFP8DstByteSel;
let HasOMod = P.HasOMod;
let HasBitOp3 = P.HasBitOp3;

let HasModifiers =
!if (Features.IsMAI, 0,
Expand All @@ -1525,6 +1539,7 @@ class VOP3_Profile_True16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> :
let HasFP8SrcByteSel = P.HasFP8SrcByteSel;
let HasFP8DstByteSel = P.HasFP8DstByteSel;
let HasOMod = P.HasOMod;
let HasBitOp3 = P.HasBitOp3;

let HasModifiers =
!if (Features.IsMAI, 0,
Expand All @@ -1540,6 +1555,7 @@ class VOP3_Profile_Fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> :
let HasFP8SrcByteSel = P.HasFP8SrcByteSel;
let HasFP8DstByteSel = P.HasFP8DstByteSel;
let HasOMod = P.HasOMod;
let HasBitOp3 = P.HasBitOp3;

let HasModifiers =
!if (Features.IsMAI, 0,
Expand Down Expand Up @@ -1723,6 +1739,34 @@ class VOP3b_DPP8_Base<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
let Inst{14 - 8} = sdst;
}

// DPP16 real-instruction class for BITOP3. It inherits from VOP3_DPP16_Gen_t16
// and applies the same bitop3 / bits 11-14 overrides as the non-DPP BITOP3
// encoding class (VOP3a_BITOP3_gfx12).
//   op      - 10-bit opcode forwarded to the base class.
//   p       - the DPP pseudo; its profile (p.Pfl) supplies HasOpSel.
//   Gen     - target generation descriptor forwarded to the base class.
//   asmName - assembly name forwarded to the base class.
class VOP3_BITOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo p, GFXGen Gen, string asmName>
: VOP3_DPP16_Gen_t16<op, p, Gen, asmName> {
// 8-bit bitop3 operand; it is not stored contiguously in the encoding.
bits<8> bitop3;

// bitop3 placement: [7:6] -> Inst[60:59], [5:3] -> Inst[10:8],
// [2:0] -> Inst[63:61].
let Inst{60-59} = bitop3{7-6};
let Inst{10-8} = bitop3{5-3};
let Inst{63-61} = bitop3{2-0};

// Inst[11..14] come from the source modifiers only when the pseudo's profile
// has op_sel; otherwise they are zero.
// NOTE(review): presumably these are the op_sel bits - confirm.
let Inst{11} = !if(p.Pfl.HasOpSel, src0_modifiers{2}, 0);
let Inst{12} = !if(p.Pfl.HasOpSel, src1_modifiers{2}, 0);
let Inst{13} = !if(p.Pfl.HasOpSel, src2_modifiers{2}, 0);
let Inst{14} = !if(p.Pfl.HasOpSel, src0_modifiers{3}, 0);
}

// DPP8 real-instruction class for BITOP3. It inherits from Base_VOP3_DPP8_t16
// and applies the same bitop3 / bits 11-14 overrides as the DPP16 and non-DPP
// BITOP3 encoding classes.
//   op      - 10-bit opcode forwarded to the base class.
//   p       - the pseudo instruction; its profile (p.Pfl) supplies HasOpSel.
//   asmName - assembly name forwarded to the base class.
class VOP3_BITOP3_DPP8<bits<10> op, VOP_Pseudo p, string asmName>
: Base_VOP3_DPP8_t16<op, p, asmName> {
// 8-bit bitop3 operand; it is not stored contiguously in the encoding.
bits<8> bitop3;

// bitop3 placement: [7:6] -> Inst[60:59], [5:3] -> Inst[10:8],
// [2:0] -> Inst[63:61].
let Inst{60-59} = bitop3{7-6};
let Inst{10-8} = bitop3{5-3};
let Inst{63-61} = bitop3{2-0};

// Inst[11..14] come from the source modifiers only when the pseudo's profile
// has op_sel; otherwise they are zero.
// NOTE(review): presumably these are the op_sel bits - confirm.
let Inst{11} = !if(p.Pfl.HasOpSel, src0_modifiers{2}, 0);
let Inst{12} = !if(p.Pfl.HasOpSel, src1_modifiers{2}, 0);
let Inst{13} = !if(p.Pfl.HasOpSel, src2_modifiers{2}, 0);
let Inst{14} = !if(p.Pfl.HasOpSel, src0_modifiers{3}, 0);
}

class VOP3b_DPP8_Base_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
: Base_VOP3_DPP8<op, ps, opName> {
bits<8> sdst;
Expand Down Expand Up @@ -1943,6 +1987,29 @@ multiclass VOP3be_Realtriple<
// Convenience wrapper: instantiates VOP3be_Realtriple for the given generation
// and opcode with its third argument fixed to 1.
// NOTE(review): the third parameter of VOP3be_Realtriple is not visible here;
// presumably it is the "single encoding only" flag - confirm.
multiclass VOP3beOnly_Realtriple<GFXGen Gen, bits<10> op> :
VOP3be_Realtriple<Gen, op, 1>;

// Defines the DPP16 real instruction for a BITOP3 pseudo.
// The record is named NAME # "_e64_dpp" # Gen.Suffix and is built from the
// existing NAME # "_e64_dpp" DPP pseudo.
//   Gen     - target generation descriptor (also supplies the name suffix).
//   op      - 10-bit opcode.
//   asmName - assembly name passed through to the encoding class.
multiclass VOP3_BITOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string asmName> {
  defvar dppPseudo = !cast<VOP_DPP_Pseudo>(NAME#"_e64_dpp");
  def _e64_dpp#Gen.Suffix : VOP3_BITOP3_DPP16_Gen<op, dppPseudo, Gen, asmName>;
}

// Defines the DPP8 real instruction for a BITOP3 pseudo.
// The record is named NAME # "_e64_dpp8" # Gen.Suffix and is built from the
// NAME # "_e64" pseudo. Its decoder namespace is the generation's namespace,
// with "_FAKE16" appended unless the pseudo's profile is marked IsRealTrue16.
//   Gen     - target generation descriptor.
//   op      - 10-bit opcode.
//   asmName - assembly name passed through to the encoding class.
multiclass VOP3_BITOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName> {
  defvar pseudo = !cast<VOP3_Pseudo>(NAME#"_e64");
  defvar fake16Suffix = !if(pseudo.Pfl.IsRealTrue16, "", "_FAKE16");
  def _e64_dpp8#Gen.Suffix : VOP3_BITOP3_DPP8<op, pseudo, asmName> {
    let AssemblerPredicate = Gen.AssemblerPredicate;
    let DecoderNamespace = Gen.DecoderNamespace # fake16Suffix;
  }
}

// Defines the plain (non-DPP) _e64 real instruction for a BITOP3 pseudo.
// The record is named NAME # "_e64" # Gen.Suffix; it combines VOP3_Real_Gen
// with the BITOP3-specific encoding class. IsSingle is copied from the
// pseudo's profile and the asm string is asmName followed by the pseudo's
// operand string.
//   Gen     - target generation descriptor.
//   op      - 10-bit opcode.
//   asmName - mnemonic used as the start of the asm string.
multiclass VOP3_BITOP3_Real_Base<GFXGen Gen, bits<10> op, string asmName> {
  defvar pseudo = !cast<VOP_Pseudo>(NAME#"_e64");
  let IsSingle = pseudo.Pfl.IsSingle in
  let AsmString = asmName # pseudo.AsmOperands in
  def _e64#Gen.Suffix : VOP3_Real_Gen<pseudo, Gen>,
                        VOP3a_BITOP3_gfx12<op, pseudo.Pfl>;
}

//===----------------------------------------------------------------------===//
// VOP3 GFX11
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -2046,6 +2113,16 @@ multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;

// Bundles the three BITOP3 real encodings for GFX1250Gen: the plain _e64 form,
// the DPP16 form and the DPP8 form, all with the same opcode and asm name.
//   op      - 10-bit opcode.
//   asmName - assembly name; defaults to the Mnemonic of the NAME # "_e64"
//             pseudo.
multiclass VOP3_Real_BITOP3_gfx1250<bits<10> op, string asmName = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> :
VOP3_BITOP3_Real_Base<GFX1250Gen, op, asmName>,
VOP3_BITOP3_Real_dpp_Base<GFX1250Gen, op, asmName>,
VOP3_BITOP3_Real_dpp8_Base<GFX1250Gen, op, asmName>;

// Instantiates VOP3_Real_BITOP3_gfx1250 twice, once under the "_t16" name
// suffix and once under "_fake16", passing the same opcode and asm name to
// both.
//   op      - 10-bit opcode shared by both variants.
//   asmName - assembly name shared by both variants; defaults to the Mnemonic
//             of the NAME # "_e64" pseudo.
multiclass VOP3_Real_BITOP3_t16_and_fake16_gfx1250<bits<10> op, string asmName = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> {
defm _t16 : VOP3_Real_BITOP3_gfx1250<op, asmName>;
defm _fake16: VOP3_Real_BITOP3_gfx1250<op, asmName>;
}

multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op, string asmName, bit isSingle = 0,
string opName = NAME> :
VOP3Dot_Realtriple<GFX11Gen, op, asmName, isSingle, opName>,
Expand Down
Loading