Skip to content

Commit 7eaf1f2

Browse files
authored
[AMDGPU] Bitop3 opcodes for gfx1250 (#151235)
1 parent cff9ae7 commit 7eaf1f2

16 files changed

+3727
-87
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4004,6 +4004,9 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
40044004
}
40054005

40064006
unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4007+
if (!IsB32 && STI.hasTrue16BitInsts())
4008+
Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4009+
: AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
40074010
unsigned CBL = STI.getConstantBusLimit(Opc);
40084011
MachineBasicBlock *MBB = MI.getParent();
40094012
const DebugLoc &DL = MI.getDebugLoc();

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,34 +1447,72 @@ let SubtargetPredicate = isGFX12Plus in {
14471447

14481448
} // End SubtargetPredicate = isGFX12Plus
14491449

1450-
let SubtargetPredicate = HasBitOp3Insts in {
1450+
// Operand profiles used by the 16-bit bitop3 definitions. Every profile
// defined inside this `let` gets HasClamp = 0 and HasModifiers = 1.
let HasClamp = 0, HasModifiers = 1 in {
1451+
// Base profile: VOP3_BITOP3_Profile over the value types
// [i16, i16, i16, i16, i32], with the VOP3_OPSEL feature set.
def BitOp3_B16_Profile : VOP3_BITOP3_Profile<VOPProfile <[i16, i16, i16, i16, i32]>, VOP3_OPSEL>;
1452+
// Variant of the base profile wrapped in VOP3_Profile_True16.
def BitOp3_B16_t16_Profile : VOP3_Profile_True16<BitOp3_B16_Profile>;
1453+
// Variant of the base profile wrapped in VOP3_Profile_Fake16.
def BitOp3_B16_fake16_Profile : VOP3_Profile_Fake16<BitOp3_B16_Profile>;
1454+
}
1455+
1456+
let OtherPredicates = [HasBitOp3Insts] in {
14511457
let isReMaterializable = 1 in {
1452-
defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
1453-
VOP3_BITOP3_Profile<VOPProfile <[i16, i16, i16, i16, i32]>, VOP3_OPSEL>>;
1458+
let SubtargetPredicate = isGFX940Plus in
1459+
defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16", BitOp3_B16_Profile>;
1460+
let SubtargetPredicate = isGFX1250Plus in
1461+
defm V_BITOP3_B16_gfx1250 : VOP3Inst_t16_with_profiles <"v_bitop3_b16_gfx1250", BitOp3_B16_Profile,
1462+
BitOp3_B16_t16_Profile, BitOp3_B16_fake16_Profile>;
14541463
defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
14551464
VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>,
14561465
VOPD_Component<0x12, "v_bitop2_b32">;
14571466
}
1467+
14581468
def : GCNPat<
14591469
(i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
14601470
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
14611471
>;
14621472

1463-
def : GCNPat<
1464-
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1465-
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
1466-
>;
1467-
14681473
def : GCNPat<
14691474
(i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
14701475
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
14711476
>;
14721477

1473-
def : GCNPat<
1474-
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1475-
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
1476-
>;
1477-
} // End SubtargetPredicate = HasBitOp3Insts
1478+
// 16-bit bitop3 selection patterns guarded by isGFX940Plus. Both source
// forms (the int_amdgcn_bitop3 intrinsic and the BITOP3_16 node) select to
// V_BITOP3_B16_e64 with the same operand list.
// NOTE(review): the literal 0 placed before each source is presumably that
// source's modifier operand, and the trailing 0 the op_sel operand -- confirm
// against the instruction's operand list.
let SubtargetPredicate = isGFX940Plus in {
1479+
// Intrinsic form.
def : GCNPat<
1480+
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1481+
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
1482+
>;
1483+
1484+
// BITOP3_16 node form; same result instruction as the intrinsic pattern.
def : GCNPat<
1485+
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1486+
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
1487+
>;
1488+
} // End SubtargetPredicate = isGFX940Plus
1489+
1490+
// 16-bit bitop3 selection patterns guarded by isGFX1250Plus. The patterns are
// split by True16Predicate: under UseFakeTrue16Insts they select the
// _fake16_e64 instruction with VSrc_b16 sources; under UseRealTrue16Insts they
// select the _t16_e64 instruction with VSrcT_b16 sources. Each group matches
// both the int_amdgcn_bitop3 intrinsic and the BITOP3_16 node.
// NOTE(review): the literal 0 placed before each source is presumably that
// source's modifier operand, and the trailing 0 the op_sel operand -- confirm
// against the instruction's operand list.
let SubtargetPredicate = isGFX1250Plus in {
1491+
// Fake16 group.
let True16Predicate = UseFakeTrue16Insts in {
1492+
def : GCNPat<
1493+
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1494+
(i16 (V_BITOP3_B16_gfx1250_fake16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
1495+
>;
1496+
1497+
def : GCNPat<
1498+
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1499+
(i16 (V_BITOP3_B16_gfx1250_fake16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
1500+
>;
1501+
}
1502+
// Real true16 group; note the VSrcT_b16 source operand class.
let True16Predicate = UseRealTrue16Insts in {
1503+
def : GCNPat<
1504+
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1505+
(i16 (V_BITOP3_B16_gfx1250_t16_e64 0, VSrcT_b16:$src0, 0, VSrcT_b16:$src1, 0, VSrcT_b16:$src2, timm:$bitop3, 0))
1506+
>;
1507+
1508+
def : GCNPat<
1509+
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1510+
(i16 (V_BITOP3_B16_gfx1250_t16_e64 0, VSrcT_b16:$src0, 0, VSrcT_b16:$src1, 0, VSrcT_b16:$src2, timm:$bitop3, 0))
1511+
>;
1512+
}
1513+
} // End SubtargetPredicate = isGFX1250Plus
1514+
1515+
} // End OtherPredicates = [HasBitOp3Insts]
14781516

14791517
class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
14801518
(AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
@@ -1766,6 +1804,9 @@ defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_m
17661804
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
17671805
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
17681806

1807+
defm V_BITOP3_B16_gfx1250 : VOP3_Real_BITOP3_t16_and_fake16_gfx1250<0x233, "v_bitop3_b16">;
1808+
defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx1250<0x234>;
1809+
17691810
defm V_MAD_U32 : VOP3Only_Realtriple_gfx1250<0x235>;
17701811
defm V_MAD_NC_U64_U32 : VOP3Only_Realtriple_gfx1250<0x2fa>;
17711812
defm V_MAD_NC_I64_I32 : VOP3Only_Realtriple_gfx1250<0x2fb>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,19 @@ class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
401401
let Inst{49-41} = src0;
402402
}
403403

404+
// Encoding class for bitop3 instructions, layered on VOP3e_gfx11_gfx12.
// Adds an 8-bit `bitop3` field and overrides several Inst bits:
//   - bitop3 is stored non-contiguously (see the three assignments below);
//   - Inst{11..14} take bits from the source-modifier fields when the profile
//     `p` has HasOpSel set, and are 0 otherwise.
class VOP3a_BITOP3_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx11_gfx12<op, p> {
405+
bits<8> bitop3;
406+
407+
// bitop3 is split across three places in the instruction word:
// bits 7-6 -> Inst{60-59}, bits 5-3 -> Inst{10-8}, bits 2-0 -> Inst{63-61}.
let Inst{60-59} = bitop3{7-6};
408+
let Inst{10-8} = bitop3{5-3};
409+
let Inst{63-61} = bitop3{2-0};
410+
411+
// Bit 2 of each source's modifiers goes to Inst{11}, Inst{12}, Inst{13}.
// NOTE(review): presumably these are the per-source op_sel bits -- confirm.
let Inst{11} = !if(p.HasOpSel, src0_modifiers{2}, 0);
412+
let Inst{12} = !if(p.HasOpSel, src1_modifiers{2}, 0);
413+
let Inst{13} = !if(p.HasOpSel, src2_modifiers{2}, 0);
414+
// Bit 3 of src0_modifiers goes to Inst{14}.
// NOTE(review): presumably the destination op_sel bit -- confirm.
let Inst{14} = !if(p.HasOpSel, src0_modifiers{3}, 0);
415+
}
416+
404417
class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
405418
bits<6> attr;
406419
bits<2> attrchan;
@@ -1506,6 +1519,7 @@ class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VO
15061519
let HasFP8SrcByteSel = P.HasFP8SrcByteSel;
15071520
let HasFP8DstByteSel = P.HasFP8DstByteSel;
15081521
let HasOMod = P.HasOMod;
1522+
let HasBitOp3 = P.HasBitOp3;
15091523

15101524
let HasModifiers =
15111525
!if (Features.IsMAI, 0,
@@ -1525,6 +1539,7 @@ class VOP3_Profile_True16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> :
15251539
let HasFP8SrcByteSel = P.HasFP8SrcByteSel;
15261540
let HasFP8DstByteSel = P.HasFP8DstByteSel;
15271541
let HasOMod = P.HasOMod;
1542+
let HasBitOp3 = P.HasBitOp3;
15281543

15291544
let HasModifiers =
15301545
!if (Features.IsMAI, 0,
@@ -1540,6 +1555,7 @@ class VOP3_Profile_Fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> :
15401555
let HasFP8SrcByteSel = P.HasFP8SrcByteSel;
15411556
let HasFP8DstByteSel = P.HasFP8DstByteSel;
15421557
let HasOMod = P.HasOMod;
1558+
let HasBitOp3 = P.HasBitOp3;
15431559

15441560
let HasModifiers =
15451561
!if (Features.IsMAI, 0,
@@ -1723,6 +1739,34 @@ class VOP3b_DPP8_Base<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
17231739
let Inst{14 - 8} = sdst;
17241740
}
17251741

1742+
// DPP16 encoding class for bitop3 instructions, layered on
// VOP3_DPP16_Gen_t16. Adds an 8-bit `bitop3` field and overrides Inst bits
// for it and for bits taken from the source-modifier fields. The HasOpSel
// test reads the profile through the DPP pseudo (`p.Pfl`).
class VOP3_BITOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo p, GFXGen Gen, string asmName>
1743+
: VOP3_DPP16_Gen_t16<op, p, Gen, asmName> {
1744+
bits<8> bitop3;
1745+
1746+
// bitop3 is split across three places in the instruction word:
// bits 7-6 -> Inst{60-59}, bits 5-3 -> Inst{10-8}, bits 2-0 -> Inst{63-61}.
let Inst{60-59} = bitop3{7-6};
1747+
let Inst{10-8} = bitop3{5-3};
1748+
let Inst{63-61} = bitop3{2-0};
1749+
1750+
// Bit 2 of each source's modifiers goes to Inst{11}, Inst{12}, Inst{13};
// bit 3 of src0_modifiers goes to Inst{14}. All four are 0 without HasOpSel.
// NOTE(review): presumably the op_sel bits for src0/src1/src2/dst -- confirm.
let Inst{11} = !if(p.Pfl.HasOpSel, src0_modifiers{2}, 0);
1751+
let Inst{12} = !if(p.Pfl.HasOpSel, src1_modifiers{2}, 0);
1752+
let Inst{13} = !if(p.Pfl.HasOpSel, src2_modifiers{2}, 0);
1753+
let Inst{14} = !if(p.Pfl.HasOpSel, src0_modifiers{3}, 0);
1754+
}
1755+
1756+
// DPP8 encoding class for bitop3 instructions, layered on Base_VOP3_DPP8_t16.
// Adds an 8-bit `bitop3` field and overrides Inst bits for it and for bits
// taken from the source-modifier fields. The HasOpSel test reads the profile
// through the pseudo (`p.Pfl`).
class VOP3_BITOP3_DPP8<bits<10> op, VOP_Pseudo p, string asmName>
1757+
: Base_VOP3_DPP8_t16<op, p, asmName> {
1758+
bits<8> bitop3;
1759+
1760+
// bitop3 is split across three places in the instruction word:
// bits 7-6 -> Inst{60-59}, bits 5-3 -> Inst{10-8}, bits 2-0 -> Inst{63-61}.
let Inst{60-59} = bitop3{7-6};
1761+
let Inst{10-8} = bitop3{5-3};
1762+
let Inst{63-61} = bitop3{2-0};
1763+
1764+
// Bit 2 of each source's modifiers goes to Inst{11}, Inst{12}, Inst{13};
// bit 3 of src0_modifiers goes to Inst{14}. All four are 0 without HasOpSel.
// NOTE(review): presumably the op_sel bits for src0/src1/src2/dst -- confirm.
let Inst{11} = !if(p.Pfl.HasOpSel, src0_modifiers{2}, 0);
1765+
let Inst{12} = !if(p.Pfl.HasOpSel, src1_modifiers{2}, 0);
1766+
let Inst{13} = !if(p.Pfl.HasOpSel, src2_modifiers{2}, 0);
1767+
let Inst{14} = !if(p.Pfl.HasOpSel, src0_modifiers{3}, 0);
1768+
}
1769+
17261770
class VOP3b_DPP8_Base_t16<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName>
17271771
: Base_VOP3_DPP8<op, ps, opName> {
17281772
bits<8> sdst;
@@ -1943,6 +1987,29 @@ multiclass VOP3be_Realtriple<
19431987
multiclass VOP3beOnly_Realtriple<GFXGen Gen, bits<10> op> :
19441988
VOP3be_Realtriple<Gen, op, 1>;
19451989

1990+
// Defines the real DPP16 bitop3 instruction for generation `Gen`.
// The record is named <NAME>_e64_dpp<Gen.Suffix> and is built with
// VOP3_BITOP3_DPP16_Gen from the pseudo named <NAME>_e64_dpp, which must
// already exist as a VOP_DPP_Pseudo (it is looked up with !cast).
multiclass VOP3_BITOP3_Real_dpp_Base<GFXGen Gen, bits<10> op, string asmName> {
1991+
def _e64_dpp#Gen.Suffix :
1992+
VOP3_BITOP3_DPP16_Gen<op, !cast<VOP_DPP_Pseudo>(NAME#"_e64"#"_dpp"), Gen, asmName>;
1993+
}
1994+
1995+
// Defines the real DPP8 bitop3 instruction for generation `Gen`.
// The record is named <NAME>_e64_dpp8<Gen.Suffix> and is built with
// VOP3_BITOP3_DPP8 from the pseudo named <NAME>_e64.
multiclass VOP3_BITOP3_Real_dpp8_Base<GFXGen Gen, bits<10> op, string asmName> {
1996+
// The e64 pseudo this real instruction is derived from.
defvar ps = !cast<VOP3_Pseudo>(NAME#"_e64");
1997+
def _e64_dpp8#Gen.Suffix : VOP3_BITOP3_DPP8<op, ps, asmName> {
1998+
// Use the generation's decoder namespace as-is when the pseudo's profile has
// IsRealTrue16 set; otherwise append "_FAKE16" to it.
let DecoderNamespace =
1999+
Gen.DecoderNamespace #!if (ps.Pfl.IsRealTrue16, "", "_FAKE16");
2000+
let AssemblerPredicate = Gen.AssemblerPredicate;
2001+
}
2002+
}
2003+
2004+
// Defines the real (non-DPP) e64 bitop3 instruction for generation `Gen`.
// The record is named <NAME>_e64<Gen.Suffix>; it combines VOP3_Real_Gen with
// the VOP3a_BITOP3_gfx12 encoding, using the profile of the <NAME>_e64 pseudo.
multiclass VOP3_BITOP3_Real_Base<GFXGen Gen, bits<10> op, string asmName> {
2005+
// The e64 pseudo this real instruction is derived from.
defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
2006+
// IsSingle is copied from the pseudo's profile; the assembly string is the
// supplied asmName followed by the pseudo's operand string.
let IsSingle = ps.Pfl.IsSingle, AsmString = asmName # ps.AsmOperands in {
2007+
def _e64#Gen.Suffix :
2008+
VOP3_Real_Gen<ps, Gen>,
2009+
VOP3a_BITOP3_gfx12<op, ps.Pfl>;
2010+
}
2011+
}
2012+
19462013
//===----------------------------------------------------------------------===//
19472014
// VOP3 GFX11
19482015
//===----------------------------------------------------------------------===//
@@ -2046,6 +2113,16 @@ multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
20462113
VOP3Only_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
20472114
VOP3Only_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
20482115

2116+
// Defines all three real bitop3 encodings (e64, DPP16, DPP8) for GFX1250Gen
// by inheriting the three *_Base multiclasses with the same opcode and name.
// asmName defaults to the Mnemonic of the <NAME>_e64 pseudo.
multiclass VOP3_Real_BITOP3_gfx1250<bits<10> op, string asmName = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> :
2117+
VOP3_BITOP3_Real_Base<GFX1250Gen, op, asmName>,
2118+
VOP3_BITOP3_Real_dpp_Base<GFX1250Gen, op, asmName>,
2119+
VOP3_BITOP3_Real_dpp8_Base<GFX1250Gen, op, asmName>;
2120+
2121+
// Instantiates VOP3_Real_BITOP3_gfx1250 twice, once under the `_t16` name
// suffix and once under `_fake16`, passing the same opcode and asmName to
// both. asmName defaults to the Mnemonic of the <NAME>_e64 pseudo.
multiclass VOP3_Real_BITOP3_t16_and_fake16_gfx1250<bits<10> op, string asmName = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> {
2122+
defm _t16 : VOP3_Real_BITOP3_gfx1250<op, asmName>;
2123+
defm _fake16: VOP3_Real_BITOP3_gfx1250<op, asmName>;
2124+
}
2125+
20492126
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op, string asmName, bit isSingle = 0,
20502127
string opName = NAME> :
20512128
VOP3Dot_Realtriple<GFX11Gen, op, asmName, isSingle, opName>,

0 commit comments

Comments
 (0)