Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsPowerPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,13 @@ multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
[IntrNoMem]>;
}

// Defines a pair of intrinsics that both return a v1024i1 value and are
// marked IntrNoMem:
//   NAME   - takes exactly the operand types listed in `args`.
//   NAMEpp - takes a leading v1024i1 operand followed by `args`.
multiclass PowerPC_MMA_DMR_PP_Intrinsic<list<LLVMType> args> {
def NAME: DefaultAttrsIntrinsic<[llvm_v1024i1_ty], args, [IntrNoMem]>;
// The `pp` variant prepends one v1024i1 operand to the argument list.
def pp : DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
!listconcat([llvm_v1024i1_ty], args),
[IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
// PowerPC Altivec Intrinsic Class Definitions.
//
Expand Down Expand Up @@ -1711,6 +1718,20 @@ let TargetPrefix = "ppc" in {
[llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
// Each defm below goes through PowerPC_MMA_DMR_PP_Intrinsic and therefore
// yields two intrinsics: the base name and a `pp` variant that takes an
// extra leading v1024i1 operand.
// Base form operands: v256i1, v16i8.
defm int_ppc_mma_dmxvi8gerx4 :
PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
// Prefixed form: same operands plus three i32 operands. They are not marked
// ImmArg here; the selection patterns for this intrinsic match them with the
// Msk8Imm/Msk4Imm/Msk4Imm ImmLeaf types.
defm int_ppc_mma_pmdmxvi8gerx4 :
PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty]>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the 32-bit int args should probably be marked as ImmArg.

Copy link
Contributor Author

@maryammo maryammo Mar 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I add ImmArg to the intrinsic definitions, the existing ImmLeaf types that make sure the mask values fit into 2/4/8 bits no longer work:

def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;

and an IR test like the following

declare <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1>, <256 x i1>, <16 x i8>, i32 immarg, i32 immarg, i32 immarg)

define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
entry:
  %0 = load <1024 x i1>, ptr %vop, align 64
  %1 = load <256 x i1>, ptr %vpp, align 32
  %2 = load <16 x i8>, ptr %vcp, align 32
  %3 = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> %0, <256 x i1> %1, <16 x i8> %2, i32 0, i32 0, i32 0)
  store <1024 x i1> %3, ptr %resp, align 64
  ret void
}

fails with
LLVM ERROR: Cannot select: intrinsic %llvm.ppc.mma.pmdmxvi8gerx4pp

By changing the Pat for them to use i32 instead of Msk*Imm, the IR compiles fine; however, it won't have the original constraint on the masks.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm. Not sure why that is failing, but I am okay staying with your original code.

// Intrinsic returning v1024i1 from (v1024i1, v256i1, v16i8); IntrNoMem.
// Declared directly rather than through a multiclass, so there is no
// companion variant without the leading v1024i1 operand.
def int_ppc_mma_dmxvi8gerx4spp :
DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty],
[IntrNoMem]>;
// Intrinsic returning v1024i1 from (v1024i1, v256i1, v16i8, i32, i32, i32);
// IntrNoMem. The three trailing i32 operands are not marked ImmArg.
def int_ppc_mma_pmdmxvi8gerx4spp :
DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ImmArg

[IntrNoMem]>;
}

// XL Compat intrinsics.
Expand Down
143 changes: 143 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,96 @@ class XForm_ATB3<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL,
let Inst{31} = 0;
}

// Instruction format with a 3-bit AT field, a 5-bit XAp field and a 6-bit XB
// field, plus an 8-bit extended opcode `xo`.
//   opcode - primary opcode forwarded to the base class I.
//   xo     - extended opcode placed in Inst{21-28}.
//   OOL / IOL / asmstr / itin - forwarded unchanged to I.
//   pattern - assigned to Pattern.
class XX3Form_AT3_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin,
list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<3> AT;
bits<5> XAp;
bits<6> XB;

let Pattern = pattern;

let Inst{6-8} = AT;
let Inst{9-10} = 0;
// XAp is split: low four bits here, bit 4 in Inst{29}.
let Inst{11-14} = XAp{3-0};
let Inst{15} = 0;
// XB is split: low five bits here, bit 5 in Inst{30}.
let Inst{16-20} = XB{4-0};
let Inst{21-28} = xo;
let Inst{29} = XAp{4};
let Inst{30} = XB{5};
let Inst{31} = 0;
}

// Prefixed instruction format: the fields of XX3Form_AT3_XAp5B6 (3-bit AT,
// 5-bit XAp, 6-bit XB, 8-bit xo) in Inst{32-63}, preceded by a prefix in
// Inst{0-31} that carries an 8-bit XMSK and 4-bit YMSK and PMSK masks.
//   opcode - primary opcode forwarded to the base class PI.
//   xo     - extended opcode placed in Inst{53-60}.
//   OOL / IOL / asmstr / itin - forwarded unchanged to PI.
//   pattern - assigned to Pattern.
class MMIRR_XX3Form_X8YP4_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin,
list<dag> pattern>
: PI<1, opcode, OOL, IOL, asmstr, itin> {
bits<3> AT;
// Only XAp{4} and XAp{3-0} are encoded below, so the field is 5 bits wide,
// matching the class name and the non-prefixed XX3Form_AT3_XAp5B6.
bits<5> XAp;
bits<6> XB;
bits<8> XMSK;
bits<4> YMSK;
bits<4> PMSK;

let Pattern = pattern;

// The prefix.
let Inst{6-7} = 3;
let Inst{8-11} = 9;
let Inst{12-15} = 0;
let Inst{16-19} = PMSK;
let Inst{20-27} = XMSK;
let Inst{28-31} = YMSK;

// The instruction.
let Inst{38-40} = AT;
let Inst{41-42} = 0;
// XAp is split: low four bits here, bit 4 in Inst{61}.
let Inst{43-46} = XAp{3-0};
let Inst{47} = 0;
// XB is split: low five bits here, bit 5 in Inst{62}.
let Inst{48-52} = XB{4-0};
let Inst{53-60} = xo;
let Inst{61} = XAp{4};
let Inst{62} = XB{5};
let Inst{63} = 0;
}

// Defines two XX3Form_AT3_XAp5B6 instructions, both predicated on
// [MMA, IsISAFuture] and both producing dmr:$AT:
//   NAME   - extended opcode `xo | 0x01`, inputs IOL, $AT is earlyclobber.
//   NAMEPP - extended opcode `xo`, inputs (dmr:$ATi) followed by IOL; $ATi is
//            tied to $AT and excluded from the encoding.
// `asmbase` is the mnemonic stem ("pp" is appended for the PP form) and
// `asmstr` is the operand string that follows it.
multiclass DMR_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
let Predicates = [MMA, IsISAFuture] in {
def NAME :
XX3Form_AT3_XAp5B6<opcode, !or(xo, 0x01), (outs dmr:$AT), IOL,
!strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PP :
XX3Form_AT3_XAp5B6<opcode, xo, (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
!strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}

// Extends DMR_UM_XOEO with prefixed ("pm") forms. In addition to NAME and
// NAMEPP it defines, under [MMA, PrefixInstrs, IsISAFuture]:
//   PMNAME   - extended opcode `xo | 0x01`; inputs IOL followed by
//              u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK; $AT is earlyclobber.
//   PMNAMEPP - extended opcode `xo`; inputs (dmr:$ATi), IOL, then the same
//              three masks; $ATi is tied to $AT and excluded from encoding.
// The assembly string is "pm" + asmbase (+ "pp") followed by asmstr and the
// three mask operands.
multiclass DMR_UM_M448_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
defm NAME : DMR_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
def PM#NAME :
MMIRR_XX3Form_X8YP4_XAp5B6<
opcode, !or(xo, 0x01), (outs dmr:$AT),
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_X8YP4_XAp5B6<
opcode, xo, (outs dmr:$AT),
!con((ins dmr:$ATi),
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}

let Predicates = [IsISAFuture] in {
def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226,
(outs vsrprc:$XAp, vsrprc:$XBp),
Expand Down Expand Up @@ -117,3 +207,56 @@ let Predicates = [IsISAFuture] in {
"dmsetdmrz $AT", NoItinerary,
[(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>;
}

// MMA+ accumulating/non-accumulating instructions.

// DMXVI8GERX4, DMXVI8GERX4PP, PMDMXVI8GERX4, PMDMXVI8GERX4PP
// All four come from DMR_UM_M448_XOEO with primary opcode 59 and base
// extended opcode 10; inputs are a vsrprc $XAp and a vsrc $XB.
defm DMXVI8GERX4 : DMR_UM_M448_XOEO<59, 10, (ins vsrprc:$XAp, vsrc:$XB),
"dmxvi8gerx4", "$AT, $XAp, $XB">;

// DMXVI8GERX4SPP: primary opcode 59, extended opcode 98. Takes dmr:$ATi tied
// to the dmr:$AT result (not encoded), plus vsrprc $XAp and vsrc $XB.
// No selection pattern is attached here ([]).
// NOTE(review): itinerary is IIC_VecGeneral, while the DMR_UM_XOEO multiclass
// uses IIC_VecFP for its instructions - confirm the difference is intended.
let Predicates = [MMA, IsISAFuture] in {
def DMXVI8GERX4SPP :
XX3Form_AT3_XAp5B6<59, 98, (outs dmr:$AT), (ins dmr:$ATi, vsrprc:$XAp, vsrc:$XB),
"dmxvi8gerx4spp $AT, $XAp, $XB", IIC_VecGeneral, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}

// PMDMXVI8GERX4SPP: prefixed form of DMXVI8GERX4SPP (primary opcode 59,
// extended opcode 98) with additional u8imm $XMSK and u4imm $YMSK/$PMSK mask
// operands. $ATi is tied to $AT and excluded from the encoding.
// No selection pattern is attached here ([]).
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
def PMDMXVI8GERX4SPP :
MMIRR_XX3Form_X8YP4_XAp5B6<59, 98, (outs dmr:$AT),
(ins dmr:$ATi, vsrprc:$XAp,vsrc:$XB, u8imm:$XMSK,
u4imm:$YMSK, u4imm:$PMSK),
"pmdmxvi8gerx4spp $AT, $XAp, $XB, $XMSK, $YMSK, $PMSK",
IIC_VecGeneral, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}

// MMA+ Intrinsics
// Selection patterns mapping the non-prefixed intrinsics to their
// instructions. The v16i8 $XB operand is passed as RCCp.BToVSRC, which is
// defined outside this section (presumably a register-class conversion of $XB
// to vsrc - verify against its definition).
let Predicates = [MMA, IsISAFuture] in {
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)),
(DMXVI8GERX4 $XAp, RCCp.BToVSRC)>;
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
(DMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC)>;

def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
(DMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC)>;
}

// Selection patterns for the prefixed ("pm") intrinsics. The three trailing
// i32 operands are matched with Msk8Imm ($XMSK) and Msk4Imm ($YMSK, $PMSK),
// so a pattern only applies when each mask is an immediate that fits the
// corresponding width. $XB is passed as RCCp.BToVSRC, which is defined
// outside this section.
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
(PMDMXVI8GERX4 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;

def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
Msk4Imm:$PMSK)),
(PMDMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;

def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
Msk4Imm:$PMSK)),
(PMDMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
}
7 changes: 7 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3474,6 +3474,13 @@ class PPCAsmPseudo<string asm, dag iops>
let TSFlags{10} = MemriOp;
}

// Mask immediates for MMA instructions (2, 4 and 8 bits).
// Each ImmLeaf matches an i32 immediate only when isUInt<N>(Imm) holds for
// the width N in its name.
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;

// Predicate that holds when Subtarget->hasMMA() is true.
def MMA : Predicate<"Subtarget->hasMMA()">;

// Prefixed instructions may require access to the above defs at a later
// time so we include this after the def.
include "PPCInstrP10.td"
Expand Down
8 changes: 0 additions & 8 deletions llvm/lib/Target/PowerPC/PPCInstrMMA.td
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@

// Mask immediates for MMA instructions (2, 4 and 8 bits).
// Each ImmLeaf matches an i32 immediate only when isUInt<N>(Imm) holds for
// the width N in its name.
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;

// Predicate that holds when Subtarget->hasMMA() is true.
def MMA : Predicate<"Subtarget->hasMMA()">;


// Multiclass definitions for MMA accumulator instructions.
// ----------------------------------------------------------------------------

Expand Down
Loading
Loading