Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsPowerPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -1661,6 +1661,22 @@ let TargetPrefix = "ppc" in {
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
llvm_v1024i1_ty], [IntrNoMem]>;

// dmxxextfdmr512: takes a v1024i1 value and an i32 operand and returns two
// v256i1 values. Marked IntrNoMem, i.e. it does not access memory.
// NOTE(review): the i32 appears to be a selector that lowering expects to be
// a constant 0 or 1 - confirm against PPCISelLowering.cpp.
def int_ppc_mma_dmxxextfdmr512 :
DefaultAttrsIntrinsic<[llvm_v256i1_ty, llvm_v256i1_ty], [llvm_v1024i1_ty,
llvm_i32_ty], [IntrNoMem]>;

// dmxxinstdmr512: takes a v1024i1 value, two v256i1 values and an i32
// operand, and returns a v1024i1 value. Marked IntrNoMem.
// The v1024i1 input is the object being updated; passing it in lets separate
// calls fill different parts of the same object.
// NOTE(review): the i32 appears to be a selector that lowering expects to be
// a constant 0 or 1 - confirm against PPCISelLowering.cpp.
def int_ppc_mma_dmxxinstdmr512 :
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
llvm_v256i1_ty, llvm_i32_ty], [IntrNoMem]>;

// dmxxextfdmr256: takes a v1024i1 value and an i32 operand and returns a
// single v256i1 value. Marked IntrNoMem.
// NOTE(review): the i32 appears to be a row-pair index that lowering expects
// to be a constant in 0-3 - confirm against PPCISelLowering.cpp.
def int_ppc_mma_dmxxextfdmr256 :
DefaultAttrsIntrinsic<[llvm_v256i1_ty], [llvm_v1024i1_ty, llvm_i32_ty],
[IntrNoMem]>;

// dmxxinstdmr256: takes a v1024i1 value, one v256i1 value and an i32
// operand, and returns a v1024i1 value. Marked IntrNoMem.
// NOTE(review): the i32 appears to be a row-pair index that lowering expects
// to be a constant in 0-3 - confirm against PPCISelLowering.cpp.
def int_ppc_mma_dmxxinstdmr256 :
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
llvm_i32_ty], [IntrNoMem]>;

// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
// Instantiates PowerPC_MMA_ACC_PP_Intrinsic with two v16i8 argument types.
// NOTE(review): the set of records this multiclass expands to is defined
// elsewhere in this file - check the multiclass for the exact variants.
defm int_ppc_mma_xvi4ger8 :
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,10 @@ static inline bool isVFRegister(unsigned Reg) {
/// Returns true when \p Reg lies in the inclusive range PPC::V0..PPC::V31.
static inline bool isVRRegister(unsigned Reg) {
  const bool BelowRange = Reg < PPC::V0;
  const bool AboveRange = Reg > PPC::V31;
  return !(BelowRange || AboveRange);
}

/// Returns true when \p Reg lies in the inclusive range
/// PPC::DMRROWp0..PPC::DMRROWp31.
static inline bool isDMRROWpRegister(unsigned Reg) {
  if (Reg < PPC::DMRROWp0)
    return false;
  return Reg <= PPC::DMRROWp31;
}
} // namespace PPC
} // namespace llvm

Expand Down
110 changes: 110 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11146,6 +11146,116 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getMergeValues(RetOps, dl);
}

// Lower llvm.ppc.mma.dmxxextfdmr512.
//   Operand 1: the v1024i1 source value.
//   Operand 2: constant selector P; 0 picks the sub_wacc_lo half and the
//              DMXXEXTFDMR512 opcode, 1 picks the sub_wacc_hi half and the
//              DMXXEXTFDMR512_HI opcode.
// The chosen v512i1 half is pulled out with EXTRACT_SUBREG and handed to the
// machine node, which is created with two v256i1 result types.
case Intrinsic::ppc_mma_dmxxextfdmr512: {
  assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  // Each half is a v512i1 sub-register, i.e. 512 bits (not bytes).
  assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
         "Specify P of 0 or 1 for lower or upper 512 bits");
  unsigned HiLo = Idx->getSExtValue();
  unsigned Opcode;
  unsigned Subx;
  if (HiLo == 0) {
    Opcode = PPC::DMXXEXTFDMR512;
    Subx = PPC::sub_wacc_lo;
  } else {
    Opcode = PPC::DMXXEXTFDMR512_HI;
    Subx = PPC::sub_wacc_hi;
  }
  SDValue Subreg(
      DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
                         Op.getOperand(1),
                         DAG.getTargetConstant(Subx, dl, MVT::i32)),
      0);
  EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
  return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
}

// Lower llvm.ppc.mma.dmxxextfdmr256.
//   Operand 1: the v1024i1 source value.
//   Operand 2: constant row-pair index in the range 0-3.
// The index is mapped to a sub-register index, that v256i1 sub-register is
// pulled out with EXTRACT_SUBREG, and DMXXEXTFDMR256 is emitted with the
// sub-register and the index as its operands.
case Intrinsic::ppc_mma_dmxxextfdmr256: {
  assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  // Both bounds must hold. The previous `>= 0 || <= 3` was always true, so an
  // out-of-range index slipped through and left Subx uninitialized below.
  assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
         "Specify a dmr row pair 0-3");
  unsigned IdxVal = Idx->getSExtValue();
  unsigned Subx;
  switch (IdxVal) {
  case 0:
    Subx = PPC::sub_dmrrowp0;
    break;
  case 1:
    Subx = PPC::sub_dmrrowp1;
    break;
  case 2:
    Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
    break;
  case 3:
    Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
    break;
  }
  SDValue Subreg(
      DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
                         Op.getOperand(1),
                         DAG.getTargetConstant(Subx, dl, MVT::i32)),
      0);
  SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
  return SDValue(
      DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
      0);
}

// Lower llvm.ppc.mma.dmxxinstdmr512.
//   Operand 1:    the v1024i1 value being updated.
//   Operands 2-3: the two values fed to the insert machine node.
//   Operand 4:    constant selector P; 0 picks DMXXINSTDMR512 / sub_wacc_lo,
//                 1 picks DMXXINSTDMR512_HI / sub_wacc_hi.
// The machine node produces a v512i1 value, which INSERT_SUBREG then places
// into the selected half of operand 1.
case Intrinsic::ppc_mma_dmxxinstdmr512: {
  assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
  // Each half is a v512i1 sub-register, i.e. 512 bits (not bytes).
  assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
         "Specify P of 0 or 1 for lower or upper 512 bits");
  unsigned HiLo = Idx->getSExtValue();
  unsigned Opcode;
  unsigned Subx;
  if (HiLo == 0) {
    Opcode = PPC::DMXXINSTDMR512;
    Subx = PPC::sub_wacc_lo;
  } else {
    Opcode = PPC::DMXXINSTDMR512_HI;
    Subx = PPC::sub_wacc_hi;
  }
  SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
  SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
  SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
  // Operand 1 is a real input rather than an IMPLICIT_DEF so that the lower
  // and upper halves can both be inserted into the same 1024-bit object.
  return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
                                    Op.getOperand(1), Wacc, SubReg),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Op.getOperand(1) is the 1024i1 operand which can be technically just the output but since it is used here by INSERT_SUBREG, then it was added as an input type in the int_ppc_mma_dmxxinstdmr512 intrinsic definition. Could we just create an IMPLICIT_DEF 1024i1 here instead as it seems we only care about its RegClass and remove 1024i1 from the input list of int_ppc_mma_dmxxinstdmr512?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want to be able to insert both the top half and bottom half of the same object. Without an input parameter for the object, we would create two different objects when we set the upper and lower halves, with no obvious way to put them together.

                 0);
}

// Lower llvm.ppc.mma.dmxxinstdmr256.
//   Operand 1: the v1024i1 value being updated.
//   Operand 2: the value fed to the DMXXINSTDMR256 machine node.
//   Operand 3: constant row-pair index in the range 0-3.
// DMXXINSTDMR256 produces a v256i1 value, which INSERT_SUBREG then places
// into the sub-register of operand 1 selected by the index.
case Intrinsic::ppc_mma_dmxxinstdmr256: {
  assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // Both bounds must hold. The previous `>= 0 || <= 3` was always true, so an
  // out-of-range index slipped through and left Subx uninitialized below.
  assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
         "Specify a dmr row pair 0-3");
  unsigned IdxVal = Idx->getSExtValue();
  unsigned Subx;
  switch (IdxVal) {
  case 0:
    Subx = PPC::sub_dmrrowp0;
    break;
  case 1:
    Subx = PPC::sub_dmrrowp1;
    break;
  case 2:
    Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
    break;
  case 3:
    Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
    break;
  }
  SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
  SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
  SDValue Ops[] = {Op.getOperand(2), P};
  SDValue DMRRowp = SDValue(
      DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
  // Operand 1 is the existing 1024-bit object that receives the new row pair.
  return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
                                    Op.getOperand(1), DMRRowp, SubReg),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as Intrinsic::ppc_mma_dmxxinstdmr512.

                 0);
}

case Intrinsic::ppc_mma_xxmfacc:
case Intrinsic::ppc_mma_xxmtacc: {
// Allow pre-isa-future subtargets to lower as normal.
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,12 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
assert(MO.getReg() > PPC::NoRegister &&
MO.getReg() < PPC::NUM_TARGET_REGS &&
"Invalid register for this target!");
// ISA instructions refer to the containing dmr reg.
if (PPC::isDMRROWpRegister(MO.getReg())) {
OutMO =
MCOperand::createReg(PPC::DMR0 + (MO.getReg() - PPC::DMRROWp0) / 4);
return true;
}
// Ignore all implicit register operands.
if (MO.isImplicit())
return false;
Expand Down
Loading
Loading