Skip to content

Commit a9cbd20

Browse files
committed
add builtins
1 parent e64224a commit a9cbd20

File tree

8 files changed

+158
-10
lines changed

8 files changed

+158
-10
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,6 +1098,10 @@ UNALIASED_CUSTOM_BUILTIN(mma_dmmr, "vW1024*W1024*", false,
10981098
"mma,isa-future-instructions")
10991099
UNALIASED_CUSTOM_BUILTIN(mma_dmxor, "vW1024*W1024*", true,
11001100
"mma,isa-future-instructions")
1101+
UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false,
1102+
"mma,isa-future-instructions")
1103+
UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false,
1104+
"mma,isa-future-instructions")
11011105

11021106
// MMA builtins with positive/negative multiply/accumulate.
11031107
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",

clang/lib/CodeGen/TargetBuiltins/PPC.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1152,10 +1152,15 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
11521152
CallOps.push_back(Acc);
11531153
}
11541154
if (BuiltinID == PPC::BI__builtin_mma_dmmr ||
1155-
BuiltinID == PPC::BI__builtin_mma_dmxor) {
1155+
BuiltinID == PPC::BI__builtin_mma_dmxor ||
1156+
BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) {
11561157
Address Addr = EmitPointerWithAlignment(E->getArg(1));
11571158
Ops[1] = Builder.CreateLoad(Addr);
11581159
}
1160+
if (BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) {
1161+
dbgs() << "&&& No disassemble!!!\n";
1162+
return Builder.CreateAlignedStore(Ops[1], Ops[0], MaybeAlign());
1163+
}
11591164
for (unsigned i=1; i<Ops.size(); i++)
11601165
CallOps.push_back(Ops[i]);
11611166
llvm::Function *F = CGM.getIntrinsic(ID);

clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,18 +93,36 @@ void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsi
9393
*((__dmr1024 *)resp) = vdmr;
9494
}
9595

96-
// CHECK-LABEL: @test_dmf_basic
97-
// CHECK-NEXT: entry:
98-
// CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
99-
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
100-
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr %res1, align 128
101-
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr %res2, align 128
102-
// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr %p, align 128
103-
// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
104-
// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr %res2, align 128
96+
// CHECK-LABEL: @test_dmf_basic(
97+
// CHECK-NEXT: entry:
98+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
99+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
100+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES1:%.*]], align 128
101+
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2:%.*]], align 128
102+
// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P:%.*]], align 128
103+
// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
104+
// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128
105+
// CHECK-NEXT: ret void
106+
//
105107
// Exercises the basic DMR builtins: zeroes x[0] with dmsetdmrz, passes it to
// dmmr with res1 as the destination, then calls dmxor with res2 and p.
// Only x[0] of the two-element array is referenced. The expected IR is pinned
// by the FileCheck lines preceding this function.
void test_dmf_basic(char *p, char *res1, char *res2) {
106108
__dmr1024 x[2];
107109
__builtin_mma_dmsetdmrz(&x[0]);
108110
__builtin_mma_dmmr((__dmr1024*)res1, &x[0]);
109111
__builtin_mma_dmxor((__dmr1024*)res2, (__dmr1024*)p);
110112
}
113+
114+
// CHECK-LABEL: @test_dmf_basic2(
115+
// CHECK-NEXT: entry:
116+
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[V:%.*]], align 16, !tbaa [[TBAA8:![0-9]+]]
117+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]])
118+
// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RES2:%.*]], align 128
119+
// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1:%.*]], align 128
120+
// CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RES1:%.*]], align 128
121+
// CHECK-NEXT: ret void
122+
//
123+
// Exercises the build/disassemble DMR builtins: loads one vector from v,
// calls build_dmr with res2 and eight copies of that vector, then calls
// disassemble_dmr with res1 and the __dmr1024 pointed to by p1.
// The expected IR is pinned by the FileCheck lines preceding this function.
void test_dmf_basic2(char *p1, char *res1, char *res2,
124+
vector unsigned char *v) {
125+
vector unsigned char vv = *v;
126+
__builtin_mma_build_dmr((__dmr1024*)res2, vv, vv, vv, vv, vv, vv, vv, vv);
127+
__builtin_mma_disassemble_dmr(res1, (__dmr1024*)p1);
128+
}

clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
1616
__builtin_mma_dmsetdmrz(&vdmr);
1717
__builtin_mma_dmmr(&vdmr, (__dmr1024*)vpp);
1818
__builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp);
19+
__builtin_mma_build_dmr(&vdmr, vc, vc, vc, vc, vc, vc, vc, vc);
20+
__builtin_mma_disassemble_dmr(vdmrp, &vdmr);
1921

2022
// CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
2123
// CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
@@ -26,4 +28,6 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
2628
// CHECK: error: '__builtin_mma_dmsetdmrz' needs target feature mma,isa-future-instructions
2729
// CHECK: error: '__builtin_mma_dmmr' needs target feature mma,isa-future-instructions
2830
// CHECK: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions
31+
// CHECK: error: '__builtin_mma_build_dmr' needs target feature mma,isa-future-instructions
32+
// CHECK: error: '__builtin_mma_disassemble_dmr' needs target feature mma,isa-future-instructions
2933
}

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,16 @@ let TargetPrefix = "ppc" in {
17011701
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
17021702
llvm_i32_ty], [IntrNoMem]>;
17031703

1704+
// Disassemble-DMR intrinsic: takes a pointer and a v1024i1 value and produces
// no result. Declared as write-only and restricted to argument memory.
def int_ppc_mma_disassemble_dmr :
1705+
DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_v1024i1_ty],
1706+
[IntrWriteMem, IntrArgMemOnly]>;
1707+
1708+
def int_ppc_mma_build_dmr :
1709+
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
1710+
llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
1711+
llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
1712+
[IntrNoMem]>;
1713+
17041714
// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
17051715
defm int_ppc_mma_xvi4ger8 :
17061716
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11292,6 +11292,25 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1129211292
return DAG.getMergeValues(RetOps, dl);
1129311293
}
1129411294

11295+
case Intrinsic::ppc_mma_build_dmr: {
11296+
SmallVector<SDValue, 8> Pairs;
11297+
SmallVector<SDValue, 8> Chains;
11298+
for (int i = 1; i < 9; i += 2) {
11299+
SDValue Hi = Op.getOperand(i);
11300+
SDValue Lo = Op.getOperand(i + 1);
11301+
if (Hi->getOpcode() == ISD::LOAD)
11302+
Chains.push_back(Hi.getValue(1));
11303+
if (Lo->getOpcode() == ISD::LOAD)
11304+
Chains.push_back(Lo.getValue(1));
11305+
Pairs.push_back(DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1,
11306+
{Hi, Lo}));
11307+
}
11308+
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11309+
SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11310+
SDValue RetOps[] = {Value, TF};
11311+
return DAG.getMergeValues(RetOps, dl);
11312+
}
11313+
1129511314
case Intrinsic::ppc_mma_dmxxextfdmr512: {
1129611315
assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
1129711316
auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -11628,6 +11647,11 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1162811647
Op.getOperand(0)),
1162911648
0);
1163011649
}
11650+
case Intrinsic::ppc_mma_disassemble_dmr: {
11651+
return
11652+
DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11653+
Op.getOperand(ArgStart + 1), MachinePointerInfo());
11654+
}
1163111655
default:
1163211656
break;
1163311657
}
@@ -12117,6 +12141,24 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
1211712141
return DAG.getMergeValues({DmrPValue, TF}, dl);
1211812142
}
1211912143

12144+
/// Assemble a v1024i1 value from the first four entries of \p Pairs.
///
/// Pairs[0] and Pairs[1] feed a DMXXINSTDMR512 machine node (v512i1) placed at
/// sub-register index sub_wacc_lo; Pairs[2] and Pairs[3] feed a
/// DMXXINSTDMR512_HI machine node (v512i1) placed at sub_wacc_hi. The two
/// halves are joined with a REG_SEQUENCE in register class DMRRC.
///
/// \param Pairs operands for the two insert nodes; the ppc_mma_build_dmr
///              lowering passes four v256i1 PAIR_BUILD nodes.
/// \param dl    debug location attached to every node created here.
/// \param DAG   the SelectionDAG in which the nodes are created.
/// \returns result 0 of the REG_SEQUENCE node, typed v1024i1.
///
/// NOTE(review): Pairs[0..3] are indexed without a size check; callers must
/// supply at least four elements.
SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12145+
const SDLoc &dl,
12146+
SelectionDAG &DAG) const {
12147+
// Low 512-bit half, built from the first two entries.
SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
12148+
Pairs[1]),
12149+
0);
12150+
SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12151+
// High 512-bit half, built from the last two entries.
SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12152+
Pairs[2], Pairs[3]),
12153+
0);
12154+
SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12155+
SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12156+
// REG_SEQUENCE operands: register class id, then (value, sub-register) pairs.
const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
12157+
12158+
return
12159+
SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12160+
}
12161+
1212012162
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
1212112163
SelectionDAG &DAG) const {
1212212164
SDLoc dl(Op);

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,6 +1361,8 @@ namespace llvm {
13611361
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
13621362
SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
13631363
SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const;
1364+
/// Build a v1024i1 value from the first four entries of \p Pairs; the
/// definition is in PPCISelLowering.cpp.
SDValue DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
1365+
const SDLoc &dl, SelectionDAG &DAG) const;
13641366

13651367
SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
13661368
CallingConv::ID CallConv, bool isVarArg,

llvm/test/CodeGen/PowerPC/dmr-enable.ll

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,69 @@ entry:
367367
ret void
368368
}
369369

370+
; Codegen test for the build/disassemble DMR intrinsics: loads a <16 x i8>
; from %v, passes eight copies of it to llvm.ppc.mma.build.dmr and stores the
; result to %res2, then loads a <1024 x i1> from %p1 and passes it to
; llvm.ppc.mma.disassemble.dmr together with %res1. %p2 is not used.
define void @tbuild(ptr %p1, ptr %p2, ptr %res1, ptr %res2, ptr %v) {
371+
; CHECK-LABEL: tbuild:
372+
; CHECK: # %bb.0: # %entry
373+
; CHECK-NEXT: lxv v3, 0(r7)
374+
; CHECK-NEXT: vmr v2, v3
375+
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
376+
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
377+
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
378+
; CHECK-NEXT: stxvp vsp34, 96(r6)
379+
; CHECK-NEXT: stxvp vsp36, 64(r6)
380+
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
381+
; CHECK-NEXT: stxvp vsp34, 32(r6)
382+
; CHECK-NEXT: stxvp vsp36, 0(r6)
383+
; CHECK-NEXT: lxvp vsp34, 0(r3)
384+
; CHECK-NEXT: lxvp vsp36, 32(r3)
385+
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
386+
; CHECK-NEXT: lxvp vsp34, 64(r3)
387+
; CHECK-NEXT: lxvp vsp36, 96(r3)
388+
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
389+
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
390+
; CHECK-NEXT: stxvp vsp34, 96(r5)
391+
; CHECK-NEXT: stxvp vsp36, 64(r5)
392+
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
393+
; CHECK-NEXT: stxvp vsp34, 32(r5)
394+
; CHECK-NEXT: stxvp vsp36, 0(r5)
395+
; CHECK-NEXT: blr
396+
;
397+
; CHECK-BE-LABEL: tbuild:
398+
; CHECK-BE: # %bb.0: # %entry
399+
; CHECK-BE-NEXT: lxv v3, 0(r7)
400+
; CHECK-BE-NEXT: vmr v2, v3
401+
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
402+
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0
403+
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
404+
; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
405+
; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
406+
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
407+
; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
408+
; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
409+
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
410+
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
411+
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
412+
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
413+
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
414+
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
415+
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
416+
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
417+
; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
418+
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
419+
; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
420+
; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
421+
; CHECK-BE-NEXT: blr
422+
entry:
423+
%0 = load <16 x i8>, ptr %v, align 16
424+
%1 = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0)
425+
store <1024 x i1> %1, ptr %res2, align 128
426+
%2 = load <1024 x i1>, ptr %p1, align 128
427+
tail call void @llvm.ppc.mma.disassemble.dmr(ptr %res1, <1024 x i1> %2)
428+
ret void
429+
}
430+
431+
declare <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
432+
declare void @llvm.ppc.mma.disassemble.dmr(ptr, <1024 x i1>)
370433
declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
371434
declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
372435
declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)

0 commit comments

Comments
 (0)