Skip to content

Commit b3b8416

Browse files
committed
custom lower v1024i1 load/store
1 parent 3706dfe commit b3b8416

File tree

4 files changed

+197
-68
lines changed

4 files changed

+197
-68
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
13631363
setOperationAction(ISD::STORE, MVT::v512i1, Custom);
13641364
setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
13651365
}
1366+
if (Subtarget.isISAFuture()) {
1367+
setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1368+
setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1369+
addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1370+
}
13661371

13671372
if (Subtarget.has64BitSupport())
13681373
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
@@ -11766,9 +11771,13 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
1176611771
SDValue BasePtr = LN->getBasePtr();
1176711772
EVT VT = Op.getValueType();
1176811773

11769-
if (VT != MVT::v256i1 && VT != MVT::v512i1)
11774+
if (VT != MVT::v256i1 && VT != MVT::v512i1 && VT != MVT::v1024i1)
1177011775
return Op;
1177111776

11777+
// Type v1024i1 is used for dense math registers (ISA Future only).
11778+
assert((VT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
11779+
"Type unsupported for this processor");
11780+
1177211781
// Type v256i1 is used for pairs, v512i1 is used for accumulators and
1177311782
// v1024i1 is used for dense math registers. Here we create 2, 4 or 8 v16i8
1177411783
// loads to load the value into 2, 4 or 8 vsx registers.
@@ -11796,9 +11805,36 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
1179611805
std::reverse(LoadChains.begin(), LoadChains.end());
1179711806
}
1179811807
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11799-
SDValue Value =
11800-
DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11801-
dl, VT, Loads);
11808+
SDValue Value;
11809+
if (VT == MVT::v1024i1) {
11810+
SmallVector<SDValue, 4> Pairs;
11811+
SDValue Vsx0Idx = DAG.getTargetConstant(PPC::sub_vsx0, dl, MVT::i32);
11812+
SDValue Vsx1Idx = DAG.getTargetConstant(PPC::sub_vsx1, dl, MVT::i32);
11813+
SDValue VSRpRC = DAG.getTargetConstant(PPC::VSRpRCRegClassID, dl, MVT::i32);
11814+
NumVecs >>= 1;
11815+
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11816+
const SDValue Ops[] = {VSRpRC, Loads[Idx * 2], Vsx0Idx,
11817+
Loads[Idx * 2 + 1], Vsx1Idx};
11818+
Pairs.push_back(SDValue(
11819+
DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v256i1, Ops), 0));
11820+
}
11821+
SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTFDMR512, dl, MVT::v512i1,
11822+
Pairs[0], Pairs[1]),
11823+
0);
11824+
SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
11825+
SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTFDMR512_HI, dl, MVT::v512i1,
11826+
Pairs[2], Pairs[3]),
11827+
0);
11828+
SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
11829+
SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
11830+
const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11831+
Value = SDValue(
11832+
DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
11833+
} else {
11834+
Value =
11835+
DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11836+
dl, VT, Loads);
11837+
}
1180211838
SDValue RetOps[] = {Value, TF};
1180311839
return DAG.getMergeValues(RetOps, dl);
1180411840
}
@@ -11810,12 +11846,17 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
1181011846
SDValue StoreChain = SN->getChain();
1181111847
SDValue BasePtr = SN->getBasePtr();
1181211848
SDValue Value = SN->getValue();
11813-
SDValue Value2 = SN->getValue();
1181411849
EVT StoreVT = Value.getValueType();
11850+
SmallVector<SDValue, 4> ValueVec;
1181511851

11816-
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11852+
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1 &&
11853+
StoreVT != MVT::v1024i1)
1181711854
return Op;
1181811855

11856+
// Type v1024i1 is used for dense math registers (ISA Future only).
11857+
assert((StoreVT != MVT::v1024i1 || Subtarget.isISAFuture()) &&
11858+
"Type unsupported for this processor");
11859+
1181911860
// Type v256i1 is used for pairs, v512i1 is used for accumulators and
1182011861
// v1024i1 is used for dense math registers. Here we create 2, 4 or 8 v16i8
1182111862
// stores to store the underlying vsx registers individually.
@@ -11832,20 +11873,43 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
1183211873
MachineSDNode *ExtNode = DAG.getMachineNode(
1183311874
PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
1183411875

11835-
Value = SDValue(ExtNode, 0);
11836-
Value2 = SDValue(ExtNode, 1);
11876+
ValueVec.push_back(SDValue(ExtNode, 0));
11877+
ValueVec.push_back(SDValue(ExtNode, 1));
1183711878
} else
1183811879
Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
1183911880
NumVecs = 4;
11881+
11882+
} else if (StoreVT == MVT::v1024i1) {
11883+
SDValue Lo(DAG.getMachineNode(
11884+
TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11885+
Op.getOperand(1),
11886+
DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
11887+
0);
11888+
SDValue Hi(DAG.getMachineNode(
11889+
TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11890+
Op.getOperand(1),
11891+
DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
11892+
0);
11893+
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11894+
MachineSDNode *ExtNode =
11895+
DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
11896+
ValueVec.push_back(SDValue(ExtNode, 0));
11897+
ValueVec.push_back(SDValue(ExtNode, 1));
11898+
ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
11899+
ValueVec.push_back(SDValue(ExtNode, 0));
11900+
ValueVec.push_back(SDValue(ExtNode, 1));
11901+
NumVecs = 8;
1184011902
}
1184111903
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
1184211904
unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
1184311905
SDValue Elt;
1184411906
if (Subtarget.isISAFuture()) {
1184511907
VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11846-
Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11847-
Idx > 1 ? Value2 : Value,
11848-
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11908+
unsigned Pairx =
11909+
Subtarget.isLittleEndian() ? (NumVecs - Idx - 1) / 2 : Idx / 2;
11910+
Elt = DAG.getNode(
11911+
PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, ValueVec[Pairx],
11912+
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
1184911913
} else
1185011914
Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
1185111915
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));

llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
4646
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
4747
; CHECK-NEXT: xvf16ger2pp wacc0, v28, v30
4848
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
49-
; CHECK-NEXT: stxv v4, 48(r30)
50-
; CHECK-NEXT: stxv v5, 32(r30)
51-
; CHECK-NEXT: stxv v2, 16(r30)
52-
; CHECK-NEXT: stxv v3, 0(r30)
49+
; CHECK-NEXT: stxv v2, 48(r30)
50+
; CHECK-NEXT: stxv v3, 32(r30)
51+
; CHECK-NEXT: stxv v4, 16(r30)
52+
; CHECK-NEXT: stxv v5, 0(r30)
5353
; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
5454
; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
5555
; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload

llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
3131
; CHECK-NEXT: vmr v3, v2
3232
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
3333
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
34-
; CHECK-NEXT: stxv v4, 48(r3)
35-
; CHECK-NEXT: stxv v5, 32(r3)
36-
; CHECK-NEXT: stxv v2, 16(r3)
37-
; CHECK-NEXT: stxv v3, 0(r3)
34+
; CHECK-NEXT: stxv v2, 48(r3)
35+
; CHECK-NEXT: stxv v3, 32(r3)
36+
; CHECK-NEXT: stxv v4, 16(r3)
37+
; CHECK-NEXT: stxv v5, 0(r3)
3838
; CHECK-NEXT: blr
3939
;
4040
; CHECK-BE-LABEL: ass_acc:
@@ -55,7 +55,7 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
5555
; CHECK-O0-NEXT: vmr v3, v4
5656
; CHECK-O0-NEXT: vmr v2, v4
5757
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp34, 0
58-
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
58+
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
5959
; CHECK-O0-NEXT: xxlor vs0, v4, v4
6060
; CHECK-O0-NEXT: stxv vs0, 48(r3)
6161
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -121,10 +121,10 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
121121
; CHECK-NEXT: lxv v4, 48(r3)
122122
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
123123
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
124-
; CHECK-NEXT: stxv v4, 48(r7)
125-
; CHECK-NEXT: stxv v5, 32(r7)
126-
; CHECK-NEXT: stxv v2, 16(r7)
127-
; CHECK-NEXT: stxv v3, 0(r7)
124+
; CHECK-NEXT: stxv v2, 48(r7)
125+
; CHECK-NEXT: stxv v3, 32(r7)
126+
; CHECK-NEXT: stxv v4, 16(r7)
127+
; CHECK-NEXT: stxv v5, 0(r7)
128128
; CHECK-NEXT: blr
129129
;
130130
; CHECK-BE-LABEL: ld_st_xxmtacc:
@@ -154,7 +154,7 @@ define void @ld_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
154154
; CHECK-O0-NEXT: lxv vs0, 48(r3)
155155
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
156156
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
157-
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
157+
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
158158
; CHECK-O0-NEXT: xxlor vs0, v4, v4
159159
; CHECK-O0-NEXT: stxv vs0, 48(r7)
160160
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -236,10 +236,10 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
236236
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp32, vsp36, 0
237237
; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2
238238
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
239-
; CHECK-NEXT: stxv v4, 48(r7)
240-
; CHECK-NEXT: stxv v5, 32(r7)
241-
; CHECK-NEXT: stxv v2, 16(r7)
242-
; CHECK-NEXT: stxv v3, 0(r7)
239+
; CHECK-NEXT: stxv v2, 48(r7)
240+
; CHECK-NEXT: stxv v3, 32(r7)
241+
; CHECK-NEXT: stxv v4, 16(r7)
242+
; CHECK-NEXT: stxv v5, 0(r7)
243243
; CHECK-NEXT: blr
244244
;
245245
; CHECK-BE-LABEL: ld_op_st_xxmtacc:
@@ -271,7 +271,7 @@ define void @ld_op_st_xxmtacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
271271
; CHECK-O0-NEXT: xxlor v4, vs0, vs0
272272
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp32, 0
273273
; CHECK-O0-NEXT: xvi4ger8pp wacc0, v2, v2
274-
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
274+
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
275275
; CHECK-O0-NEXT: xxlor vs0, v4, v4
276276
; CHECK-O0-NEXT: stxv vs0, 48(r7)
277277
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -356,14 +356,14 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
356356
; CHECK-NEXT: lxv v4, 48(r3)
357357
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
358358
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
359-
; CHECK-NEXT: stxv v4, 48(r3)
360-
; CHECK-NEXT: stxv v5, 32(r3)
361-
; CHECK-NEXT: stxv v2, 16(r3)
362-
; CHECK-NEXT: stxv v3, 0(r3)
363-
; CHECK-NEXT: stxv v4, 48(r7)
364-
; CHECK-NEXT: stxv v5, 32(r7)
365-
; CHECK-NEXT: stxv v2, 16(r7)
366-
; CHECK-NEXT: stxv v3, 0(r7)
359+
; CHECK-NEXT: stxv v2, 48(r3)
360+
; CHECK-NEXT: stxv v3, 32(r3)
361+
; CHECK-NEXT: stxv v4, 16(r3)
362+
; CHECK-NEXT: stxv v5, 0(r3)
363+
; CHECK-NEXT: stxv v2, 48(r7)
364+
; CHECK-NEXT: stxv v3, 32(r7)
365+
; CHECK-NEXT: stxv v4, 16(r7)
366+
; CHECK-NEXT: stxv v5, 0(r7)
367367
; CHECK-NEXT: blr
368368
;
369369
; CHECK-BE-LABEL: ld_st_xxmfacc:
@@ -397,7 +397,7 @@ define void @ld_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
397397
; CHECK-O0-NEXT: lxv vs0, 48(r3)
398398
; CHECK-O0-NEXT: xxlor v2, vs0, vs0
399399
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp34, vsp36, 0
400-
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
400+
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
401401
; CHECK-O0-NEXT: xxlor vs3, v4, v4
402402
; CHECK-O0-NEXT: stxv vs3, 48(r3)
403403
; CHECK-O0-NEXT: xxlor vs2, v5, v5
@@ -496,10 +496,10 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
496496
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp32, vsp36, 0
497497
; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2
498498
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
499-
; CHECK-NEXT: stxv v4, 48(r7)
500-
; CHECK-NEXT: stxv v5, 32(r7)
501-
; CHECK-NEXT: stxv v2, 16(r7)
502-
; CHECK-NEXT: stxv v3, 0(r7)
499+
; CHECK-NEXT: stxv v2, 48(r7)
500+
; CHECK-NEXT: stxv v3, 32(r7)
501+
; CHECK-NEXT: stxv v4, 16(r7)
502+
; CHECK-NEXT: stxv v5, 0(r7)
503503
; CHECK-NEXT: blr
504504
;
505505
; CHECK-BE-LABEL: ld_op_st_xxmfacc:
@@ -531,7 +531,7 @@ define void @ld_op_st_xxmfacc(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) {
531531
; CHECK-O0-NEXT: xxlor v4, vs0, vs0
532532
; CHECK-O0-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp32, 0
533533
; CHECK-O0-NEXT: xvi4ger8pp wacc0, v2, v2
534-
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
534+
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
535535
; CHECK-O0-NEXT: xxlor vs0, v4, v4
536536
; CHECK-O0-NEXT: stxv vs0, 48(r7)
537537
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -621,10 +621,10 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2)
621621
; CHECK-NEXT: xvf64gerpp wacc0, vsp34, v5
622622
; CHECK-NEXT: xvf64gerpp wacc0, vsp36, v4
623623
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
624-
; CHECK-NEXT: stxv v4, 48(r3)
625-
; CHECK-NEXT: stxv v5, 32(r3)
626-
; CHECK-NEXT: stxv v2, 16(r3)
627-
; CHECK-NEXT: stxv v3, 0(r3)
624+
; CHECK-NEXT: stxv v2, 48(r3)
625+
; CHECK-NEXT: stxv v3, 32(r3)
626+
; CHECK-NEXT: stxv v4, 16(r3)
627+
; CHECK-NEXT: stxv v5, 0(r3)
628628
; CHECK-NEXT: blr
629629
;
630630
; CHECK-BE-LABEL: cmplx_xxmacc:
@@ -673,7 +673,7 @@ define void @cmplx_xxmacc(ptr %ptr1, ptr %ptr2, <16 x i8> %vc1, <16 x i8> %vc2)
673673
; CHECK-O0-NEXT: xvf64gerpp wacc0, vsp32, vs0
674674
; CHECK-O0-NEXT: xxlor vs0, v4, v4
675675
; CHECK-O0-NEXT: xvf64gerpp wacc0, vsp34, vs0
676-
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
676+
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
677677
; CHECK-O0-NEXT: xxlor vs0, v4, v4
678678
; CHECK-O0-NEXT: stxv vs0, 48(r3)
679679
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -783,10 +783,10 @@ define void @int_xxsetaccz(ptr %ptr) {
783783
; CHECK: # %bb.0: # %entry
784784
; CHECK-NEXT: xxsetaccz wacc0
785785
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
786-
; CHECK-NEXT: stxv v4, 48(r3)
787-
; CHECK-NEXT: stxv v5, 32(r3)
788-
; CHECK-NEXT: stxv v2, 16(r3)
789-
; CHECK-NEXT: stxv v3, 0(r3)
786+
; CHECK-NEXT: stxv v2, 48(r3)
787+
; CHECK-NEXT: stxv v3, 32(r3)
788+
; CHECK-NEXT: stxv v4, 16(r3)
789+
; CHECK-NEXT: stxv v5, 0(r3)
790790
; CHECK-NEXT: blr
791791
;
792792
; CHECK-BE-LABEL: int_xxsetaccz:
@@ -802,7 +802,7 @@ define void @int_xxsetaccz(ptr %ptr) {
802802
; CHECK-O0-LABEL: int_xxsetaccz:
803803
; CHECK-O0: # %bb.0: # %entry
804804
; CHECK-O0-NEXT: xxsetaccz wacc0
805-
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
805+
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
806806
; CHECK-O0-NEXT: xxlor vs0, v4, v4
807807
; CHECK-O0-NEXT: stxv vs0, 48(r3)
808808
; CHECK-O0-NEXT: xxlor vs0, v5, v5
@@ -946,14 +946,14 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
946946
; CHECK-NEXT: xxsetaccz wacc0
947947
; CHECK-NEXT: xvf32gerpp wacc0, v2, v2
948948
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
949-
; CHECK-NEXT: stxv v4, 48(r3)
950-
; CHECK-NEXT: stxv v5, 32(r3)
951-
; CHECK-NEXT: stxv v2, 16(r3)
952-
; CHECK-NEXT: stxv v3, 0(r3)
953-
; CHECK-NEXT: stxv v4, 112(r3)
954-
; CHECK-NEXT: stxv v5, 96(r3)
955-
; CHECK-NEXT: stxv v2, 80(r3)
956-
; CHECK-NEXT: stxv v3, 64(r3)
949+
; CHECK-NEXT: stxv v2, 48(r3)
950+
; CHECK-NEXT: stxv v3, 32(r3)
951+
; CHECK-NEXT: stxv v4, 16(r3)
952+
; CHECK-NEXT: stxv v5, 0(r3)
953+
; CHECK-NEXT: stxv v2, 112(r3)
954+
; CHECK-NEXT: stxv v3, 96(r3)
955+
; CHECK-NEXT: stxv v4, 80(r3)
956+
; CHECK-NEXT: stxv v5, 64(r3)
957957
; CHECK-NEXT: blr
958958
;
959959
; CHECK-BE-LABEL: testcse:
@@ -975,7 +975,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
975975
; CHECK-O0: # %bb.0: # %entry
976976
; CHECK-O0-NEXT: xxsetaccz wacc0
977977
; CHECK-O0-NEXT: xvf32gerpp wacc0, v2, v2
978-
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
978+
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
979979
; CHECK-O0-NEXT: xxlor vs3, v4, v4
980980
; CHECK-O0-NEXT: stxv vs3, 48(r3)
981981
; CHECK-O0-NEXT: xxlor vs2, v5, v5
@@ -1065,10 +1065,10 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
10651065
; CHECK-NEXT: plxvp vsp36, 8(r4), 0
10661066
; CHECK-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0
10671067
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
1068-
; CHECK-NEXT: stxv v4, 48(r7)
1069-
; CHECK-NEXT: stxv v5, 32(r7)
1070-
; CHECK-NEXT: stxv v2, 16(r7)
1071-
; CHECK-NEXT: stxv v3, 0(r7)
1068+
; CHECK-NEXT: stxv v2, 48(r7)
1069+
; CHECK-NEXT: stxv v3, 32(r7)
1070+
; CHECK-NEXT: stxv v4, 16(r7)
1071+
; CHECK-NEXT: stxv v5, 0(r7)
10721072
; CHECK-NEXT: blr
10731073
;
10741074
; CHECK-BE-LABEL: test_ldst_1:
@@ -1104,7 +1104,7 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p
11041104
; CHECK-O0-NEXT: plxvp vsp34, 8(r4), 0
11051105
; CHECK-O0-NEXT: xxlor vs0, v4, v4
11061106
; CHECK-O0-NEXT: pmxvf64gernn wacc0, vsp34, vs0, 0, 0
1107-
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
1107+
; CHECK-O0-NEXT: dmxxextfdmr512 wacc0, vsp36, vsp34, 0
11081108
; CHECK-O0-NEXT: xxlor vs0, v4, v4
11091109
; CHECK-O0-NEXT: stxv vs0, 48(r7)
11101110
; CHECK-O0-NEXT: xxlor vs0, v5, v5

0 commit comments

Comments
 (0)