Skip to content

Commit bff468f

Browse files
committed
Add Subtarget hasVSX check
1 parent 46d907a commit bff468f

File tree

3 files changed

+91
-203
lines changed

3 files changed

+91
-203
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 53 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -15556,60 +15556,62 @@ SDValue PPCTargetLowering::combineSetCC(SDNode *N,
1555615556
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
1555715557
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
1555815558
}
15559+
if (Subtarget.hasVSX()) {
15560+
if (LHS.getOpcode() == ISD::LOAD && RHS.getOpcode() == ISD::LOAD &&
15561+
LHS.hasOneUse() && RHS.hasOneUse() &&
15562+
LHS.getValueType() == MVT::i128 && RHS.getValueType() == MVT::i128) {
15563+
SDLoc DL(N);
15564+
SelectionDAG &DAG = DCI.DAG;
15565+
auto *LA = dyn_cast<LoadSDNode>(LHS);
15566+
auto *LB = dyn_cast<LoadSDNode>(RHS);
15567+
if (!LA || !LB)
15568+
return SDValue();
1555915569

15560-
if (LHS.getOpcode() == ISD::LOAD && RHS.getOpcode() == ISD::LOAD &&
15561-
LHS.hasOneUse() && RHS.hasOneUse() && LHS.getValueType() == MVT::i128 &&
15562-
RHS.getValueType() == MVT::i128) {
15563-
SDLoc DL(N);
15564-
SelectionDAG &DAG = DCI.DAG;
15565-
auto *LA = dyn_cast<LoadSDNode>(LHS);
15566-
auto *LB = dyn_cast<LoadSDNode>(RHS);
15567-
if (!LA || !LB)
15568-
return SDValue();
15569-
15570-
// If either memory operation (LA or LB) is volatile, do not perform any
15571-
// optimization or transformation. Volatile operations must be preserved
15572-
// as written to ensure correct program behavior, so we return an empty
15573-
// SDValue to indicate no action.
15574-
if (LA->isVolatile() || LB->isVolatile())
15575-
return SDValue();
15570+
// If either memory operation (LA or LB) is volatile, do not perform any
15571+
// optimization or transformation. Volatile operations must be preserved
15572+
// as written to ensure correct program behavior, so we return an empty
15573+
// SDValue to indicate no action.
15574+
if (LA->isVolatile() || LB->isVolatile())
15575+
return SDValue();
1557615576

15577-
// Only combine loads if both use the unindexed addressing mode.
15578-
// PowerPC AltiVec/VMX does not support vector loads or stores with
15579-
// pre/post-increment addressing. Indexed modes may imply implicit pointer
15580-
// updates, which are not compatible with AltiVec vector instructions.
15581-
if (LA->getAddressingMode() != ISD::UNINDEXED ||
15582-
LB->getAddressingMode() != ISD::UNINDEXED)
15583-
return SDValue();
15577+
// Only combine loads if both use the unindexed addressing mode.
15578+
// PowerPC AltiVec/VMX does not support vector loads or stores with
15579+
// pre/post-increment addressing. Indexed modes may imply implicit
15580+
// pointer updates, which are not compatible with AltiVec vector
15581+
// instructions.
15582+
if (LA->getAddressingMode() != ISD::UNINDEXED ||
15583+
LB->getAddressingMode() != ISD::UNINDEXED)
15584+
return SDValue();
1558415585

15585-
// Only combine loads if both are non-extending loads (ISD::NON_EXTLOAD).
15586-
// Extending loads (such as ISD::ZEXTLOAD or ISD::SEXTLOAD) perform zero
15587-
// or sign extension, which may change the loaded value's semantics and
15588-
// are not compatible with vector loads.
15589-
if (LA->getExtensionType() != ISD::NON_EXTLOAD ||
15590-
LB->getExtensionType() != ISD::NON_EXTLOAD)
15591-
return SDValue();
15592-
// Build new v16i8 loads using the same chain/base/MMO (no extra memory
15593-
// op).
15594-
SDValue LHSVec = DAG.getLoad(MVT::v16i8, DL, LA->getChain(),
15595-
LA->getBasePtr(), LA->getMemOperand());
15596-
SDValue RHSVec = DAG.getLoad(MVT::v16i8, DL, LB->getChain(),
15597-
LB->getBasePtr(), LB->getMemOperand());
15598-
15599-
SDValue IntrID =
15600-
DAG.getTargetConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL,
15601-
Subtarget.isPPC64() ? MVT::i64 : MVT::i32);
15602-
SDValue CRSel =
15603-
DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15604-
SDValue Ops[] = {IntrID, CRSel, LHSVec, RHSVec};
15605-
SDValue PredResult =
15606-
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, Ops);
15607-
15608-
// ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15609-
// so we need to invert the CC opcode.
15610-
return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15611-
DAG.getConstant(0, DL, MVT::i32),
15612-
CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15586+
// Only combine loads if both are non-extending loads
15587+
// (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15588+
// ISD::SEXTLOAD) perform zero or sign extension, which may change the
15589+
// loaded value's semantics and are not compatible with vector loads.
15590+
if (LA->getExtensionType() != ISD::NON_EXTLOAD ||
15591+
LB->getExtensionType() != ISD::NON_EXTLOAD)
15592+
return SDValue();
15593+
// Build new v16i8 loads using the same chain/base/MMO (no extra memory
15594+
// op).
15595+
SDValue LHSVec = DAG.getLoad(MVT::v16i8, DL, LA->getChain(),
15596+
LA->getBasePtr(), LA->getMemOperand());
15597+
SDValue RHSVec = DAG.getLoad(MVT::v16i8, DL, LB->getChain(),
15598+
LB->getBasePtr(), LB->getMemOperand());
15599+
15600+
SDValue IntrID =
15601+
DAG.getTargetConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL,
15602+
Subtarget.isPPC64() ? MVT::i64 : MVT::i32);
15603+
SDValue CRSel =
15604+
DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15605+
SDValue Ops[] = {IntrID, CRSel, LHSVec, RHSVec};
15606+
SDValue PredResult =
15607+
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, Ops);
15608+
15609+
// ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15610+
// so we need to invert the CC opcode.
15611+
return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15612+
DAG.getConstant(0, DL, MVT::i32),
15613+
CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15614+
}
1561315615
}
1561415616
}
1561515617

llvm/test/CodeGen/PowerPC/memcmp32_fixsize.ll

Lines changed: 20 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -14,110 +14,38 @@
1414
define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) {
1515
; CHECK-AIX32-P8-LABEL: cmpeq16:
1616
; CHECK-AIX32-P8: # %bb.0: # %entry
17-
; CHECK-AIX32-P8-NEXT: lwz r5, 4(r3)
18-
; CHECK-AIX32-P8-NEXT: lwz r6, 0(r3)
19-
; CHECK-AIX32-P8-NEXT: lwz r7, 4(r4)
20-
; CHECK-AIX32-P8-NEXT: lwz r8, 0(r4)
21-
; CHECK-AIX32-P8-NEXT: xor r6, r6, r8
22-
; CHECK-AIX32-P8-NEXT: xor r5, r5, r7
23-
; CHECK-AIX32-P8-NEXT: or. r5, r5, r6
24-
; CHECK-AIX32-P8-NEXT: bne cr0, L..BB0_2
25-
; CHECK-AIX32-P8-NEXT: # %bb.1: # %loadbb1
26-
; CHECK-AIX32-P8-NEXT: lwz r5, 12(r3)
27-
; CHECK-AIX32-P8-NEXT: lwz r3, 8(r3)
28-
; CHECK-AIX32-P8-NEXT: lwz r6, 12(r4)
29-
; CHECK-AIX32-P8-NEXT: lwz r4, 8(r4)
30-
; CHECK-AIX32-P8-NEXT: xor r3, r3, r4
31-
; CHECK-AIX32-P8-NEXT: xor r4, r5, r6
32-
; CHECK-AIX32-P8-NEXT: or. r3, r4, r3
33-
; CHECK-AIX32-P8-NEXT: li r3, 0
34-
; CHECK-AIX32-P8-NEXT: beq cr0, L..BB0_3
35-
; CHECK-AIX32-P8-NEXT: L..BB0_2: # %res_block
36-
; CHECK-AIX32-P8-NEXT: li r3, 1
37-
; CHECK-AIX32-P8-NEXT: L..BB0_3: # %endblock
38-
; CHECK-AIX32-P8-NEXT: cntlzw r3, r3
39-
; CHECK-AIX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31
17+
; CHECK-AIX32-P8-NEXT: lxvw4x vs34, 0, r4
18+
; CHECK-AIX32-P8-NEXT: lxvw4x vs35, 0, r3
19+
; CHECK-AIX32-P8-NEXT: vcmpequb. v2, v3, v2
20+
; CHECK-AIX32-P8-NEXT: mfocrf r3, 2
21+
; CHECK-AIX32-P8-NEXT: rlwinm r3, r3, 25, 31, 31
4022
; CHECK-AIX32-P8-NEXT: blr
4123
;
4224
; CHECK-AIX32-P10-LABEL: cmpeq16:
4325
; CHECK-AIX32-P10: # %bb.0: # %entry
44-
; CHECK-AIX32-P10-NEXT: lwz r5, 4(r3)
45-
; CHECK-AIX32-P10-NEXT: lwz r6, 0(r3)
46-
; CHECK-AIX32-P10-NEXT: lwz r7, 4(r4)
47-
; CHECK-AIX32-P10-NEXT: xor r5, r5, r7
48-
; CHECK-AIX32-P10-NEXT: lwz r8, 0(r4)
49-
; CHECK-AIX32-P10-NEXT: xor r6, r6, r8
50-
; CHECK-AIX32-P10-NEXT: or. r5, r5, r6
51-
; CHECK-AIX32-P10-NEXT: bne cr0, L..BB0_2
52-
; CHECK-AIX32-P10-NEXT: # %bb.1: # %loadbb1
53-
; CHECK-AIX32-P10-NEXT: lwz r5, 12(r3)
54-
; CHECK-AIX32-P10-NEXT: lwz r3, 8(r3)
55-
; CHECK-AIX32-P10-NEXT: lwz r6, 12(r4)
56-
; CHECK-AIX32-P10-NEXT: lwz r4, 8(r4)
57-
; CHECK-AIX32-P10-NEXT: xor r3, r3, r4
58-
; CHECK-AIX32-P10-NEXT: xor r4, r5, r6
59-
; CHECK-AIX32-P10-NEXT: or. r3, r4, r3
60-
; CHECK-AIX32-P10-NEXT: li r3, 0
61-
; CHECK-AIX32-P10-NEXT: beq cr0, L..BB0_3
62-
; CHECK-AIX32-P10-NEXT: L..BB0_2: # %res_block
63-
; CHECK-AIX32-P10-NEXT: li r3, 1
64-
; CHECK-AIX32-P10-NEXT: L..BB0_3: # %endblock
65-
; CHECK-AIX32-P10-NEXT: cntlzw r3, r3
66-
; CHECK-AIX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31
26+
; CHECK-AIX32-P10-NEXT: lxv vs34, 0(r4)
27+
; CHECK-AIX32-P10-NEXT: lxv vs35, 0(r3)
28+
; CHECK-AIX32-P10-NEXT: vcmpequb. v2, v3, v2
29+
; CHECK-AIX32-P10-NEXT: setbc r3, 4*cr6+lt
6730
; CHECK-AIX32-P10-NEXT: blr
6831
;
6932
; CHECK-LINUX32-P8-LABEL: cmpeq16:
7033
; CHECK-LINUX32-P8: # %bb.0: # %entry
71-
; CHECK-LINUX32-P8-NEXT: lwz r5, 0(r3)
72-
; CHECK-LINUX32-P8-NEXT: lwz r6, 4(r3)
73-
; CHECK-LINUX32-P8-NEXT: lwz r7, 0(r4)
74-
; CHECK-LINUX32-P8-NEXT: lwz r8, 4(r4)
75-
; CHECK-LINUX32-P8-NEXT: xor r6, r6, r8
76-
; CHECK-LINUX32-P8-NEXT: xor r5, r5, r7
77-
; CHECK-LINUX32-P8-NEXT: or. r5, r5, r6
78-
; CHECK-LINUX32-P8-NEXT: bne cr0, .LBB0_2
79-
; CHECK-LINUX32-P8-NEXT: # %bb.1: # %loadbb1
80-
; CHECK-LINUX32-P8-NEXT: lwz r5, 8(r3)
81-
; CHECK-LINUX32-P8-NEXT: lwz r3, 12(r3)
82-
; CHECK-LINUX32-P8-NEXT: lwz r6, 8(r4)
83-
; CHECK-LINUX32-P8-NEXT: lwz r4, 12(r4)
84-
; CHECK-LINUX32-P8-NEXT: xor r3, r3, r4
85-
; CHECK-LINUX32-P8-NEXT: xor r4, r5, r6
86-
; CHECK-LINUX32-P8-NEXT: or. r3, r4, r3
87-
; CHECK-LINUX32-P8-NEXT: li r3, 0
88-
; CHECK-LINUX32-P8-NEXT: beq cr0, .LBB0_3
89-
; CHECK-LINUX32-P8-NEXT: .LBB0_2: # %res_block
90-
; CHECK-LINUX32-P8-NEXT: li r3, 1
91-
; CHECK-LINUX32-P8-NEXT: .LBB0_3: # %endblock
92-
; CHECK-LINUX32-P8-NEXT: cntlzw r3, r3
93-
; CHECK-LINUX32-P8-NEXT: rlwinm r3, r3, 27, 31, 31
34+
; CHECK-LINUX32-P8-NEXT: lxvd2x vs0, 0, r4
35+
; CHECK-LINUX32-P8-NEXT: xxswapd vs34, vs0
36+
; CHECK-LINUX32-P8-NEXT: lxvd2x vs0, 0, r3
37+
; CHECK-LINUX32-P8-NEXT: xxswapd vs35, vs0
38+
; CHECK-LINUX32-P8-NEXT: vcmpequb. v2, v3, v2
39+
; CHECK-LINUX32-P8-NEXT: mfocrf r3, 2
40+
; CHECK-LINUX32-P8-NEXT: rlwinm r3, r3, 25, 31, 31
9441
; CHECK-LINUX32-P8-NEXT: blr
9542
;
9643
; CHECK-LINUX32-P10-LABEL: cmpeq16:
9744
; CHECK-LINUX32-P10: # %bb.0: # %entry
98-
; CHECK-LINUX32-P10-NEXT: lwz r5, 0(r3)
99-
; CHECK-LINUX32-P10-NEXT: lwz r6, 4(r3)
100-
; CHECK-LINUX32-P10-NEXT: lwz r7, 0(r4)
101-
; CHECK-LINUX32-P10-NEXT: xor r5, r5, r7
102-
; CHECK-LINUX32-P10-NEXT: lwz r8, 4(r4)
103-
; CHECK-LINUX32-P10-NEXT: xor r6, r6, r8
104-
; CHECK-LINUX32-P10-NEXT: or. r5, r5, r6
105-
; CHECK-LINUX32-P10-NEXT: bne cr0, .LBB0_2
106-
; CHECK-LINUX32-P10-NEXT: # %bb.1: # %loadbb1
107-
; CHECK-LINUX32-P10-NEXT: lwz r5, 8(r3)
108-
; CHECK-LINUX32-P10-NEXT: lwz r3, 12(r3)
109-
; CHECK-LINUX32-P10-NEXT: lwz r6, 8(r4)
110-
; CHECK-LINUX32-P10-NEXT: lwz r4, 12(r4)
111-
; CHECK-LINUX32-P10-NEXT: xor r3, r3, r4
112-
; CHECK-LINUX32-P10-NEXT: xor r4, r5, r6
113-
; CHECK-LINUX32-P10-NEXT: or. r3, r4, r3
114-
; CHECK-LINUX32-P10-NEXT: li r3, 0
115-
; CHECK-LINUX32-P10-NEXT: beq cr0, .LBB0_3
116-
; CHECK-LINUX32-P10-NEXT: .LBB0_2: # %res_block
117-
; CHECK-LINUX32-P10-NEXT: li r3, 1
118-
; CHECK-LINUX32-P10-NEXT: .LBB0_3: # %endblock
119-
; CHECK-LINUX32-P10-NEXT: cntlzw r3, r3
120-
; CHECK-LINUX32-P10-NEXT: rlwinm r3, r3, 27, 31, 31
45+
; CHECK-LINUX32-P10-NEXT: lxv vs34, 0(r4)
46+
; CHECK-LINUX32-P10-NEXT: lxv vs35, 0(r3)
47+
; CHECK-LINUX32-P10-NEXT: vcmpequb. v2, v3, v2
48+
; CHECK-LINUX32-P10-NEXT: setbc r3, 4*cr6+lt
12149
; CHECK-LINUX32-P10-NEXT: blr
12250
entry:
12351
%bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i32 16)

llvm/test/CodeGen/PowerPC/memcmp64_fixsize.ll

Lines changed: 18 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -14,78 +14,36 @@
1414
define dso_local signext range(i32 0, 2) i32 @cmpeq16(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b) {
1515
; CHECK-AIX64-32-P8-LABEL: cmpeq16:
1616
; CHECK-AIX64-32-P8: # %bb.0: # %entry
17-
; CHECK-AIX64-32-P8-NEXT: ld r5, 0(r3)
18-
; CHECK-AIX64-32-P8-NEXT: ld r6, 0(r4)
19-
; CHECK-AIX64-32-P8-NEXT: cmpld r5, r6
20-
; CHECK-AIX64-32-P8-NEXT: bne cr0, L..BB0_2
21-
; CHECK-AIX64-32-P8-NEXT: # %bb.1: # %loadbb1
22-
; CHECK-AIX64-32-P8-NEXT: ld r5, 8(r3)
23-
; CHECK-AIX64-32-P8-NEXT: ld r4, 8(r4)
24-
; CHECK-AIX64-32-P8-NEXT: li r3, 0
25-
; CHECK-AIX64-32-P8-NEXT: cmpld r5, r4
26-
; CHECK-AIX64-32-P8-NEXT: beq cr0, L..BB0_3
27-
; CHECK-AIX64-32-P8-NEXT: L..BB0_2: # %res_block
28-
; CHECK-AIX64-32-P8-NEXT: li r3, 1
29-
; CHECK-AIX64-32-P8-NEXT: L..BB0_3: # %endblock
30-
; CHECK-AIX64-32-P8-NEXT: cntlzw r3, r3
31-
; CHECK-AIX64-32-P8-NEXT: srwi r3, r3, 5
17+
; CHECK-AIX64-32-P8-NEXT: lxvw4x vs34, 0, r4
18+
; CHECK-AIX64-32-P8-NEXT: lxvw4x vs35, 0, r3
19+
; CHECK-AIX64-32-P8-NEXT: vcmpequb. v2, v3, v2
20+
; CHECK-AIX64-32-P8-NEXT: mfocrf r3, 2
21+
; CHECK-AIX64-32-P8-NEXT: rlwinm r3, r3, 25, 31, 31
3222
; CHECK-AIX64-32-P8-NEXT: blr
3323
;
3424
; CHECK-AIX64-32-P10-LABEL: cmpeq16:
3525
; CHECK-AIX64-32-P10: # %bb.0: # %entry
36-
; CHECK-AIX64-32-P10-NEXT: ld r5, 0(r3)
37-
; CHECK-AIX64-32-P10-NEXT: ld r6, 0(r4)
38-
; CHECK-AIX64-32-P10-NEXT: cmpld r5, r6
39-
; CHECK-AIX64-32-P10-NEXT: bne cr0, L..BB0_2
40-
; CHECK-AIX64-32-P10-NEXT: # %bb.1: # %loadbb1
41-
; CHECK-AIX64-32-P10-NEXT: ld r5, 8(r3)
42-
; CHECK-AIX64-32-P10-NEXT: ld r4, 8(r4)
43-
; CHECK-AIX64-32-P10-NEXT: li r3, 0
44-
; CHECK-AIX64-32-P10-NEXT: cmpld r5, r4
45-
; CHECK-AIX64-32-P10-NEXT: beq cr0, L..BB0_3
46-
; CHECK-AIX64-32-P10-NEXT: L..BB0_2: # %res_block
47-
; CHECK-AIX64-32-P10-NEXT: li r3, 1
48-
; CHECK-AIX64-32-P10-NEXT: L..BB0_3: # %endblock
49-
; CHECK-AIX64-32-P10-NEXT: cntlzw r3, r3
50-
; CHECK-AIX64-32-P10-NEXT: rlwinm r3, r3, 27, 31, 31
26+
; CHECK-AIX64-32-P10-NEXT: lxv vs34, 0(r4)
27+
; CHECK-AIX64-32-P10-NEXT: lxv vs35, 0(r3)
28+
; CHECK-AIX64-32-P10-NEXT: vcmpequb. v2, v3, v2
29+
; CHECK-AIX64-32-P10-NEXT: setbc r3, 4*cr6+lt
5130
; CHECK-AIX64-32-P10-NEXT: blr
5231
;
5332
; CHECK-LINUX64-P8-LABEL: cmpeq16:
5433
; CHECK-LINUX64-P8: # %bb.0: # %entry
55-
; CHECK-LINUX64-P8-NEXT: ld r5, 0(r3)
56-
; CHECK-LINUX64-P8-NEXT: ld r6, 0(r4)
57-
; CHECK-LINUX64-P8-NEXT: cmpld r5, r6
58-
; CHECK-LINUX64-P8-NEXT: bne cr0, .LBB0_2
59-
; CHECK-LINUX64-P8-NEXT: # %bb.1: # %loadbb1
60-
; CHECK-LINUX64-P8-NEXT: ld r5, 8(r3)
61-
; CHECK-LINUX64-P8-NEXT: ld r4, 8(r4)
62-
; CHECK-LINUX64-P8-NEXT: li r3, 0
63-
; CHECK-LINUX64-P8-NEXT: cmpld r5, r4
64-
; CHECK-LINUX64-P8-NEXT: beq cr0, .LBB0_3
65-
; CHECK-LINUX64-P8-NEXT: .LBB0_2: # %res_block
66-
; CHECK-LINUX64-P8-NEXT: li r3, 1
67-
; CHECK-LINUX64-P8-NEXT: .LBB0_3: # %endblock
68-
; CHECK-LINUX64-P8-NEXT: cntlzw r3, r3
69-
; CHECK-LINUX64-P8-NEXT: srwi r3, r3, 5
34+
; CHECK-LINUX64-P8-NEXT: lxvd2x vs34, 0, r4
35+
; CHECK-LINUX64-P8-NEXT: lxvd2x vs35, 0, r3
36+
; CHECK-LINUX64-P8-NEXT: vcmpequb. v2, v3, v2
37+
; CHECK-LINUX64-P8-NEXT: mfocrf r3, 2
38+
; CHECK-LINUX64-P8-NEXT: rlwinm r3, r3, 25, 31, 31
7039
; CHECK-LINUX64-P8-NEXT: blr
7140
;
7241
; CHECK-LINUX64-P10-LABEL: cmpeq16:
7342
; CHECK-LINUX64-P10: # %bb.0: # %entry
74-
; CHECK-LINUX64-P10-NEXT: ld r5, 0(r3)
75-
; CHECK-LINUX64-P10-NEXT: ld r6, 0(r4)
76-
; CHECK-LINUX64-P10-NEXT: cmpld r5, r6
77-
; CHECK-LINUX64-P10-NEXT: bne cr0, .LBB0_2
78-
; CHECK-LINUX64-P10-NEXT: # %bb.1: # %loadbb1
79-
; CHECK-LINUX64-P10-NEXT: ld r5, 8(r3)
80-
; CHECK-LINUX64-P10-NEXT: ld r4, 8(r4)
81-
; CHECK-LINUX64-P10-NEXT: li r3, 0
82-
; CHECK-LINUX64-P10-NEXT: cmpld r5, r4
83-
; CHECK-LINUX64-P10-NEXT: beq cr0, .LBB0_3
84-
; CHECK-LINUX64-P10-NEXT: .LBB0_2: # %res_block
85-
; CHECK-LINUX64-P10-NEXT: li r3, 1
86-
; CHECK-LINUX64-P10-NEXT: .LBB0_3: # %endblock
87-
; CHECK-LINUX64-P10-NEXT: cntlzw r3, r3
88-
; CHECK-LINUX64-P10-NEXT: rlwinm r3, r3, 27, 31, 31
43+
; CHECK-LINUX64-P10-NEXT: lxv vs34, 0(r4)
44+
; CHECK-LINUX64-P10-NEXT: lxv vs35, 0(r3)
45+
; CHECK-LINUX64-P10-NEXT: vcmpequb. v2, v3, v2
46+
; CHECK-LINUX64-P10-NEXT: setbc r3, 4*cr6+lt
8947
; CHECK-LINUX64-P10-NEXT: blr
9048
entry:
9149
%bcmp = tail call i32 @bcmp(ptr noundef nonnull dereferenceable(16) %a, ptr noundef nonnull dereferenceable(16) %b, i64 16)

0 commit comments

Comments
 (0)