Skip to content

Commit 26f1cb3

Browse files
committed
Port foldCSelOfCSel to ARM
There is only one degenerate case in fpclamptosat, and I don't know how to fix it without another fold, or where.
1 parent d43030f commit 26f1cb3

14 files changed

+2540
-2768
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18371,9 +18371,75 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
1837118371
return SDValue();
1837218372
}
1837318373

18374+
/// foldCMOVOfCMOV - Fold an ARMISD::CMOV whose flags come from comparing an
/// inner ARMISD::CMOV of two distinct constants against one of those
/// constants. The outer CMOV is rewritten to use the inner CMOV's flags
/// directly. Returns an empty SDValue when the pattern does not match.
static SDValue foldCMOVOfCMOV(SDNode *Op, SelectionDAG &DAG) {
18375+
// (cmov L, R, EQ|NE, (cmp|cmpz (cmov X, Y, CC, Flags), X|Y)), X != Y constants
18376+
// -> (cmov R, L, CC', Flags), where CC' is CC or its opposite condition.
18377+
SDValue L = Op->getOperand(0);
18378+
SDValue R = Op->getOperand(1);
18379+
ARMCC::CondCodes OpCC =
18380+
static_cast<ARMCC::CondCodes>(Op->getConstantOperandVal(2));
18381+
18382+
SDValue OpCmp = Op->getOperand(3);
18383+
if (OpCmp.getOpcode() != ARMISD::CMPZ && OpCmp.getOpcode() != ARMISD::CMP)
18384+
// Flags must come from a CMP/CMPZ node; OpCC is limited to EQ/NE further down.
18385+
return SDValue();
18386+
18387+
SDValue CmpLHS = OpCmp.getOperand(0);
18388+
SDValue CmpRHS = OpCmp.getOperand(1);
18389+
18390+
// Canonicalize so that CmpLHS is the inner CMOV; bail if neither side is one.
if (CmpRHS.getOpcode() == ARMISD::CMOV)
18391+
std::swap(CmpLHS, CmpRHS);
18392+
else if (CmpLHS.getOpcode() != ARMISD::CMOV)
18393+
return SDValue();
18394+
18395+
SDValue X = CmpLHS->getOperand(0);
18396+
SDValue Y = CmpLHS->getOperand(1);
18397+
// Both inner CMOV values must be constants, and they must differ.
// NOTE(review): the X == Y test looks redundant with the APInt comparison
// below, which also rejects equal constants.
if (!isa<ConstantSDNode>(X) || !isa<ConstantSDNode>(Y) || X == Y)
18398+
return SDValue();
18399+
18400+
ConstantSDNode *CX = cast<ConstantSDNode>(X);
18401+
ConstantSDNode *CY = cast<ConstantSDNode>(Y);
18402+
if (CX->getAPIntValue() == CY->getAPIntValue())
18403+
return SDValue();
18404+
18405+
// Condition code and flags of the inner CMOV; the result reuses these flags.
ARMCC::CondCodes CC =
18406+
static_cast<ARMCC::CondCodes>(CmpLHS->getConstantOperandVal(2));
18407+
SDValue CondFlags = CmpLHS->getOperand(3);
18408+
18409+
// The other compare operand must be exactly one of the two inner constants.
if (CmpRHS == Y) {
18410+
// If the compare uses the second constant, flip the condition.
18411+
// VERIFY: getOppositeCondition does the same flip as AArch64's
18412+
// getInvertedCondCode.
18413+
CC = ARMCC::getOppositeCondition(CC);
18414+
} else if (CmpRHS != X) {
18415+
return SDValue();
18416+
}
18417+
18418+
if (OpCC == ARMCC::NE) {
18419+
// Outer NE inverts the sense.
18420+
CC = ARMCC::getOppositeCondition(CC);
18421+
} else if (OpCC != ARMCC::EQ) {
18422+
// Any outer condition other than EQ/NE is not handled.
return SDValue();
18423+
}
18424+
18425+
SDLoc DL(Op);
18426+
EVT VT = Op->getValueType(0);
18427+
// CMOV takes (falseVal, trueVal, CC, Flags). To match (CSEL L,R,CC), pass
18428+
// (R,L).
18429+
SDValue CCValue = DAG.getConstant(CC, DL, FlagsVT);
18430+
return DAG.getNode(ARMISD::CMOV, DL, VT, R, L, CCValue, CondFlags);
18431+
}
18432+
1837418433
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
1837518434
SDValue
1837618435
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18436+
// CMOV x, x, cc -> x
18437+
if (N->getOperand(0) == N->getOperand(1))
18438+
return N->getOperand(0);
18439+
18440+
if (SDValue R = foldCMOVOfCMOV(N, DAG))
18441+
return R;
18442+
1837718443
SDValue Cmp = N->getOperand(3);
1837818444
if (Cmp.getOpcode() != ARMISD::CMPZ)
1837918445
// Only looking at EQ and NE cases.

llvm/test/CodeGen/ARM/addsubo-legalization.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,17 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) {
2020
; CHECK-NEXT: vmov r4, r5, d17
2121
; CHECK-NEXT: subs.w r3, lr, r3
2222
; CHECK-NEXT: sbcs.w r2, r12, r2
23+
; CHECK-NEXT: mov.w r3, #-1
2324
; CHECK-NEXT: mov.w r2, #0
2425
; CHECK-NEXT: it lo
2526
; CHECK-NEXT: movlo r2, #1
26-
; CHECK-NEXT: cmp r2, #0
27-
; CHECK-NEXT: it ne
28-
; CHECK-NEXT: movne.w r2, #-1
29-
; CHECK-NEXT: subs r3, r4, r6
30-
; CHECK-NEXT: sbcs.w r3, r5, r7
3127
; CHECK-NEXT: it lo
28+
; CHECK-NEXT: movlo r2, r3
29+
; CHECK-NEXT: subs r6, r4, r6
30+
; CHECK-NEXT: sbcs.w r7, r5, r7
31+
; CHECK-NEXT: itt lo
3232
; CHECK-NEXT: movlo r1, #1
33-
; CHECK-NEXT: cmp r1, #0
34-
; CHECK-NEXT: it ne
35-
; CHECK-NEXT: movne.w r1, #-1
33+
; CHECK-NEXT: movlo r1, r3
3634
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
3735
; CHECK-NEXT: mov r0, r2
3836
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
@@ -59,19 +57,17 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) {
5957
; CHECK-NEXT: vmov r6, r7, d17
6058
; CHECK-NEXT: subs.w r3, lr, r3
6159
; CHECK-NEXT: sbcs.w r2, r12, r2
60+
; CHECK-NEXT: mov.w r3, #-1
6261
; CHECK-NEXT: mov.w r2, #0
6362
; CHECK-NEXT: it lo
6463
; CHECK-NEXT: movlo r2, #1
65-
; CHECK-NEXT: cmp r2, #0
66-
; CHECK-NEXT: it ne
67-
; CHECK-NEXT: movne.w r2, #-1
68-
; CHECK-NEXT: subs r3, r4, r6
69-
; CHECK-NEXT: sbcs.w r3, r5, r7
7064
; CHECK-NEXT: it lo
65+
; CHECK-NEXT: movlo r2, r3
66+
; CHECK-NEXT: subs r6, r4, r6
67+
; CHECK-NEXT: sbcs.w r7, r5, r7
68+
; CHECK-NEXT: itt lo
7169
; CHECK-NEXT: movlo r1, #1
72-
; CHECK-NEXT: cmp r1, #0
73-
; CHECK-NEXT: it ne
74-
; CHECK-NEXT: movne.w r1, #-1
70+
; CHECK-NEXT: movlo r1, r3
7571
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
7672
; CHECK-NEXT: mov r0, r2
7773
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}

0 commit comments

Comments
 (0)