Skip to content

Commit 8396443

Browse files
committed
[AArch64] Allow commuting the cmn if both sides of the comparison are safe to be negated
-a > b means -b > a, as long as neither negation wraps: the negated values must not be INT_MIN in the signed case, or 0 in the unsigned case. Because adds is commutative, cmn w0, w1 and cmn w1, w0 do the same thing, which means that if the other side can be a cmn, we can still emit the cmn. We have to check both sides for the transformation to preserve the meaning of the original comparison, though: for -a > b, -b > a holds, but -a and -b must not be 0 if unsigned, or INT_MIN if signed. Finally, counteract this when judging whether to swap the operands for a cmn. https://alive2.llvm.org/ce/z/oQXEmh
1 parent 303bc0d commit 8396443

File tree

2 files changed

+49
-19
lines changed

2 files changed

+49
-19
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3409,6 +3409,22 @@ static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
34093409
(isSignedIntSetCC(CC) && isSafeSignedCMN(Op, DAG)));
34103410
}
34113411

3412+
static bool canBeCommutedToCMN(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3413+
SelectionDAG &DAG) {
3414+
3415+
// Commuting cmn is based on the fact that adds is commutative:
3416+
// cmn x1, x2 is the same as cmn x2, x1. In theory, this should always work, but
3417+
// we have to realize what this is now doing: cmp x2, -x1, which means -x1 must
3418+
// not be 0 if unsigned, or INT_MIN if signed. Given that, -a > b and -b > a
3419+
// are equivalent, assuming no wrap.
3420+
3421+
return isCMN(LHS, CC, DAG) &&
3422+
(isIntEqualitySetCC(CC) ||
3423+
(isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(RHS)) ||
3424+
(isSignedIntSetCC(CC) &&
3425+
!DAG.computeKnownBits(RHS).getSignedMinValue().isMinSignedValue()));
3426+
}
3427+
34123428
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
34133429
SelectionDAG &DAG, SDValue Chain,
34143430
bool IsSignaling) {
@@ -3429,8 +3445,9 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
34293445
return DAG.getNode(Opcode, DL, {MVT::i32, MVT::Other}, {Chain, LHS, RHS});
34303446
}
34313447

3432-
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3433-
const SDLoc &DL, SelectionDAG &DAG) {
3448+
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode &CC,
3449+
AArch64CC::CondCode &OutCC, const SDLoc &DL,
3450+
SelectionDAG &DAG) {
34343451
EVT VT = LHS.getValueType();
34353452
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
34363453

@@ -3453,12 +3470,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
34533470
// Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
34543471
Opcode = AArch64ISD::ADDS;
34553472
RHS = RHS.getOperand(1);
3456-
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3457-
isIntEqualitySetCC(CC)) {
3473+
} else if (canBeCommutedToCMN(LHS, RHS, CC, DAG)) {
34583474
// As we are looking for EQ/NE compares, the operands can be commuted ; can
34593475
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
34603476
Opcode = AArch64ISD::ADDS;
34613477
LHS = LHS.getOperand(1);
3478+
CC = ISD::getSetCCSwappedOperands(CC);
34623479
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
34633480
if (LHS.getOpcode() == ISD::AND) {
34643481
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
@@ -3469,13 +3486,16 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
34693486
LHS.getOperand(0), LHS.getOperand(1));
34703487
// Replace all users of (and X, Y) with newly generated (ands X, Y)
34713488
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
3489+
OutCC = changeIntCCToAArch64CC(CC);
34723490
return ANDSNode.getValue(1);
34733491
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
34743492
// Use result of ANDS
3493+
OutCC = changeIntCCToAArch64CC(CC);
34753494
return LHS.getValue(1);
34763495
}
34773496
}
34783497

3498+
OutCC = changeIntCCToAArch64CC(CC);
34793499
return DAG.getNode(Opcode, DL, DAG.getVTList(VT, MVT_CC), LHS, RHS)
34803500
.getValue(1);
34813501
}
@@ -3558,8 +3578,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
35583578
} else if (isCMN(RHS, CC, DAG)) {
35593579
Opcode = AArch64ISD::CCMN;
35603580
RHS = RHS.getOperand(1);
3561-
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3562-
isIntEqualitySetCC(CC)) {
3581+
} else if (canBeCommutedToCMN(LHS, RHS, CC, DAG)) {
35633582
// As we are looking for EQ/NE compares, the operands can be commuted ; can
35643583
// we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
35653584
Opcode = AArch64ISD::CCMN;
@@ -3676,7 +3695,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
36763695
if (ExtraCC != AArch64CC::AL) {
36773696
SDValue ExtraCmp;
36783697
if (!CCOp.getNode())
3679-
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
3698+
ExtraCmp = emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
36803699
else
36813700
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
36823701
ExtraCC, DL, DAG);
@@ -3687,7 +3706,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
36873706

36883707
// Produce a normal comparison if we are first in the chain
36893708
if (!CCOp)
3690-
return emitComparison(LHS, RHS, CC, DL, DAG);
3709+
return emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
36913710
// Otherwise produce a ccmp.
36923711
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
36933712
DAG);
@@ -3909,6 +3928,17 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
39093928
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
39103929
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
39113930

3931+
// Do not count twice. If the CMN can be commuted, hence OR.
3932+
// TODO: Is it possible for us to choose between two CMN? If so,
3933+
// this should be changed to an add. This is an or because as far as I can
3934+
// tell, emitComparison only changes the subs to an adds and not back, so
3935+
// this reflects the fact that there can be at most one removal of a neg
3936+
// instruction.
3937+
3938+
// So, do not count twice if the CMN can be commuted, hence OR.
3939+
LHSIsCMN |= canBeCommutedToCMN(RHS, LHS, CC, DAG);
3940+
RHSIsCMN |= canBeCommutedToCMN(LHS, RHS, CC, DAG);
3941+
39123942
if (getCmpOperandFoldingProfit(TheLHS) + (LHSIsCMN ? 1 : 0) >
39133943
getCmpOperandFoldingProfit(TheRHS) + (RHSIsCMN ? 1 : 0)) {
39143944
std::swap(LHS, RHS);
@@ -3948,8 +3978,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
39483978
DAG.getValueType(MVT::i16));
39493979
Cmp = emitComparison(
39503980
SExt, DAG.getSignedConstant(ValueofRHS, DL, RHS.getValueType()), CC,
3951-
DL, DAG);
3952-
AArch64CC = changeIntCCToAArch64CC(CC);
3981+
AArch64CC, DL, DAG);
39533982
}
39543983
}
39553984

@@ -3962,8 +3991,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
39623991
}
39633992

39643993
if (!Cmp) {
3965-
Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
3966-
AArch64CC = changeIntCCToAArch64CC(CC);
3994+
Cmp = emitComparison(LHS, RHS, CC, AArch64CC, DL, DAG);
39673995
}
39683996
AArch64cc = DAG.getConstant(AArch64CC, DL, MVT_CC);
39693997
return Cmp;
@@ -10548,7 +10576,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
1054810576

1054910577
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1055010578
// clean. Some of them require two branches to implement.
10551-
SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
10579+
AArch64CC::CondCode OutCC;
10580+
SDValue Cmp = emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
1055210581
AArch64CC::CondCode CC1, CC2;
1055310582
changeFPCCToAArch64CC(CC, CC1, CC2);
1055410583
SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
@@ -11009,7 +11038,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1100911038
if (IsStrict)
1101011039
Cmp = emitStrictFPComparison(LHS, RHS, DL, DAG, Chain, IsSignaling);
1101111040
else
11012-
Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
11041+
Cmp = emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
1101311042

1101411043
AArch64CC::CondCode CC1, CC2;
1101511044
changeFPCCToAArch64CC(CC, CC1, CC2);
@@ -11392,7 +11421,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
1139211421
return VectorCmp;
1139311422
}
1139411423

11395-
SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
11424+
AArch64CC::CondCode OutCC;
11425+
SDValue Cmp = emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
1139611426

1139711427
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1139811428
// clean. Some of them require two CSELs to implement.

llvm/test/CodeGen/AArch64/cmp-to-cmn.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ target triple = "arm64"
66
define i1 @test_EQ_IllEbT(i64 %a, i64 %b) {
77
; CHECK-LABEL: test_EQ_IllEbT:
88
; CHECK: // %bb.0: // %entry
9-
; CHECK-NEXT: cmn x0, x1
9+
; CHECK-NEXT: cmn x1, x0
1010
; CHECK-NEXT: cset w0, eq
1111
; CHECK-NEXT: ret
1212
entry:
@@ -70,7 +70,7 @@ entry:
7070
define i1 @test_EQ_IiiEbT(i32 %a, i32 %b) {
7171
; CHECK-LABEL: test_EQ_IiiEbT:
7272
; CHECK: // %bb.0: // %entry
73-
; CHECK-NEXT: cmn w0, w1
73+
; CHECK-NEXT: cmn w1, w0
7474
; CHECK-NEXT: cset w0, eq
7575
; CHECK-NEXT: ret
7676
entry:
@@ -220,7 +220,7 @@ entry:
220220
define i1 @test_NE_IllEbT(i64 %a, i64 %b) {
221221
; CHECK-LABEL: test_NE_IllEbT:
222222
; CHECK: // %bb.0: // %entry
223-
; CHECK-NEXT: cmn x0, x1
223+
; CHECK-NEXT: cmn x1, x0
224224
; CHECK-NEXT: cset w0, ne
225225
; CHECK-NEXT: ret
226226
entry:
@@ -284,7 +284,7 @@ entry:
284284
define i1 @test_NE_IiiEbT(i32 %a, i32 %b) {
285285
; CHECK-LABEL: test_NE_IiiEbT:
286286
; CHECK: // %bb.0: // %entry
287-
; CHECK-NEXT: cmn w0, w1
287+
; CHECK-NEXT: cmn w1, w0
288288
; CHECK-NEXT: cset w0, ne
289289
; CHECK-NEXT: ret
290290
entry:

0 commit comments

Comments
 (0)