@@ -3409,6 +3409,22 @@ static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
34093409 (isSignedIntSetCC(CC) && isSafeSignedCMN(Op, DAG)));
34103410}
34113411
3412+ static bool canBeCommutedToCMN(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3413+ SelectionDAG &DAG) {
3414+ // Returns true when (CMP LHS, RHS) may be emitted as a CMN with LHS and RHS
3415+ // commuted. Commuting cmn is based on the fact that adds is commutative:
3416+ // cmn x1, x2 is the same as cmn x2, x1. In theory this should always work,
3417+ // but the commuted form is effectively cmp x2, -x1, which means -x1 must not
3418+ // be 0 if unsigned, or INT_MIN if signed. However, a > -b and -b > a work
3419+ // assuming no wrap. Once isCMN(LHS) holds, equality conditions are accepted
3420+ // as-is; the unsigned and signed guards below are evaluated on RHS.
3421+ return isCMN(LHS, CC, DAG) &&
3422+ (isIntEqualitySetCC(CC) ||
3423+ (isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(RHS)) ||
3424+ (isSignedIntSetCC(CC) &&
3425+ !DAG.computeKnownBits(RHS).getSignedMinValue().isMinSignedValue()));
3426+ }
3427+
34123428static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
34133429 SelectionDAG &DAG, SDValue Chain,
34143430 bool IsSignaling) {
@@ -3429,8 +3445,9 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
34293445 return DAG.getNode(Opcode, DL, {MVT::i32, MVT::Other}, {Chain, LHS, RHS});
34303446}
34313447
3432- static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3433- const SDLoc &DL, SelectionDAG &DAG) {
3448+ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode &CC,
3449+ AArch64CC::CondCode &OutCC, const SDLoc &DL,
3450+ SelectionDAG &DAG) {
34343451 EVT VT = LHS.getValueType();
34353452 const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
34363453
@@ -3453,12 +3470,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
34533470 // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
34543471 Opcode = AArch64ISD::ADDS;
34553472 RHS = RHS.getOperand(1);
3456- } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3457- isIntEqualitySetCC(CC)) {
3473+ } else if (canBeCommutedToCMN(LHS, RHS, CC, DAG)) {
34583474 // The operands can be commuted when canBeCommutedToCMN allows it (not only for
34593475 // EQ/NE compares) ; can we combine a (CMP (sub 0, op1), op2) into a CMN ?
34603476 Opcode = AArch64ISD::ADDS;
34613477 LHS = LHS.getOperand(1);
3478+ CC = ISD::getSetCCSwappedOperands(CC);
34623479 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
34633480 if (LHS.getOpcode() == ISD::AND) {
34643481 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
@@ -3469,13 +3486,16 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
34693486 LHS.getOperand(0), LHS.getOperand(1));
34703487 // Replace all users of (and X, Y) with newly generated (ands X, Y)
34713488 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
3489+ OutCC = changeIntCCToAArch64CC(CC);
34723490 return ANDSNode.getValue(1);
34733491 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
34743492 // Use result of ANDS
3493+ OutCC = changeIntCCToAArch64CC(CC);
34753494 return LHS.getValue(1);
34763495 }
34773496 }
34783497
3498+ OutCC = changeIntCCToAArch64CC(CC);
34793499 return DAG.getNode(Opcode, DL, DAG.getVTList(VT, MVT_CC), LHS, RHS)
34803500 .getValue(1);
34813501}
@@ -3558,8 +3578,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
35583578 } else if (isCMN(RHS, CC, DAG)) {
35593579 Opcode = AArch64ISD::CCMN;
35603580 RHS = RHS.getOperand(1);
3561- } else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3562- isIntEqualitySetCC(CC)) {
3581+ } else if (canBeCommutedToCMN(LHS, RHS, CC, DAG)) {
35633582 // The operands can be commuted when canBeCommutedToCMN allows it (not only for
35643583 // EQ/NE compares) ; can we combine a (CCMP (sub 0, op1), op2) into a CCMN ?
35653584 Opcode = AArch64ISD::CCMN;
@@ -3676,7 +3695,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
36763695 if (ExtraCC != AArch64CC::AL) {
36773696 SDValue ExtraCmp;
36783697 if (!CCOp.getNode())
3679- ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
3698+ ExtraCmp = emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
36803699 else
36813700 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
36823701 ExtraCC, DL, DAG);
@@ -3687,7 +3706,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
36873706
36883707 // Produce a normal comparison if we are first in the chain
36893708 if (!CCOp)
3690- return emitComparison(LHS, RHS, CC, DL, DAG);
3709+ return emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
36913710 // Otherwise produce a ccmp.
36923711 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
36933712 DAG);
@@ -3909,6 +3928,17 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
39093928 SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
39103929 SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
39113930
3931+ // A CMN that can be commuted must not be counted twice, hence the OR.
3932+ // TODO: Is it possible for us to choose between two CMNs? If so,
3933+ // this should be changed to an add. This is an OR because, as far as I can
3934+ // tell, emitComparison only changes the subs to an adds and not back, so
3935+ // this reflects the fact that there can be at most one removal of a neg
3936+ // instruction.
3937+
3938+ // So, do not count twice if the CMN can be commuted, hence OR.
3939+ LHSIsCMN |= canBeCommutedToCMN(RHS, LHS, CC, DAG);
3940+ RHSIsCMN |= canBeCommutedToCMN(LHS, RHS, CC, DAG);
3941+
39123942 if (getCmpOperandFoldingProfit(TheLHS) + (LHSIsCMN ? 1 : 0) >
39133943 getCmpOperandFoldingProfit(TheRHS) + (RHSIsCMN ? 1 : 0)) {
39143944 std::swap(LHS, RHS);
@@ -3948,8 +3978,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
39483978 DAG.getValueType(MVT::i16));
39493979 Cmp = emitComparison(
39503980 SExt, DAG.getSignedConstant(ValueofRHS, DL, RHS.getValueType()), CC,
3951- DL, DAG);
3952- AArch64CC = changeIntCCToAArch64CC(CC);
3981+ AArch64CC, DL, DAG);
39533982 }
39543983 }
39553984
@@ -3962,8 +3991,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
39623991 }
39633992
39643993 if (!Cmp) {
3965- Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
3966- AArch64CC = changeIntCCToAArch64CC(CC);
3994+ Cmp = emitComparison(LHS, RHS, CC, AArch64CC, DL, DAG);
39673995 }
39683996 AArch64cc = DAG.getConstant(AArch64CC, DL, MVT_CC);
39693997 return Cmp;
@@ -10548,7 +10576,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
1054810576
1054910577 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1055010578 // clean. Some of them require two branches to implement.
10551- SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
10579+ AArch64CC::CondCode OutCC;
10580+ SDValue Cmp = emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
1055210581 AArch64CC::CondCode CC1, CC2;
1055310582 changeFPCCToAArch64CC(CC, CC1, CC2);
1055410583 SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
@@ -11009,7 +11038,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1100911038 if (IsStrict)
1101011039 Cmp = emitStrictFPComparison(LHS, RHS, DL, DAG, Chain, IsSignaling);
1101111040 else
11012- Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
11041+ Cmp = emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
1101311042
1101411043 AArch64CC::CondCode CC1, CC2;
1101511044 changeFPCCToAArch64CC(CC, CC1, CC2);
@@ -11392,7 +11421,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
1139211421 return VectorCmp;
1139311422 }
1139411423
11395- SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
11424+ AArch64CC::CondCode OutCC;
11425+ SDValue Cmp = emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
1139611426
1139711427 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1139811428 // clean. Some of them require two CSELs to implement.
0 commit comments