@@ -24838,6 +24838,122 @@ static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) {
2483824838 return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond);
2483924839}
2484024840
24841+ // Reassociate the true/false expressions of a CSEL instruction to obtain a
24842+ // common subexpression with the comparison instruction. For example, change
24843+ // (CSEL (ADD (ADD x y) -c) f LO (SUBS x c)) to
24844+ // (CSEL (ADD (SUBS x c) y) f LO (SUBS x c)) such that (SUBS x c) is a common
24845+ // subexpression.
24846+ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
24847+ SDValue SubsNode = N->getOperand(3);
24848+ if (SubsNode.getOpcode() != AArch64ISD::SUBS || !SubsNode.hasOneUse())
24849+ return SDValue();
24850+ auto *CmpOpConst = dyn_cast<ConstantSDNode>(SubsNode.getOperand(1));
24851+ if (!CmpOpConst)
24852+ return SDValue();
24853+
24854+ SDValue CmpOpOther = SubsNode.getOperand(0);
24855+ EVT VT = N->getValueType(0);
24856+
24857+ // Get the operand that can be reassociated with the SUBS instruction.
24858+ auto GetReassociationOp = [&](SDValue Op, APInt ExpectedConst) {
24859+ if (Op.getOpcode() != ISD::ADD)
24860+ return SDValue();
24861+ if (Op.getOperand(0).getOpcode() != ISD::ADD ||
24862+ !Op.getOperand(0).hasOneUse())
24863+ return SDValue();
24864+ SDValue X = Op.getOperand(0).getOperand(0);
24865+ SDValue Y = Op.getOperand(0).getOperand(1);
24866+ if (X != CmpOpOther)
24867+ std::swap(X, Y);
24868+ if (X != CmpOpOther)
24869+ return SDValue();
24870+ auto *AddOpConst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
24871+ if (!AddOpConst || AddOpConst->getAPIntValue() != ExpectedConst)
24872+ return SDValue();
24873+ return Y;
24874+ };
24875+
24876+ // Try the reassociation using the given constant and condition code.
24877+ auto Fold = [&](APInt NewCmpConst, AArch64CC::CondCode NewCC) {
24878+ APInt ExpectedConst = -NewCmpConst;
24879+ SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedConst);
24880+ SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedConst);
24881+ if (!TReassocOp && !FReassocOp)
24882+ return SDValue();
24883+
24884+ SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
24885+ DAG.getVTList(VT, MVT_CC), CmpOpOther,
24886+ DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
24887+ CmpOpConst->getValueType(0)));
24888+
24889+ auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) {
24890+ if (!ReassocOp)
24891+ return N->getOperand(OpNum);
24892+ SDValue Res = DAG.getNode(ISD::ADD, SDLoc(N->getOperand(OpNum)), VT,
24893+ NewCmp.getValue(0), ReassocOp);
24894+ DAG.ReplaceAllUsesWith(N->getOperand(OpNum), Res);
24895+ return Res;
24896+ };
24897+
24898+ SDValue TValReassoc = Reassociate(TReassocOp, 0);
24899+ SDValue FValReassoc = Reassociate(FReassocOp, 1);
24900+ return DAG.getNode(AArch64ISD::CSEL, SDLoc(N), VT, TValReassoc, FValReassoc,
24901+ DAG.getConstant(NewCC, SDLoc(N->getOperand(2)), MVT_CC),
24902+ NewCmp.getValue(1));
24903+ };
24904+
24905+ auto CC = static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
24906+
24907+ // First, try to eliminate the compare instruction by searching for a
24908+ // subtraction with the same constant.
24909+ if (SDValue R = Fold(CmpOpConst->getAPIntValue(), CC))
24910+ return R;
24911+
24912+ if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero())
24913+ return SDValue();
24914+
24915+ // Next, search for a subtraction with a slightly different constant. By
24916+ // adjusting the condition code, we can still eliminate the compare
24917+ // instruction. Adjusting the constant is only valid if it does not result
24918+ // in signed/unsigned wrap for signed/unsigned comparisons, respectively.
24919+ // Since such comparisons are trivially true/false, we should not encounter
24920+ // them here but check for them nevertheless to be on the safe side.
24921+ auto CheckedFold = [&](bool Check, APInt NewCmpConst,
24922+ AArch64CC::CondCode NewCC) {
24923+ return Check ? Fold(NewCmpConst, NewCC) : SDValue();
24924+ };
24925+ switch (CC) {
24926+ case AArch64CC::EQ:
24927+ case AArch64CC::LS:
24928+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
24929+ CmpOpConst->getAPIntValue() + 1, AArch64CC::LO);
24930+ case AArch64CC::NE:
24931+ case AArch64CC::HI:
24932+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
24933+ CmpOpConst->getAPIntValue() + 1, AArch64CC::HS);
24934+ case AArch64CC::LO:
24935+ return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
24936+ CmpOpConst->getAPIntValue() - 1, AArch64CC::LS);
24937+ case AArch64CC::HS:
24938+ return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
24939+ CmpOpConst->getAPIntValue() - 1, AArch64CC::HI);
24940+ case AArch64CC::LT:
24941+ return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
24942+ CmpOpConst->getAPIntValue() - 1, AArch64CC::LE);
24943+ case AArch64CC::LE:
24944+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
24945+ CmpOpConst->getAPIntValue() + 1, AArch64CC::LT);
24946+ case AArch64CC::GT:
24947+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
24948+ CmpOpConst->getAPIntValue() + 1, AArch64CC::GE);
24949+ case AArch64CC::GE:
24950+ return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
24951+ CmpOpConst->getAPIntValue() - 1, AArch64CC::GT);
24952+ default:
24953+ return SDValue();
24954+ }
24955+ }
24956+
2484124957// Optimize CSEL instructions
2484224958static SDValue performCSELCombine(SDNode *N,
2484324959 TargetLowering::DAGCombinerInfo &DCI,
@@ -24849,6 +24965,11 @@ static SDValue performCSELCombine(SDNode *N,
2484924965 if (SDValue R = foldCSELOfCSEL(N, DAG))
2485024966 return R;
2485124967
24968+ // Try to reassociate the true/false expressions so that we can do CSE with
24969+ // a SUBS instruction used to perform the comparison.
24970+ if (SDValue R = reassociateCSELOperandsForCSE(N, DAG))
24971+ return R;
24972+
2485224973 // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
2485324974 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
2485424975 if (SDValue Folded = foldCSELofCTTZ(N, DAG))
0 commit comments