@@ -24867,6 +24867,122 @@ static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) {
2486724867 return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond);
2486824868}
2486924869
24870+ // Reassociate the true/false expressions of a CSEL instruction to obtain a
24871+ // common subexpression with the comparison instruction. For example, change
24872+ // (CSEL (ADD (ADD x y) -c) f LO (SUBS x c)) to
24873+ // (CSEL (ADD (SUBS x c) y) f LO (SUBS x c)) such that (SUBS x c) is a common
24874+ // subexpression.
24875+ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
24876+ SDValue SubsNode = N->getOperand(3);
24877+ if (SubsNode.getOpcode() != AArch64ISD::SUBS || !SubsNode.hasOneUse())
24878+ return SDValue();
24879+ auto *CmpOpConst = dyn_cast<ConstantSDNode>(SubsNode.getOperand(1));
24880+ if (!CmpOpConst)
24881+ return SDValue();
24882+
24883+ SDValue CmpOpOther = SubsNode.getOperand(0);
24884+ EVT VT = N->getValueType(0);
24885+
24886+ // Get the operand that can be reassociated with the SUBS instruction.
24887+ auto GetReassociationOp = [&](SDValue Op, APInt ExpectedConst) {
24888+ if (Op.getOpcode() != ISD::ADD)
24889+ return SDValue();
24890+ if (Op.getOperand(0).getOpcode() != ISD::ADD ||
24891+ !Op.getOperand(0).hasOneUse())
24892+ return SDValue();
24893+ SDValue X = Op.getOperand(0).getOperand(0);
24894+ SDValue Y = Op.getOperand(0).getOperand(1);
24895+ if (X != CmpOpOther)
24896+ std::swap(X, Y);
24897+ if (X != CmpOpOther)
24898+ return SDValue();
24899+ auto *AddOpConst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
24900+ if (!AddOpConst || AddOpConst->getAPIntValue() != ExpectedConst)
24901+ return SDValue();
24902+ return Y;
24903+ };
24904+
24905+ // Try the reassociation using the given constant and condition code.
24906+ auto Fold = [&](APInt NewCmpConst, AArch64CC::CondCode NewCC) {
24907+ APInt ExpectedConst = -NewCmpConst;
24908+ SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedConst);
24909+ SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedConst);
24910+ if (!TReassocOp && !FReassocOp)
24911+ return SDValue();
24912+
24913+ SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
24914+ DAG.getVTList(VT, MVT_CC), CmpOpOther,
24915+ DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
24916+ CmpOpConst->getValueType(0)));
24917+
24918+ auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) {
24919+ if (!ReassocOp)
24920+ return N->getOperand(OpNum);
24921+ SDValue Res = DAG.getNode(ISD::ADD, SDLoc(N->getOperand(OpNum)), VT,
24922+ NewCmp.getValue(0), ReassocOp);
24923+ DAG.ReplaceAllUsesWith(N->getOperand(OpNum), Res);
24924+ return Res;
24925+ };
24926+
24927+ SDValue TValReassoc = Reassociate(TReassocOp, 0);
24928+ SDValue FValReassoc = Reassociate(FReassocOp, 1);
24929+ return DAG.getNode(AArch64ISD::CSEL, SDLoc(N), VT, TValReassoc, FValReassoc,
24930+ DAG.getConstant(NewCC, SDLoc(N->getOperand(2)), MVT_CC),
24931+ NewCmp.getValue(1));
24932+ };
24933+
24934+ auto CC = static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
24935+
24936+ // First, try to eliminate the compare instruction by searching for a
24937+ // subtraction with the same constant.
24938+ if (SDValue R = Fold(CmpOpConst->getAPIntValue(), CC))
24939+ return R;
24940+
24941+ if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero())
24942+ return SDValue();
24943+
24944+ // Next, search for a subtraction with a slightly different constant. By
24945+ // adjusting the condition code, we can still eliminate the compare
24946+ // instruction. Adjusting the constant is only valid if it does not result
24947+ // in signed/unsigned wrap for signed/unsigned comparisons, respectively.
24948+ // Since such comparisons are trivially true/false, we should not encounter
24949+ // them here but check for them nevertheless to be on the safe side.
24950+ auto CheckedFold = [&](bool Check, APInt NewCmpConst,
24951+ AArch64CC::CondCode NewCC) {
24952+ return Check ? Fold(NewCmpConst, NewCC) : SDValue();
24953+ };
24954+ switch (CC) {
24955+ case AArch64CC::EQ:
24956+ case AArch64CC::LS:
24957+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
24958+ CmpOpConst->getAPIntValue() + 1, AArch64CC::LO);
24959+ case AArch64CC::NE:
24960+ case AArch64CC::HI:
24961+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
24962+ CmpOpConst->getAPIntValue() + 1, AArch64CC::HS);
24963+ case AArch64CC::LO:
24964+ return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
24965+ CmpOpConst->getAPIntValue() - 1, AArch64CC::LS);
24966+ case AArch64CC::HS:
24967+ return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
24968+ CmpOpConst->getAPIntValue() - 1, AArch64CC::HI);
24969+ case AArch64CC::LT:
24970+ return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
24971+ CmpOpConst->getAPIntValue() - 1, AArch64CC::LE);
24972+ case AArch64CC::LE:
24973+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
24974+ CmpOpConst->getAPIntValue() + 1, AArch64CC::LT);
24975+ case AArch64CC::GT:
24976+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
24977+ CmpOpConst->getAPIntValue() + 1, AArch64CC::GE);
24978+ case AArch64CC::GE:
24979+ return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
24980+ CmpOpConst->getAPIntValue() - 1, AArch64CC::GT);
24981+ default:
24982+ return SDValue();
24983+ }
24984+ }
24985+
2487024986// Optimize CSEL instructions
2487124987static SDValue performCSELCombine(SDNode *N,
2487224988 TargetLowering::DAGCombinerInfo &DCI,
@@ -24878,6 +24994,11 @@ static SDValue performCSELCombine(SDNode *N,
2487824994 if (SDValue R = foldCSELOfCSEL(N, DAG))
2487924995 return R;
2488024996
24997+ // Try to reassociate the true/false expressions so that we can do CSE with
24998+ // a SUBS instruction used to perform the comparison.
24999+ if (SDValue R = reassociateCSELOperandsForCSE(N, DAG))
25000+ return R;
25001+
2488125002 // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
2488225003 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
2488325004 if (SDValue Folded = foldCSELofCTTZ(N, DAG))
0 commit comments