@@ -53441,6 +53441,69 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
5344153441 truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget))
5344253442 return R;
5344353443
53444+ // In the middle end transforms:
53445+ // `(or (icmp eq X, C), (icmp eq X, C+1))`
53446+ // -> `(icmp ult (add x, -C), 2)`
53447+ // Likewise inverted cases with `ugt`.
53448+ //
53449+ // Since x86, pre avx512, doesn't have unsigned vector compares, this results
53450+ // in worse codegen. So, undo the middle-end transform and go back to `(or
53451+ // (icmp eq), (icmp eq))` form.
53452+ // Also skip AVX1 with ymm vectors, as the umin approach combines better than
53453+ // the xmm approach.
53454+ //
53455+ // NB: We don't handle the similiar simplication of `(and (icmp ne), (icmp
53456+ // ne))` as it doesn't end up instruction positive.
53457+ // TODO: We might want to do this for avx512 as well if we `sext` the result.
53458+ if (VT.isVector() && OpVT.isVector() && OpVT.isInteger() &&
53459+ ISD::isUnsignedIntSetCC(CC) && LHS.getOpcode() == ISD::ADD &&
53460+ !Subtarget.hasAVX512() &&
53461+ (OpVT.getSizeInBits() <= 128 || !Subtarget.hasAVX() ||
53462+ Subtarget.hasAVX2()) &&
53463+ LHS.hasOneUse()) {
53464+
53465+ APInt CmpC;
53466+ SDValue AddC = LHS.getOperand(1);
53467+ if (ISD::isConstantSplatVector(RHS.getNode(), CmpC) &&
53468+ DAG.isConstantIntBuildVectorOrConstantInt(AddC)) {
53469+ // See which form we have depending on the constant/condition.
53470+ SDValue C0 = SDValue();
53471+ SDValue C1 = SDValue();
53472+
53473+ // If we had `(add x, -1)` and can lower with `umin`, don't transform as
53474+ // we will end up generating an additional constant. Keeping in the
53475+ // current form has a slight latency cost, but it probably worth saving a
53476+ // constant.
53477+ if (ISD::isConstantSplatVectorAllOnes(AddC.getNode()) &&
53478+ DAG.getTargetLoweringInfo().isOperationLegal(ISD::UMIN, OpVT)) {
53479+ // Pass
53480+ }
53481+ // Normal Cases
53482+ else if ((CC == ISD::SETULT && CmpC == 2) ||
53483+ (CC == ISD::SETULE && CmpC == 1)) {
53484+ // These will constant fold.
53485+ C0 = DAG.getNegative(AddC, DL, OpVT);
53486+ C1 = DAG.getNode(ISD::SUB, DL, OpVT, C0,
53487+ DAG.getAllOnesConstant(DL, OpVT));
53488+ }
53489+ // Inverted Cases
53490+ else if ((CC == ISD::SETUGT && (-CmpC) == 3) ||
53491+ (CC == ISD::SETUGE && (-CmpC) == 2)) {
53492+ // These will constant fold.
53493+ C0 = DAG.getNOT(DL, AddC, OpVT);
53494+ C1 = DAG.getNode(ISD::ADD, DL, OpVT, C0,
53495+ DAG.getAllOnesConstant(DL, OpVT));
53496+ }
53497+ if (C0 && C1) {
53498+ SDValue NewLHS =
53499+ DAG.getSetCC(DL, VT, LHS.getOperand(0), C0, ISD::SETEQ);
53500+ SDValue NewRHS =
53501+ DAG.getSetCC(DL, VT, LHS.getOperand(0), C1, ISD::SETEQ);
53502+ return DAG.getNode(ISD::OR, DL, VT, NewLHS, NewRHS);
53503+ }
53504+ }
53505+ }
53506+
5344453507 // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
5344553508 // to avoid scalarization via legalization because v4i32 is not a legal type.
5344653509 if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
0 commit comments