From 1e8ef2e14f700ce33d7a12e955ef66a6f6fb3c80 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 26 Jun 2025 17:55:18 +0100 Subject: [PATCH 1/3] [LV] Add support for cmp reductions with decreasing IVs using SMin. Similar to FindLastIV, add FindFirstIV to support select (icmp(), x, y) reductions where one of x or y is a decreasing induction. This is done via a new recurrence kind FindFirstIVSMin, which selects the first value from the reduction vector using smin instead of the last value (FindLastIV). It uses signed max as sentinel value. --- llvm/include/llvm/Analysis/IVDescriptors.h | 42 +- llvm/lib/Analysis/IVDescriptors.cpp | 68 +- llvm/lib/Transforms/Utils/LoopUtils.cpp | 13 +- .../Transforms/Vectorize/LoopVectorize.cpp | 62 +- .../Transforms/Vectorize/SLPVectorizer.cpp | 3 + llvm/lib/Transforms/Vectorize/VPlan.h | 2 +- .../Transforms/Vectorize/VPlanAnalysis.cpp | 2 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 32 +- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +- .../LoopVectorize/iv-select-cmp-decreasing.ll | 1060 +++++++++++++++-- .../vplan-printing-reductions.ll | 2 +- 11 files changed, 1140 insertions(+), 148 deletions(-) diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 463249461483f..310d05b6f04af 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -54,6 +54,9 @@ enum class RecurKind { FMulAdd, ///< Sum of float products with llvm.fmuladd(a * b + sum). AnyOf, ///< AnyOf reduction with select(cmp(),x,y) where one of (x,y) is ///< loop invariant, and both x and y are integer type. + FindFirstIVSMin, ///< FindFirst reduction with select(icmp(),x,y) where one of + ///< (x,y) is a decreasing loop induction, and both x and y + ///< are integer type, producing a SMin reduction.
FindLastIVSMax, ///< FindLast reduction with select(cmp(),x,y) where one of ///< (x,y) is increasing loop induction, and both x and y ///< are integer type, producing a SMax reduction. @@ -165,13 +168,13 @@ class RecurrenceDescriptor { /// Returns a struct describing whether the instruction is either a /// Select(ICmp(A, B), X, Y), or /// Select(FCmp(A, B), X, Y) - /// where one of (X, Y) is an increasing loop induction variable, and the - /// other is a PHI value. + /// where one of (X, Y) is an increasing (FindLast) or decreasing (FindFirst) + /// loop induction variable, and the other is a PHI value. // TODO: Support non-monotonic variable. FindLast does not need be restricted // to increasing loop induction variables. - LLVM_ABI static InstDesc isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi, - Instruction *I, - ScalarEvolution &SE); + LLVM_ABI static InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop, + PHINode *OrigPhi, Instruction *I, + ScalarEvolution &SE); /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. @@ -259,6 +262,12 @@ class RecurrenceDescriptor { return Kind == RecurKind::AnyOf; } + /// Returns true if the recurrence kind is of the form + /// select(cmp(),x,y) where one of (x,y) is decreasing loop induction. + static bool isFindFirstIVRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::FindFirstIVSMin; + } + /// Returns true if the recurrence kind is of the form /// select(cmp(),x,y) where one of (x,y) is increasing loop induction. static bool isFindLastIVRecurrenceKind(RecurKind Kind) { @@ -269,22 +278,35 @@ class RecurrenceDescriptor { /// Returns true if recurrece kind is a signed redux kind. 
static bool isSignedRecurrenceKind(RecurKind Kind) { return Kind == RecurKind::SMax || Kind == RecurKind::SMin || + Kind == RecurKind::FindFirstIVSMin || Kind == RecurKind::FindLastIVSMax; } + /// Returns true if the recurrence kind is of the form + /// select(cmp(),x,y) where one of (x,y) is an increasing or decreasing loop + /// induction. + static bool isFindIVRecurrenceKind(RecurKind Kind) { + return isFindFirstIVRecurrenceKind(Kind) || + isFindLastIVRecurrenceKind(Kind); + } + /// Returns the type of the recurrence. This type can be narrower than the /// actual type of the Phi if the recurrence has been type-promoted. Type *getRecurrenceType() const { return RecurrenceType; } - /// Returns the sentinel value for FindLastIV recurrences to replace the start - /// value. + /// Returns the sentinel value for FindFirstIV & FindLastIV recurrences to + /// replace the start value. Value *getSentinelValue() const { - assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind"); Type *Ty = StartValue->getType(); unsigned BW = Ty->getIntegerBitWidth(); + if (isFindLastIVRecurrenceKind(Kind)) { + return ConstantInt::get(Ty, isSignedRecurrenceKind(Kind) + ? APInt::getSignedMinValue(BW) + : APInt::getMinValue(BW)); + } return ConstantInt::get(Ty, isSignedRecurrenceKind(Kind) - ? APInt::getSignedMinValue(BW) - : APInt::getMinValue(BW)); + ?
APInt::getSignedMaxValue(BW) + : APInt::getMaxValue(BW)); } /// Returns a reference to the instructions used for type-promoting the diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index c8e97e5ec0e58..b49258e3b54ef 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -50,6 +50,7 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) { case RecurKind::UMax: case RecurKind::UMin: case RecurKind::AnyOf: + case RecurKind::FindFirstIVSMin: case RecurKind::FindLastIVSMax: case RecurKind::FindLastIVUMax: return true; @@ -684,8 +685,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, // value of the data type or a non-constant value by using mask and multiple // reduction operations. RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi, - Instruction *I, ScalarEvolution &SE) { +RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop, + PHINode *OrigPhi, Instruction *I, + ScalarEvolution &SE) { // TODO: Support the vectorization of FindLastIV when the reduction phi is // used by more than one select instruction. This vectorization is only // performed when the SCEV of each increasing induction variable used by the @@ -713,25 +715,49 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi, return std::nullopt; const SCEV *Step = AR->getStepRecurrence(SE); - if (!SE.isKnownPositive(Step)) + + if (isFindFirstIVRecurrenceKind(Kind)) { + if (!SE.isKnownNegative(Step)) + return std::nullopt; + } else if (!SE.isKnownPositive(Step)) return std::nullopt; // Keep the minimum value of the recurrence type as the sentinel value. 
// The maximum acceptable range for the increasing induction variable, - // called the valid range, will be defined as + + const ConstantRange IVRange = SE.getSignedRange(AR); + // Keep the minimum (FindLast) or maximum (FindFirst) value of the + // recurrence type as the sentinel value. The maximum acceptable range for + // the induction variable, called the valid range, will be defined as // [<sentinel value> + 1, <sentinel value>) - // where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>) + // where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>) for + // FindLastIV or [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV. // TODO: This range restriction can be lifted by adding an additional // virtual OR reduction. auto CheckRange = [&](bool IsSigned) { const ConstantRange IVRange = IsSigned ? SE.getSignedRange(AR) : SE.getUnsignedRange(AR); unsigned NumBits = Ty->getIntegerBitWidth(); - const APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits) - : APInt::getMinValue(NumBits); - const ConstantRange ValidRange = - ConstantRange::getNonEmpty(Sentinel + 1, Sentinel); - LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange + ConstantRange ValidRange = ConstantRange::getEmpty(NumBits); + if (isFindLastIVRecurrenceKind(Kind)) { + APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits) + : APInt::getMinValue(NumBits); + ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel); + } else { + assert(isFindFirstIVRecurrenceKind(Kind) && + "Kind must either be a FindLastIV or FindFirstIV"); + assert(IsSigned && + "only FindFirstIV with SMax is supported at the moment"); + ValidRange = + ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits), + APInt::getSignedMaxValue(NumBits) - 1); + } + + LLVM_DEBUG(dbgs() << "LV: " + << (isFindLastIVRecurrenceKind(Kind) ?
"FindLastIV" + : "FindFirstIV") + << " valid range is " << ValidRange << ", and the range of " << *AR << " is " << IVRange << "\n"); @@ -739,10 +765,18 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi, // its range is fully contained within the valid range. return ValidRange.contains(IVRange); }; + if (isFindLastIVRecurrenceKind(Kind)) { + if (CheckRange(true)) + return RecurKind::FindLastIVSMax; + if (CheckRange(false)) + return RecurKind::FindLastIVUMax; + return std::nullopt; + } + assert(isFindFirstIVRecurrenceKind(Kind) && + "Kind must either be a FindLastIV or FindFirstIV"); + if (CheckRange(true)) - return RecurKind::FindLastIVSMax; - if (CheckRange(false)) - return RecurKind::FindLastIVUMax; + return RecurKind::FindFirstIVSMin; return std::nullopt; }; @@ -888,8 +922,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul || Kind == RecurKind::Add || Kind == RecurKind::Mul) return isConditionalRdxPattern(I); - if (isFindLastIVRecurrenceKind(Kind) && SE) - return isFindLastIVPattern(L, OrigPhi, I, *SE); + if (isFindIVRecurrenceKind(Kind) && SE) + return isFindIVPattern(Kind, L, OrigPhi, I, *SE); [[fallthrough]]; case Instruction::FCmp: case Instruction::ICmp: @@ -1003,6 +1037,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, LLVM_DEBUG(dbgs() << "Found a FindLastIV reduction PHI." << *Phi << "\n"); return true; } + if (AddReductionVar(Phi, RecurKind::FindFirstIVSMin, TheLoop, FMF, RedDes, DB, + AC, DT, SE)) { + LLVM_DEBUG(dbgs() << "Found a FindFirstIV reduction PHI." << *Phi << "\n"); + return true; + } if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT, SE)) { LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." 
<< *Phi << "\n"); @@ -1150,6 +1189,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { case RecurKind::Mul: return Instruction::Mul; case RecurKind::AnyOf: + case RecurKind::FindFirstIVSMin: case RecurKind::FindLastIVSMax: case RecurKind::FindLastIVUMax: case RecurKind::Or: diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index c50bb4a497c6a..ac27ccf409d6b 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1227,9 +1227,12 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src, RecurKind RdxKind, Value *Start, Value *Sentinel) { bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RdxKind); - Value *MaxRdx = Src->getType()->isVectorTy() - ? Builder.CreateIntMaxReduce(Src, IsSigned) - : Src; + Value *MaxRdx = + Src->getType()->isVectorTy() + ? (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind) + ? Builder.CreateIntMaxReduce(Src, IsSigned) + : Builder.CreateIntMinReduce(Src, IsSigned)) + : Src; // Correct the final reduction result back to the start value if the maximum // reduction is sentinel value. 
Value *Cmp = @@ -1324,8 +1327,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src, Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src, RecurKind Kind, Value *Mask, Value *EVL) { assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && - !RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) && - "AnyOf or FindLastIV reductions are not supported."); + !RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) && + "AnyOf, FindFirstIV and FindLastIV reductions are not supported."); Intrinsic::ID Id = getReductionIntrinsicID(Kind); auto VPID = VPIntrinsic::getForIntrinsic(Id); assert(VPReductionIntrinsic::isVPReduction(VPID) && diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index aa16083829625..95479373b4393 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4922,7 +4922,7 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF, const RecurrenceDescriptor &RdxDesc = Reduction.second; RecurKind RK = RdxDesc.getRecurrenceKind(); return RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || - RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK); + RecurrenceDescriptor::isFindIVRecurrenceKind(RK); }); if (HasSelectCmpReductions) { LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n"); @@ -7240,8 +7240,8 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) { static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) { using namespace VPlanPatternMatch; - assert(RdxResult->getOpcode() == VPInstruction::ComputeFindLastIVResult && - "RdxResult must be ComputeFindLastIVResult"); + assert(RdxResult->getOpcode() == VPInstruction::ComputeFindIVResult && + "RdxResult must be ComputeFindIVResult"); VPValue *StartVPV = RdxResult->getOperand(1); match(StartVPV, m_Freeze(m_VPValue(StartVPV))); return StartVPV->getLiveInIRValue(); @@ -7259,7 +7259,7 @@ static 
void fixReductionScalarResumeWhenVectorizingEpilog( if (!EpiRedResult || (EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult && EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult && - EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult)) + EpiRedResult->getOpcode() != VPInstruction::ComputeFindIVResult)) return; auto *EpiRedHeaderPhi = @@ -7285,7 +7285,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( "AnyOf expected to start by comparing main resume value to original " "start value"); MainResumeValue = Cmp->getOperand(0); - } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind( + } else if (RecurrenceDescriptor::isFindIVRecurrenceKind( RdxDesc.getRecurrenceKind())) { Value *StartV = getStartValueFromReductionResult(EpiRedResult); Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue(); @@ -9041,8 +9041,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( RecurKind Kind = RdxDesc.getRecurrenceKind(); assert( !RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && - !RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) && - "AnyOf and FindLast reductions are not allowed for in-loop reductions"); + !RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) && + "AnyOf and FindIV reductions are not allowed for in-loop reductions"); // Collect the chain of "link" recipes for the reduction starting at PhiR. 
SetVector Worklist; @@ -9200,7 +9200,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( cast(&U)->getOpcode() == VPInstruction::ComputeReductionResult || cast(&U)->getOpcode() == - VPInstruction::ComputeFindLastIVResult); + VPInstruction::ComputeFindIVResult); }); if (CM.usePredicatedReductionSelect()) PhiR->setOperand(1, NewExitingVPV); @@ -9244,12 +9244,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( VPInstruction *FinalReductionResult; VPBuilder::InsertPointGuard Guard(Builder); Builder.setInsertPoint(MiddleVPBB, IP); - if (RecurrenceDescriptor::isFindLastIVRecurrenceKind( + if (RecurrenceDescriptor::isFindIVRecurrenceKind( RdxDesc.getRecurrenceKind())) { VPValue *Start = PhiR->getStartValue(); VPValue *Sentinel = Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()); FinalReductionResult = - Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult, + Builder.createNaryOp(VPInstruction::ComputeFindIVResult, {PhiR, Start, Sentinel, NewExitingVPV}, ExitDL); } else if (RecurrenceDescriptor::isAnyOfRecurrenceKind( RdxDesc.getRecurrenceKind())) { @@ -9312,16 +9312,16 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; } - if (RecurrenceDescriptor::isFindLastIVRecurrenceKind( + if (RecurrenceDescriptor::isFindIVRecurrenceKind( RdxDesc.getRecurrenceKind())) { - // Adjust the start value for FindLastIV recurrences to use the sentinel - // value after generating the ResumePhi recipe, which uses the original - // start value. + // Adjust the start value for FindFirstIV/FindLastIV recurrences to use + // the sentinel value after generating the ResumePhi recipe, which uses + // the original start value. 
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue())); } RecurKind RK = RdxDesc.getRecurrenceKind(); if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) && - !RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) && + !RecurrenceDescriptor::isFindIVRecurrenceKind(RK) && !RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) { VPBuilder PHBuilder(Plan->getVectorPreheader()); VPValue *Iden = Plan->getOrAddLiveIn( @@ -9704,18 +9704,18 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { VPlanTransforms::runPass(VPlanTransforms::removeDeadRecipes, MainPlan); using namespace VPlanPatternMatch; - // When vectorizing the epilogue, FindLastIV reductions can introduce multiple - // uses of undef/poison. If the reduction start value may be undef or poison - // it needs to be frozen and the frozen start has to be used when computing - // the reduction result. We also need to use the frozen value in the resume - // phi generated by the main vector loop, as this is also used to compute the - // reduction result after the epilogue vector loop. + // When vectorizing the epilogue, FindFirstIV & FindLastIV reductions can + // introduce multiple uses of undef/poison. If the reduction start value may + // be undef or poison it needs to be frozen and the frozen start has to be + // used when computing the reduction result. We also need to use the frozen + // value in the resume phi generated by the main vector loop, as this is also + // used to compute the reduction result after the epilogue vector loop. 
auto AddFreezeForFindLastIVReductions = [](VPlan &Plan, bool UpdateResumePhis) { VPBuilder Builder(Plan.getEntry()); for (VPRecipeBase &R : *Plan.getMiddleBlock()) { auto *VPI = dyn_cast(&R); - if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult) + if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindIVResult) continue; VPValue *OrigStart = VPI->getOperand(1); if (isGuaranteedNotToBeUndefOrPoison(OrigStart->getLiveInIRValue())) @@ -9810,7 +9810,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, return VPI && (VPI->getOpcode() == VPInstruction::ComputeAnyOfResult || VPI->getOpcode() == VPInstruction::ComputeReductionResult || - VPI->getOpcode() == VPInstruction::ComputeFindLastIVResult); + VPI->getOpcode() == VPInstruction::ComputeFindIVResult); })); ResumeV = cast(ReductionPhi->getUnderlyingInstr()) ->getIncomingValueForBlock(L->getLoopPreheader()); @@ -9828,20 +9828,20 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, BasicBlock *PBB = cast(ResumeV)->getParent(); IRBuilder<> Builder(PBB, PBB->getFirstNonPHIIt()); ResumeV = Builder.CreateICmpNE(ResumeV, StartV); - } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { + } else if (RecurrenceDescriptor::isFindIVRecurrenceKind(RK)) { Value *StartV = getStartValueFromReductionResult(RdxResult); assert(RdxDesc.getRecurrenceStartValue() == StartV && - "start value from ComputeFindLastIVResult must match"); + "start value from ComputeFinIVResult must match"); ToFrozen[StartV] = cast(ResumeV)->getIncomingValueForBlock( EPI.MainLoopIterationCountCheck); - // VPReductionPHIRecipe for FindLastIV reductions requires an adjustment - // to the resume value. The resume value is adjusted to the sentinel - // value when the final value from the main vector loop equals the start - // value. This ensures correctness when the start value might not be - // less than the minimum value of a monotonically increasing induction - // variable. 
+ // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions requires + // an adjustment to the resume value. The resume value is adjusted to + // the sentinel value when the final value from the main vector loop + // equals the start value. This ensures correctness when the start value + // might not be less than the minimum value of a monotonically + // increasing induction variable. BasicBlock *ResumeBB = cast(ResumeV)->getParent(); IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt()); Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 27a7538ecd939..0941bf61953f1 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -23180,6 +23180,7 @@ class HorizontalReduction { case RecurKind::FMul: case RecurKind::FMulAdd: case RecurKind::AnyOf: + case RecurKind::FindFirstIVSMin: case RecurKind::FindLastIVSMax: case RecurKind::FindLastIVUMax: case RecurKind::FMaximumNum: @@ -23315,6 +23316,7 @@ class HorizontalReduction { case RecurKind::FMul: case RecurKind::FMulAdd: case RecurKind::AnyOf: + case RecurKind::FindFirstIVSMin: case RecurKind::FindLastIVSMax: case RecurKind::FindLastIVUMax: case RecurKind::FMaximumNum: @@ -23415,6 +23417,7 @@ class HorizontalReduction { case RecurKind::FMul: case RecurKind::FMulAdd: case RecurKind::AnyOf: + case RecurKind::FindFirstIVSMin: case RecurKind::FindLastIVSMax: case RecurKind::FindLastIVUMax: case RecurKind::FMaximumNum: diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 32e788be76cb7..61b5ccd85bc6e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -946,7 +946,7 @@ class VPInstruction : public VPRecipeWithIRFlags, /// Compute the final result of a AnyOf reduction with select(cmp(),x,y), /// where one of (x,y) is loop invariant, and both x and 
y are integer type. ComputeAnyOfResult, - ComputeFindLastIVResult, + ComputeFindIVResult, ComputeReductionResult, // Extracts the last lane from its operand if it is a vector, or the last // part if scalar. In the latter case, the recipe will be removed during diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index da4a52203db3f..8b48dda5bcf71 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -91,7 +91,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { "different types inferred for different operands"); return IntegerType::get(Ctx, 1); case VPInstruction::ComputeAnyOfResult: - case VPInstruction::ComputeFindLastIVResult: + case VPInstruction::ComputeFindIVResult: case VPInstruction::ComputeReductionResult: { auto *PhiR = cast(R->getOperand(0)); auto *OrigPhi = cast(PhiR->getUnderlyingValue()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 7b302407519e7..73d82319e13ec 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -460,7 +460,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { case VPInstruction::ComputeAnyOfResult: case VPInstruction::ReductionStartVector: return 3; - case VPInstruction::ComputeFindLastIVResult: + case VPInstruction::ComputeFindIVResult: return 4; case Instruction::Call: case Instruction::GetElementPtr: @@ -725,14 +725,14 @@ Value *VPInstruction::generate(VPTransformState &State) { return createAnyOfReduction(Builder, ReducedPartRdx, State.get(getOperand(1), VPLane(0)), OrigPhi); } - case VPInstruction::ComputeFindLastIVResult: { + case VPInstruction::ComputeFindIVResult: { // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary // and will be removed by breaking up the recipe further. 
auto *PhiR = cast(getOperand(0)); // Get its reduction variable descriptor. const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); RecurKind RK = RdxDesc.getRecurrenceKind(); - assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) && + assert(RecurrenceDescriptor::isFindIVRecurrenceKind(RK) && "Unexpected reduction kind"); assert(!PhiR->isInLoop() && "In-loop FindLastIV reduction is not supported yet"); @@ -741,9 +741,17 @@ Value *VPInstruction::generate(VPTransformState &State) { // sentinel value, followed by one operand for each part of the reduction. unsigned UF = getNumOperands() - 3; Value *ReducedPartRdx = State.get(getOperand(3)); - RecurKind MinMaxKind = RecurrenceDescriptor::isSignedRecurrenceKind(RK) - ? RecurKind::SMax - : RecurKind::UMax; + RecurKind MinMaxKind; + bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RK); + if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { + MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax; + } else { + assert(RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK) && + "Kind must either be a FindLastIV or FindFirstIV"); + assert(IsSigned && + "only FindFirstIV with SMax is supported at the moment"); + MinMaxKind = RecurKind::SMin; + } for (unsigned Part = 1; Part < UF; ++Part) ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx, State.get(getOperand(3 + Part))); @@ -761,8 +769,8 @@ Value *VPInstruction::generate(VPTransformState &State) { const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); RecurKind RK = RdxDesc.getRecurrenceKind(); - assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) && - "should be handled by ComputeFindLastIVResult"); + assert(!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) && + "should be handled by ComputeFindIVResult"); Type *ResultTy = State.TypeAnalysis.inferScalarType(this); // The recipe's operands are the reduction phi, followed by one operand for @@ -977,7 +985,7 @@ bool 
VPInstruction::isVectorToScalar() const { getOpcode() == Instruction::ExtractElement || getOpcode() == VPInstruction::FirstActiveLane || getOpcode() == VPInstruction::ComputeAnyOfResult || - getOpcode() == VPInstruction::ComputeFindLastIVResult || + getOpcode() == VPInstruction::ComputeFindIVResult || getOpcode() == VPInstruction::ComputeReductionResult || getOpcode() == VPInstruction::AnyOf; } @@ -1079,7 +1087,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::PtrAdd: return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); case VPInstruction::ComputeAnyOfResult: - case VPInstruction::ComputeFindLastIVResult: + case VPInstruction::ComputeFindIVResult: return Op == getOperand(1); }; llvm_unreachable("switch should return"); @@ -1168,8 +1176,8 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ComputeAnyOfResult: O << "compute-anyof-result"; break; - case VPInstruction::ComputeFindLastIVResult: - O << "compute-find-last-iv-result"; + case VPInstruction::ComputeFindIVResult: + O << "compute-find-iv-result"; break; case VPInstruction::ComputeReductionResult: O << "compute-reduction-result"; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 6c2b8210ffb7b..2dd43c092ff7a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -355,7 +355,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) { m_VPValue(), m_VPValue(), m_VPValue(Op1))) || match(&R, m_VPInstruction( m_VPValue(), m_VPValue(Op1))) || - match(&R, m_VPInstruction( + match(&R, m_VPInstruction( m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) { addUniformForAllParts(cast(&R)); for (unsigned Part = 1; Part != UF; ++Part) diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index cf10d32e063ab..ee0a64397e2ac 100644 --- 
a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -1,26 +1,185 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s -; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s -; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC1VF4 %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC4VF4 %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefix=IC4VF1 %s define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { -; CHECK-LABEL: define i64 @select_decreasing_induction_icmp_const_start( -; CHECK-SAME: ptr [[A:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; CHECK-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ] -; 
CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start( +; IC1VF4-SAME: ptr [[A:%.*]]) { +; IC1VF4-NEXT: [[ENTRY:.*]]: +; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC1VF4: [[VECTOR_PH]]: +; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; IC1VF4: [[VECTOR_BODY]]: +; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] +; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> +; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) +; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 +; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IC1VF4: [[MIDDLE_BLOCK]]: +; IC1VF4-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP4]]) +; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], 9223372036854775807 +; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331 +; IC1VF4-NEXT: br i1 
true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IC1VF4: [[SCALAR_PH]]: +; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 19999, %[[ENTRY]] ] +; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ] +; IC1VF4-NEXT: br label %[[LOOP:.*]] +; IC1VF4: [[LOOP]]: +; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 +; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] +; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 +; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; IC1VF4: [[EXIT]]: +; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; +; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start( +; IC4VF4-SAME: ptr [[A:%.*]]) { +; IC4VF4-NEXT: [[ENTRY:.*]]: +; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC4VF4: [[VECTOR_PH]]: +; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; IC4VF4: [[VECTOR_BODY]]: +; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 
9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] +; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) +; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] +; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] +; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 +; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 +; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4 +; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3 +; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 +; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12 +; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> +; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; IC4VF4-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> +; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 +; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> +; IC4VF4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 +; 
IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD8]], <4 x i64> poison, <4 x i32> +; IC4VF4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) +; IC4VF4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[REVERSE5]], splat (i64 3) +; IC4VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE7]], splat (i64 3) +; IC4VF4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[REVERSE9]], splat (i64 3) +; IC4VF4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; IC4VF4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] +; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] +; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] +; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4) +; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 +; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IC4VF4: [[MIDDLE_BLOCK]]: +; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]]) +; IC4VF4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]]) +; IC4VF4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX10]], <4 x i64> [[TMP16]]) +; IC4VF4-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX11]]) +; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], 9223372036854775807 +; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331 +; IC4VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IC4VF4: [[SCALAR_PH]]: +; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 19999, %[[ENTRY]] ] +; 
IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ] +; IC4VF4-NEXT: br label %[[LOOP:.*]] +; IC4VF4: [[LOOP]]: +; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 +; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] +; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 +; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; IC4VF4: [[EXIT]]: +; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; +; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_const_start( +; IC4VF1-SAME: ptr [[A:%.*]]) { +; IC4VF1-NEXT: [[ENTRY:.*]]: +; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC4VF1: [[VECTOR_PH]]: +; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]] +; IC4VF1: [[VECTOR_BODY]]: +; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: 
[[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] +; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 +; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2 +; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3 +; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] +; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; IC4VF1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP3]], align 8 +; IC4VF1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +; IC4VF1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8 +; IC4VF1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8 +; IC4VF1-NEXT: [[TMP11:%.*]] = icmp sgt i64 [[TMP7]], 3 +; IC4VF1-NEXT: [[TMP12:%.*]] = icmp sgt i64 [[TMP8]], 3 +; IC4VF1-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[TMP9]], 3 +; IC4VF1-NEXT: [[TMP14:%.*]] = icmp sgt i64 [[TMP10]], 3 +; IC4VF1-NEXT: [[TMP15]] = select i1 [[TMP11]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]] +; IC4VF1-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[TMP0]], i64 [[VEC_PHI1]] +; IC4VF1-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI2]] +; IC4VF1-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI3]] +; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; IC4VF1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 +; IC4VF1-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IC4VF1: [[MIDDLE_BLOCK]]: +; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP15]], i64 [[TMP16]]) +; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX]], i64 [[TMP17]]) +; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP18]]) +; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 
9223372036854775807 +; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331 +; IC4VF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IC4VF1: [[SCALAR_PH]]: +; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 19999, %[[ENTRY]] ] +; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ] +; IC4VF1-NEXT: br label %[[LOOP:.*]] +; IC4VF1: [[LOOP]]: +; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 +; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] +; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 +; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; IC4VF1: [[EXIT]]: +; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] ; entry: br label %loop @@ -43,23 +202,354 @@ exit: ; preds = %loop @table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1 define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { -; CHECK-LABEL: define i16 @select_decreasing_induction_icmp_table_i16( -; CHECK-SAME: i16 noundef [[VAL:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] 
= phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] -; CHECK-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1 -; CHECK-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]] -; CHECK-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 -; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ] -; CHECK-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16( +; IC1VF4-SAME: i16 noundef [[VAL:%.*]]) { +; IC1VF4-NEXT: [[ENTRY:.*]]: +; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC1VF4: [[VECTOR_PH]]: +; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0 +; IC1VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; IC1VF4: [[VECTOR_BODY]]: +; IC1VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 +; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] +; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds 
i16, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1 +; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> +; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]] +; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) +; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]] +; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IC1VF4: [[MIDDLE_BLOCK]]: +; IC1VF4-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]]) +; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767 +; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0 +; IC1VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IC1VF4: [[SCALAR_PH]]: +; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ] +; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; IC1VF4-NEXT: br label %[[LOOP:.*]] +; IC1VF4: [[LOOP]]: +; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] +; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1 +; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]] +; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 +; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], 
i16 [[RDX]] +; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 +; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; IC1VF4: [[EXIT]]: +; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; IC1VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; +; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16( +; IC4VF4-SAME: i16 noundef [[VAL:%.*]]) { +; IC4VF4-NEXT: [[ENTRY:.*]]: +; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC4VF4: [[VECTOR_PH]]: +; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0 +; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; IC4VF4: [[VECTOR_BODY]]: +; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ] +; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4) +; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4) +; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 
+; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] +; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 +; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer +; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], +; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], +; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], +; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], +; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11) +; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11) +; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11) +; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11) +; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; IC4VF4: [[PRED_LOAD_IF]]: +; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]] +; IC4VF4-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1 +; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; IC4VF4: [[PRED_LOAD_CONTINUE]]: +; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] +; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]] +; IC4VF4: [[PRED_LOAD_IF15]]: +; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1 +; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]] +; IC4VF4-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], 
align 1 +; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP13]], i32 1 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]] +; IC4VF4: [[PRED_LOAD_CONTINUE16]]: +; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ] +; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]] +; IC4VF4: [[PRED_LOAD_IF17]]: +; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2 +; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]] +; IC4VF4-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP18]], align 1 +; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP19]], i32 2 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]] +; IC4VF4: [[PRED_LOAD_CONTINUE18]]: +; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ] +; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] +; IC4VF4: [[PRED_LOAD_IF19]]: +; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3 +; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]] +; IC4VF4-NEXT: [[TMP25:%.*]] = load i16, ptr [[TMP24]], align 1 +; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP25]], i32 3 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]] +; IC4VF4: [[PRED_LOAD_CONTINUE20]]: +; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x i16> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ] +; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]] +; IC4VF4: [[PRED_LOAD_IF21]]: +; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 
[[OFFSET_IDX]], -4 +; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]] +; IC4VF4-NEXT: [[TMP31:%.*]] = load i16, ptr [[TMP30]], align 1 +; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i32 0 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]] +; IC4VF4: [[PRED_LOAD_CONTINUE22]]: +; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ] +; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]] +; IC4VF4: [[PRED_LOAD_IF23]]: +; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5 +; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]] +; IC4VF4-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP36]], align 1 +; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP37]], i32 1 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]] +; IC4VF4: [[PRED_LOAD_CONTINUE24]]: +; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x i16> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ] +; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]] +; IC4VF4: [[PRED_LOAD_IF25]]: +; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6 +; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]] +; IC4VF4-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP42]], align 1 +; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP43]], i32 2 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]] +; IC4VF4: [[PRED_LOAD_CONTINUE26]]: +; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x i16> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ] +; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; 
IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]] +; IC4VF4: [[PRED_LOAD_IF27]]: +; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7 +; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]] +; IC4VF4-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP48]], align 1 +; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x i16> [[TMP45]], i16 [[TMP49]], i32 3 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]] +; IC4VF4: [[PRED_LOAD_CONTINUE28]]: +; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x i16> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ] +; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 +; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]] +; IC4VF4: [[PRED_LOAD_IF29]]: +; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8 +; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]] +; IC4VF4-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP54]], align 1 +; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x i16> poison, i16 [[TMP55]], i32 0 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]] +; IC4VF4: [[PRED_LOAD_CONTINUE30]]: +; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ] +; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 +; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]] +; IC4VF4: [[PRED_LOAD_IF31]]: +; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9 +; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]] +; IC4VF4-NEXT: [[TMP61:%.*]] = load i16, ptr [[TMP60]], align 1 +; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x i16> [[TMP57]], i16 [[TMP61]], i32 1 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]] +; IC4VF4: [[PRED_LOAD_CONTINUE32]]: +; IC4VF4-NEXT: [[TMP63:%.*]] = 
phi <4 x i16> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ] +; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 +; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]] +; IC4VF4: [[PRED_LOAD_IF33]]: +; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10 +; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]] +; IC4VF4-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP66]], align 1 +; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x i16> [[TMP63]], i16 [[TMP67]], i32 2 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]] +; IC4VF4: [[PRED_LOAD_CONTINUE34]]: +; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x i16> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ] +; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 +; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]] +; IC4VF4: [[PRED_LOAD_IF35]]: +; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11 +; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]] +; IC4VF4-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP72]], align 1 +; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x i16> [[TMP69]], i16 [[TMP73]], i32 3 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]] +; IC4VF4: [[PRED_LOAD_CONTINUE36]]: +; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x i16> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ] +; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 +; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]] +; IC4VF4: [[PRED_LOAD_IF37]]: +; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12 +; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]] +; IC4VF4-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP78]], align 1 +; IC4VF4-NEXT: [[TMP80:%.*]] = 
insertelement <4 x i16> poison, i16 [[TMP79]], i32 0 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]] +; IC4VF4: [[PRED_LOAD_CONTINUE38]]: +; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ] +; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 +; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]] +; IC4VF4: [[PRED_LOAD_IF39]]: +; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13 +; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]] +; IC4VF4-NEXT: [[TMP85:%.*]] = load i16, ptr [[TMP84]], align 1 +; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x i16> [[TMP81]], i16 [[TMP85]], i32 1 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]] +; IC4VF4: [[PRED_LOAD_CONTINUE40]]: +; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x i16> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ] +; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 +; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]] +; IC4VF4: [[PRED_LOAD_IF41]]: +; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14 +; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]] +; IC4VF4-NEXT: [[TMP91:%.*]] = load i16, ptr [[TMP90]], align 1 +; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x i16> [[TMP87]], i16 [[TMP91]], i32 2 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]] +; IC4VF4: [[PRED_LOAD_CONTINUE42]]: +; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x i16> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ] +; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 +; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]] +; IC4VF4: [[PRED_LOAD_IF43]]: +; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15 +; IC4VF4-NEXT: [[TMP96:%.*]] 
= getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]] +; IC4VF4-NEXT: [[TMP97:%.*]] = load i16, ptr [[TMP96]], align 1 +; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x i16> [[TMP93]], i16 [[TMP97]], i32 3 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]] +; IC4VF4: [[PRED_LOAD_CONTINUE44]]: +; IC4VF4-NEXT: [[TMP99:%.*]] = phi <4 x i16> [ [[TMP93]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP98]], %[[PRED_LOAD_IF43]] ] +; IC4VF4-NEXT: [[TMP100:%.*]] = icmp ugt <4 x i16> [[TMP27]], [[BROADCAST_SPLAT]] +; IC4VF4-NEXT: [[TMP101:%.*]] = icmp ugt <4 x i16> [[TMP51]], [[BROADCAST_SPLAT]] +; IC4VF4-NEXT: [[TMP102:%.*]] = icmp ugt <4 x i16> [[TMP75]], [[BROADCAST_SPLAT]] +; IC4VF4-NEXT: [[TMP103:%.*]] = icmp ugt <4 x i16> [[TMP99]], [[BROADCAST_SPLAT]] +; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) +; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1) +; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1) +; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1) +; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]] +; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]] +; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]] +; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]] +; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]] +; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]] +; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]] +; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]] +; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 +; IC4VF4-NEXT: 
[[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4) +; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IC4VF4: [[MIDDLE_BLOCK]]: +; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) +; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]]) +; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]]) +; IC4VF4-NEXT: [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]]) +; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767 +; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0 +; IC4VF4-NEXT: br label %[[EXIT:.*]] +; IC4VF4: [[SCALAR_PH]]: +; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ] +; IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ] +; IC4VF4-NEXT: br label %[[LOOP:.*]] +; IC4VF4: [[LOOP]]: +; IC4VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] +; IC4VF4-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1 +; IC4VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]] +; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 +; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] +; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 +; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; IC4VF4: [[EXIT]]: +; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; 
IC4VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; +; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_i16( +; IC4VF1-SAME: i16 noundef [[VAL:%.*]]) { +; IC4VF1-NEXT: [[ENTRY:.*]]: +; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC4VF1: [[VECTOR_PH]]: +; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]] +; IC4VF1: [[VECTOR_BODY]]: +; IC4VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 +; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] +; IC4VF1-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1 +; IC4VF1-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2 +; IC4VF1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3 +; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] +; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]] +; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]] +; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]] +; IC4VF1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP3]], align 1 +; IC4VF1-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 1 +; IC4VF1-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP5]], align 1 +; IC4VF1-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1 +; IC4VF1-NEXT: [[TMP11:%.*]] = icmp ugt i16 [[TMP7]], [[VAL]] +; IC4VF1-NEXT: [[TMP12:%.*]] = icmp ugt i16 [[TMP8]], [[VAL]] +; IC4VF1-NEXT: [[TMP13:%.*]] 
= icmp ugt i16 [[TMP9]], [[VAL]] +; IC4VF1-NEXT: [[TMP14:%.*]] = icmp ugt i16 [[TMP10]], [[VAL]] +; IC4VF1-NEXT: [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1 +; IC4VF1-NEXT: [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1 +; IC4VF1-NEXT: [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1 +; IC4VF1-NEXT: [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1 +; IC4VF1-NEXT: [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]] +; IC4VF1-NEXT: [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]] +; IC4VF1-NEXT: [[TMP21]] = select i1 [[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]] +; IC4VF1-NEXT: [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]] +; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; IC4VF1-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; IC4VF1-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IC4VF1: [[MIDDLE_BLOCK]]: +; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]]) +; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]]) +; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]]) +; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767 +; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0 +; IC4VF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IC4VF1: [[SCALAR_PH]]: +; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ] +; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; IC4VF1-NEXT: br label %[[LOOP:.*]] +; IC4VF1: [[LOOP]]: +; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr 
inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] +; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1 +; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]] +; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 +; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] +; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 +; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; IC4VF1: [[EXIT]]: +; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; IC4VF1-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] ; entry: br label %loop @@ -83,23 +573,354 @@ exit: ; preds = %loop @tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1 define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { -; CHECK-LABEL: define i16 @select_decreasing_induction_icmp_table_half( -; CHECK-SAME: half noundef [[VAL:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] -; CHECK-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1 -; CHECK-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]] -; CHECK-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 -; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: 
[[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ] -; CHECK-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half( +; IC1VF4-SAME: half noundef [[VAL:%.*]]) { +; IC1VF4-NEXT: [[ENTRY:.*]]: +; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC1VF4: [[VECTOR_PH]]: +; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0 +; IC1VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer +; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; IC1VF4: [[VECTOR_BODY]]: +; IC1VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 +; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] +; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1 +; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32> +; IC1VF4-NEXT: [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]] +; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) +; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]] +; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; IC1VF4-NEXT: 
[[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IC1VF4: [[MIDDLE_BLOCK]]: +; IC1VF4-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]]) +; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767 +; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0 +; IC1VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IC1VF4: [[SCALAR_PH]]: +; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ] +; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; IC1VF4-NEXT: br label %[[LOOP:.*]] +; IC1VF4: [[LOOP]]: +; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] +; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1 +; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]] +; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 +; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] +; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 +; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; IC1VF4: [[EXIT]]: +; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; IC1VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; +; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half( +; IC4VF4-SAME: half noundef [[VAL:%.*]]) { +; IC4VF4-NEXT: [[ENTRY:.*]]: +; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC4VF4: 
[[VECTOR_PH]]: +; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0 +; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer +; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]] +; IC4VF4: [[VECTOR_BODY]]: +; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ] +; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ] +; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4) +; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4) +; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4) +; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 +; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] +; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 +; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer +; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], +; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], +; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], +; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], +; IC4VF4-NEXT: 
[[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11) +; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11) +; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11) +; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11) +; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; IC4VF4: [[PRED_LOAD_IF]]: +; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]] +; IC4VF4-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP6]], align 1 +; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x half> poison, half [[TMP7]], i32 0 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; IC4VF4: [[PRED_LOAD_CONTINUE]]: +; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x half> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] +; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]] +; IC4VF4: [[PRED_LOAD_IF15]]: +; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1 +; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]] +; IC4VF4-NEXT: [[TMP13:%.*]] = load half, ptr [[TMP12]], align 1 +; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x half> [[TMP9]], half [[TMP13]], i32 1 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]] +; IC4VF4: [[PRED_LOAD_CONTINUE16]]: +; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x half> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ] +; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]] +; IC4VF4: [[PRED_LOAD_IF17]]: +; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2 +; IC4VF4-NEXT: [[TMP18:%.*]] 
= getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]] +; IC4VF4-NEXT: [[TMP19:%.*]] = load half, ptr [[TMP18]], align 1 +; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x half> [[TMP15]], half [[TMP19]], i32 2 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]] +; IC4VF4: [[PRED_LOAD_CONTINUE18]]: +; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x half> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ] +; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] +; IC4VF4: [[PRED_LOAD_IF19]]: +; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3 +; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]] +; IC4VF4-NEXT: [[TMP25:%.*]] = load half, ptr [[TMP24]], align 1 +; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x half> [[TMP21]], half [[TMP25]], i32 3 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]] +; IC4VF4: [[PRED_LOAD_CONTINUE20]]: +; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x half> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ] +; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]] +; IC4VF4: [[PRED_LOAD_IF21]]: +; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4 +; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]] +; IC4VF4-NEXT: [[TMP31:%.*]] = load half, ptr [[TMP30]], align 1 +; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x half> poison, half [[TMP31]], i32 0 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]] +; IC4VF4: [[PRED_LOAD_CONTINUE22]]: +; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ] +; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; IC4VF4-NEXT: br i1 [[TMP34]], label 
%[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]] +; IC4VF4: [[PRED_LOAD_IF23]]: +; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5 +; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]] +; IC4VF4-NEXT: [[TMP37:%.*]] = load half, ptr [[TMP36]], align 1 +; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x half> [[TMP33]], half [[TMP37]], i32 1 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]] +; IC4VF4: [[PRED_LOAD_CONTINUE24]]: +; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x half> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ] +; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]] +; IC4VF4: [[PRED_LOAD_IF25]]: +; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6 +; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]] +; IC4VF4-NEXT: [[TMP43:%.*]] = load half, ptr [[TMP42]], align 1 +; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x half> [[TMP39]], half [[TMP43]], i32 2 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]] +; IC4VF4: [[PRED_LOAD_CONTINUE26]]: +; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x half> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ] +; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]] +; IC4VF4: [[PRED_LOAD_IF27]]: +; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7 +; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]] +; IC4VF4-NEXT: [[TMP49:%.*]] = load half, ptr [[TMP48]], align 1 +; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x half> [[TMP45]], half [[TMP49]], i32 3 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]] +; IC4VF4: [[PRED_LOAD_CONTINUE28]]: +; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x half> [ 
[[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ] +; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 +; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]] +; IC4VF4: [[PRED_LOAD_IF29]]: +; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8 +; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]] +; IC4VF4-NEXT: [[TMP55:%.*]] = load half, ptr [[TMP54]], align 1 +; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x half> poison, half [[TMP55]], i32 0 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]] +; IC4VF4: [[PRED_LOAD_CONTINUE30]]: +; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ] +; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 +; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]] +; IC4VF4: [[PRED_LOAD_IF31]]: +; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9 +; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]] +; IC4VF4-NEXT: [[TMP61:%.*]] = load half, ptr [[TMP60]], align 1 +; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x half> [[TMP57]], half [[TMP61]], i32 1 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]] +; IC4VF4: [[PRED_LOAD_CONTINUE32]]: +; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x half> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ] +; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 +; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]] +; IC4VF4: [[PRED_LOAD_IF33]]: +; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10 +; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]] +; IC4VF4-NEXT: [[TMP67:%.*]] = load half, ptr [[TMP66]], align 1 +; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement 
<4 x half> [[TMP63]], half [[TMP67]], i32 2 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]] +; IC4VF4: [[PRED_LOAD_CONTINUE34]]: +; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x half> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ] +; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 +; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]] +; IC4VF4: [[PRED_LOAD_IF35]]: +; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11 +; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]] +; IC4VF4-NEXT: [[TMP73:%.*]] = load half, ptr [[TMP72]], align 1 +; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x half> [[TMP69]], half [[TMP73]], i32 3 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]] +; IC4VF4: [[PRED_LOAD_CONTINUE36]]: +; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x half> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ] +; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 +; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]] +; IC4VF4: [[PRED_LOAD_IF37]]: +; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12 +; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]] +; IC4VF4-NEXT: [[TMP79:%.*]] = load half, ptr [[TMP78]], align 1 +; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x half> poison, half [[TMP79]], i32 0 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]] +; IC4VF4: [[PRED_LOAD_CONTINUE38]]: +; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ] +; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 +; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]] +; IC4VF4: [[PRED_LOAD_IF39]]: +; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13 +; IC4VF4-NEXT: [[TMP84:%.*]] 
= getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]] +; IC4VF4-NEXT: [[TMP85:%.*]] = load half, ptr [[TMP84]], align 1 +; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x half> [[TMP81]], half [[TMP85]], i32 1 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]] +; IC4VF4: [[PRED_LOAD_CONTINUE40]]: +; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x half> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ] +; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 +; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]] +; IC4VF4: [[PRED_LOAD_IF41]]: +; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14 +; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]] +; IC4VF4-NEXT: [[TMP91:%.*]] = load half, ptr [[TMP90]], align 1 +; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x half> [[TMP87]], half [[TMP91]], i32 2 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]] +; IC4VF4: [[PRED_LOAD_CONTINUE42]]: +; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x half> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ] +; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 +; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]] +; IC4VF4: [[PRED_LOAD_IF43]]: +; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15 +; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]] +; IC4VF4-NEXT: [[TMP97:%.*]] = load half, ptr [[TMP96]], align 1 +; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x half> [[TMP93]], half [[TMP97]], i32 3 +; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]] +; IC4VF4: [[PRED_LOAD_CONTINUE44]]: +; IC4VF4-NEXT: [[TMP99:%.*]] = phi <4 x half> [ [[TMP93]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP98]], %[[PRED_LOAD_IF43]] ] +; IC4VF4-NEXT: [[TMP100:%.*]] = fcmp ugt <4 x half> [[TMP27]], [[BROADCAST_SPLAT]] +; IC4VF4-NEXT: 
[[TMP101:%.*]] = fcmp ugt <4 x half> [[TMP51]], [[BROADCAST_SPLAT]] +; IC4VF4-NEXT: [[TMP102:%.*]] = fcmp ugt <4 x half> [[TMP75]], [[BROADCAST_SPLAT]] +; IC4VF4-NEXT: [[TMP103:%.*]] = fcmp ugt <4 x half> [[TMP99]], [[BROADCAST_SPLAT]] +; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1) +; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1) +; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1) +; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1) +; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]] +; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]] +; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]] +; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]] +; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]] +; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]] +; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]] +; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]] +; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 +; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4) +; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IC4VF4: [[MIDDLE_BLOCK]]: +; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) +; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]]) +; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> 
[[TMP115]]) +; IC4VF4-NEXT: [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]]) +; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767 +; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0 +; IC4VF4-NEXT: br label %[[EXIT:.*]] +; IC4VF4: [[SCALAR_PH]]: +; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ] +; IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ] +; IC4VF4-NEXT: br label %[[LOOP:.*]] +; IC4VF4: [[LOOP]]: +; IC4VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] +; IC4VF4-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1 +; IC4VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]] +; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 +; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] +; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 +; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; IC4VF4: [[EXIT]]: +; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; IC4VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; +; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_half( +; IC4VF1-SAME: half noundef [[VAL:%.*]]) { +; IC4VF1-NEXT: [[ENTRY:.*]]: +; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC4VF1: [[VECTOR_PH]]: +; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]] +; IC4VF1: [[VECTOR_BODY]]: +; IC4VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], 
[ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; IC4VF1-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 +; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] +; IC4VF1-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1 +; IC4VF1-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2 +; IC4VF1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3 +; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] +; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]] +; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]] +; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]] +; IC4VF1-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP3]], align 1 +; IC4VF1-NEXT: [[TMP8:%.*]] = load half, ptr [[TMP4]], align 1 +; IC4VF1-NEXT: [[TMP9:%.*]] = load half, ptr [[TMP5]], align 1 +; IC4VF1-NEXT: [[TMP10:%.*]] = load half, ptr [[TMP6]], align 1 +; IC4VF1-NEXT: [[TMP11:%.*]] = fcmp ugt half [[TMP7]], [[VAL]] +; IC4VF1-NEXT: [[TMP12:%.*]] = fcmp ugt half [[TMP8]], [[VAL]] +; IC4VF1-NEXT: [[TMP13:%.*]] = fcmp ugt half [[TMP9]], [[VAL]] +; IC4VF1-NEXT: [[TMP14:%.*]] = fcmp ugt half [[TMP10]], [[VAL]] +; IC4VF1-NEXT: [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1 +; IC4VF1-NEXT: [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1 +; IC4VF1-NEXT: [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1 +; IC4VF1-NEXT: [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1 +; IC4VF1-NEXT: [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]] +; IC4VF1-NEXT: [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]] +; IC4VF1-NEXT: [[TMP21]] = select i1 
[[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]] +; IC4VF1-NEXT: [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]] +; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; IC4VF1-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; IC4VF1-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IC4VF1: [[MIDDLE_BLOCK]]: +; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]]) +; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]]) +; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]]) +; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767 +; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0 +; IC4VF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; IC4VF1: [[SCALAR_PH]]: +; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ] +; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; IC4VF1-NEXT: br label %[[LOOP:.*]] +; IC4VF1: [[LOOP]]: +; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] +; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1 +; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]] +; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 +; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] +; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 +; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; IC4VF1: [[EXIT]]: 
+; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; IC4VF1-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] ; entry: br label %loop @@ -141,6 +962,66 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr ; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ] ; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; +; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start( +; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; IC1VF4-NEXT: [[ENTRY:.*]]: +; IC1VF4-NEXT: br label %[[LOOP:.*]] +; IC1VF4: [[LOOP]]: +; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ] +; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ] +; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]] +; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; IC1VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]] +; IC1VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8 +; IC1VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]] +; IC1VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] +; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1 +; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; IC1VF4: [[EXIT]]: +; IC1VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ] +; IC1VF4-NEXT: ret i64 [[COND_LCSSA]] +; +; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start( +; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; IC4VF4-NEXT: [[ENTRY:.*]]: +; IC4VF4-NEXT: br label %[[LOOP:.*]] +; IC4VF4: [[LOOP]]: +; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ] +; 
IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ] +; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]] +; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; IC4VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]] +; IC4VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8 +; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]] +; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] +; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1 +; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; IC4VF4: [[EXIT]]: +; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ] +; IC4VF4-NEXT: ret i64 [[COND_LCSSA]] +; +; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start( +; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { +; IC4VF1-NEXT: [[ENTRY:.*]]: +; IC4VF1-NEXT: br label %[[LOOP:.*]] +; IC4VF1: [[LOOP]]: +; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ] +; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ] +; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]] +; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; IC4VF1-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]] +; IC4VF1-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8 +; IC4VF1-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]] +; IC4VF1-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] +; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1 +; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; IC4VF1: [[EXIT]]: +; 
IC4VF1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ] +; IC4VF1-NEXT: ret i64 [[COND_LCSSA]] +; entry: br label %loop @@ -164,23 +1045,59 @@ exit: ; preds = %loop ; The sentinel value for decreasing-IV vectorization is LONG_MAX, and since ; the IV hits this value, it is impossible to vectorize this case. define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a) { -; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound( -; CHECK-SAME: ptr [[A:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; CHECK-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ] -; CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound( +; IC1VF4-SAME: ptr [[A:%.*]]) { +; IC1VF4-NEXT: [[ENTRY:.*]]: +; IC1VF4-NEXT: br label %[[LOOP:.*]] +; IC1VF4: [[LOOP]]: +; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; 
IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 +; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] +; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 +; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; IC1VF4: [[EXIT]]: +; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ] +; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; +; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound( +; IC4VF4-SAME: ptr [[A:%.*]]) { +; IC4VF4-NEXT: [[ENTRY:.*]]: +; IC4VF4-NEXT: br label %[[LOOP:.*]] +; IC4VF4: [[LOOP]]: +; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 +; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] +; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 +; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; IC4VF4: [[EXIT]]: +; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ] +; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; +; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound( +; IC4VF1-SAME: ptr [[A:%.*]]) { +; IC4VF1-NEXT: [[ENTRY:.*]]: +; IC4VF1-NEXT: br label %[[LOOP:.*]] +; IC4VF1: [[LOOP]]: +; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr 
[[A]], i64 [[IV]] +; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 +; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] +; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 +; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; IC4VF1: [[EXIT]]: +; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ] +; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] ; entry: br label %loop @@ -199,4 +1116,3 @@ loop: ; preds = %entry, %loop exit: ; preds = %loop ret i64 %spec.select } - diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index 9428737814146..2e8109c18948e 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -238,7 +238,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-last-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond> +; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond> ; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<{{.+}}> ; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph From 3d8d79aad7efb3c505cf19048efaeec3a276ed97 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 27 Jun 2025 13:02:43 +0100 Subject: [PATCH 2/3] !fixup address comments, thanks --- llvm/include/llvm/Analysis/IVDescriptors.h | 2 +- llvm/lib/Analysis/IVDescriptors.cpp | 6 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 5 +- .../LoopVectorize/iv-select-cmp-decreasing.ll | 136 +++--------------- 4 files changed, 25 
insertions(+), 124 deletions(-) diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 310d05b6f04af..3b92cbff28de4 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -294,7 +294,7 @@ class RecurrenceDescriptor { /// actual type of the Phi if the recurrence has been type-promoted. Type *getRecurrenceType() const { return RecurrenceType; } - /// Returns the sentinel value for FindFirstIV &FindLastIV recurrences to + /// Returns the sentinel value for FindFirstIV & FindLastIV recurrences to /// replace the start value. Value *getSentinelValue() const { Type *Ty = StartValue->getType(); diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index b49258e3b54ef..523f3694559e6 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -726,7 +726,6 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop, // The maximum acceptable range for the increasing induction variable, // called the valid range, will be defined as - const ConstantRange IVRange = SE.getSignedRange(AR); // Keep the minimum (FindLast) or maximum (FindFirst) value of the // recurrence type as the sentinel value. 
The maximum acceptable range for // the induction variable, called the valid range, will be defined as @@ -746,9 +745,8 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop, ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel); } else { assert(isFindFirstIVRecurrenceKind(Kind) && - "Kind must either be a FindLastIV or FindFirstIV"); - assert(IsSigned && - "only FindFirstIV with SMax is supported at the moment"); + "Kind must either be FindLastIV or FindFirstIV"); + assert(IsSigned && "Only FindFirstIV with SMax is supported currently"); ValidRange = ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits), APInt::getSignedMaxValue(NumBits) - 1); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 73d82319e13ec..472b5700bd358 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -747,9 +747,8 @@ Value *VPInstruction::generate(VPTransformState &State) { MinMaxKind = IsSigned ? 
RecurKind::SMax : RecurKind::UMax; } else { assert(RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK) && - "Kind must either be a FindLastIV or FindFirstIV"); - assert(IsSigned && - "only FindFirstIV with SMax is supported at the moment"); + "Kind must either be FindLastIV or FindFirstIV"); + assert(IsSigned && "Only FindFirstIV with SMax is currently supported"); MinMaxKind = RecurKind::SMin; } for (unsigned Part = 1; Part < UF; ++Part) diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index ee0a64397e2ac..d224da795997d 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC1VF4 %s -; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC4VF4 %s -; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefix=IC4VF1 %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC1VF4 %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF4 %s +; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF1 %s define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start( @@ -962,66 +962,6 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr ; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], 
%[[LOOP]] ] ; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; -; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start( -; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { -; IC1VF4-NEXT: [[ENTRY:.*]]: -; IC1VF4-NEXT: br label %[[LOOP:.*]] -; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ] -; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]] -; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC1VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]] -; IC1VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8 -; IC1VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]] -; IC1VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] -; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1 -; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]] -; IC1VF4: [[EXIT]]: -; IC1VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ] -; IC1VF4-NEXT: ret i64 [[COND_LCSSA]] -; -; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start( -; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { -; IC4VF4-NEXT: [[ENTRY:.*]]: -; IC4VF4-NEXT: br label %[[LOOP:.*]] -; IC4VF4: [[LOOP]]: -; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ] -; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ] -; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]] -; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC4VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr 
inbounds i64, ptr [[B]], i64 [[IV_NEXT]] -; IC4VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8 -; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]] -; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] -; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]] -; IC4VF4: [[EXIT]]: -; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ] -; IC4VF4-NEXT: ret i64 [[COND_LCSSA]] -; -; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start( -; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) { -; IC4VF1-NEXT: [[ENTRY:.*]]: -; IC4VF1-NEXT: br label %[[LOOP:.*]] -; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ] -; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]] -; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC4VF1-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]] -; IC4VF1-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8 -; IC4VF1-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]] -; IC4VF1-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] -; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1 -; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]] -; IC4VF1: [[EXIT]]: -; IC4VF1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ] -; IC4VF1-NEXT: ret i64 [[COND_LCSSA]] -; entry: br label %loop @@ -1045,59 +985,23 @@ exit: ; preds = %loop ; The sentinel value for decreasing-IV vectorization is LONG_MAX, and since ; the IV hits this value, it is impossible to vectorize this case. 
define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a) { -; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound( -; IC1VF4-SAME: ptr [[A:%.*]]) { -; IC1VF4-NEXT: [[ENTRY:.*]]: -; IC1VF4-NEXT: br label %[[LOOP:.*]] -; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] -; IC1VF4: [[EXIT]]: -; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ] -; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] -; -; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound( -; IC4VF4-SAME: ptr [[A:%.*]]) { -; IC4VF4-NEXT: [[ENTRY:.*]]: -; IC4VF4-NEXT: br label %[[LOOP:.*]] -; IC4VF4: [[LOOP]]: -; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label 
%[[EXIT:.*]], label %[[LOOP]] -; IC4VF4: [[EXIT]]: -; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ] -; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] -; -; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound( -; IC4VF1-SAME: ptr [[A:%.*]]) { -; IC4VF1-NEXT: [[ENTRY:.*]]: -; IC4VF1-NEXT: br label %[[LOOP:.*]] -; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] -; IC4VF1: [[EXIT]]: -; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ] -; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound( +; CHECK-SAME: ptr [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 +; CHECK-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: 
[[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] ; entry: br label %loop From a87ebd6e292976eb21c9389d15e059a82ba55734 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 28 Jun 2025 22:38:15 +0100 Subject: [PATCH 3/3] !fixup address latest comments, thanks! --- llvm/lib/Analysis/IVDescriptors.cpp | 9 ++------- llvm/lib/Transforms/Utils/LoopUtils.cpp | 13 ++++++------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 523f3694559e6..b275b1064cef2 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -715,11 +715,8 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop, return std::nullopt; const SCEV *Step = AR->getStepRecurrence(SE); - - if (isFindFirstIVRecurrenceKind(Kind)) { - if (!SE.isKnownNegative(Step)) - return std::nullopt; - } else if (!SE.isKnownPositive(Step)) + if ((isFindFirstIVRecurrenceKind(Kind) && !SE.isKnownNegative(Step)) || + (isFindLastIVRecurrenceKind(Kind) && !SE.isKnownPositive(Step))) return std::nullopt; // Keep the minimum value of the recurrence type as the sentinel value. 
@@ -744,8 +741,6 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop, : APInt::getMinValue(NumBits); ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel); } else { - assert(isFindFirstIVRecurrenceKind(Kind) && - "Kind must either be FindLastIV or FindFirstIV"); assert(IsSigned && "Only FindFirstIV with SMax is supported currently"); ValidRange = ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits), diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index ac27ccf409d6b..e44fa6af29ffb 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1227,12 +1227,11 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src, RecurKind RdxKind, Value *Start, Value *Sentinel) { bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RdxKind); - Value *MaxRdx = - Src->getType()->isVectorTy() - ? (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind) - ? Builder.CreateIntMaxReduce(Src, IsSigned) - : Builder.CreateIntMinReduce(Src, IsSigned)) - : Src; + bool IsMaxRdx = RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind); + Value *MaxRdx = Src->getType()->isVectorTy() + ? (IsMaxRdx ? Builder.CreateIntMaxReduce(Src, IsSigned) + : Builder.CreateIntMinReduce(Src, IsSigned)) + : Src; // Correct the final reduction result back to the start value if the maximum // reduction is sentinel value. Value *Cmp = @@ -1328,7 +1327,7 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src, RecurKind Kind, Value *Mask, Value *EVL) { assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && !RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) && - "AnyOf, FindFirstIV and FindLastIV reductions are not supported."); + "AnyOf and FindIV reductions are not supported."); Intrinsic::ID Id = getReductionIntrinsicID(Kind); auto VPID = VPIntrinsic::getForIntrinsic(Id); assert(VPReductionIntrinsic::isVPReduction(VPID) &&