diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 3b627a5140854..463249461483f 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -54,9 +54,12 @@ enum class RecurKind { FMulAdd, ///< Sum of float products with llvm.fmuladd(a * b + sum). AnyOf, ///< AnyOf reduction with select(cmp(),x,y) where one of (x,y) is ///< loop invariant, and both x and y are integer type. - FindLastIV, ///< FindLast reduction with select(cmp(),x,y) where one of - ///< (x,y) is increasing loop induction, and both x and y are - ///< integer type. + FindLastIVSMax, ///< FindLast reduction with select(cmp(),x,y) where one of + ///< (x,y) is increasing loop induction, and both x and y + ///< are integer type, producing a SMax reduction. + FindLastIVUMax, ///< FindLast reduction with select(cmp(),x,y) where one of + ///< (x,y) is increasing loop induction, and both x and y + ///< are integer type, producing a UMax reduction. // clang-format on // TODO: Any_of and FindLast reduction need not be restricted to integer type // only. @@ -259,7 +262,14 @@ class RecurrenceDescriptor { /// Returns true if the recurrence kind is of the form /// select(cmp(),x,y) where one of (x,y) is increasing loop induction. static bool isFindLastIVRecurrenceKind(RecurKind Kind) { - return Kind == RecurKind::FindLastIV; + return Kind == RecurKind::FindLastIVSMax || + Kind == RecurKind::FindLastIVUMax; + } + + /// Returns true if recurrece kind is a signed redux kind. + static bool isSignedRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::SMax || Kind == RecurKind::SMin || + Kind == RecurKind::FindLastIVSMax; } /// Returns the type of the recurrence. This type can be narrower than the @@ -271,8 +281,10 @@ class RecurrenceDescriptor { Value *getSentinelValue() const { assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind"); Type *Ty = StartValue->getType(); - return ConstantInt::get(Ty, - APInt::getSignedMinValue(Ty->getIntegerBitWidth())); + unsigned BW = Ty->getIntegerBitWidth(); + return ConstantInt::get(Ty, isSignedRecurrenceKind(Kind) + ? APInt::getSignedMinValue(BW) + : APInt::getMinValue(BW)); } /// Returns a reference to the instructions used for type-promoting the diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 12be3bad04d38..e4d2f9d191707 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -434,7 +434,8 @@ LLVM_ABI Value *createAnyOfReduction(IRBuilderBase &B, Value *Src, /// Create a reduction of the given vector \p Src for a reduction of the /// kind RecurKind::FindLastIV. LLVM_ABI Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src, - Value *Start, Value *Sentinel); + RecurKind RdxKind, Value *Start, + Value *Sentinel); /// Create an ordered reduction intrinsic using the given recurrence /// kind \p RdxKind. diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 7232283b9101b..c8e97e5ec0e58 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -50,7 +50,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) { case RecurKind::UMax: case RecurKind::UMin: case RecurKind::AnyOf: - case RecurKind::FindLastIV: + case RecurKind::FindLastIVSMax: + case RecurKind::FindLastIVUMax: return true; } return false; @@ -700,47 +701,59 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi, m_Value(NonRdxPhi))))) return InstDesc(false, I); - auto IsIncreasingLoopInduction = [&](Value *V) { + // Returns a non-nullopt boolean indicating the signedness of the recurrence + // when a valid FindLastIV pattern is found. + auto GetRecurKind = [&](Value *V) -> std::optional { Type *Ty = V->getType(); if (!SE.isSCEVable(Ty)) - return false; + return std::nullopt; auto *AR = dyn_cast(SE.getSCEV(V)); if (!AR || AR->getLoop() != TheLoop) - return false; + return std::nullopt; const SCEV *Step = AR->getStepRecurrence(SE); if (!SE.isKnownPositive(Step)) - return false; + return std::nullopt; - const ConstantRange IVRange = SE.getSignedRange(AR); - unsigned NumBits = Ty->getIntegerBitWidth(); // Keep the minimum value of the recurrence type as the sentinel value. // The maximum acceptable range for the increasing induction variable, // called the valid range, will be defined as // [ + 1, ) - // where is SignedMin() + // where is [Signed|Unsigned]Min() // TODO: This range restriction can be lifted by adding an additional // virtual OR reduction. - const APInt Sentinel = APInt::getSignedMinValue(NumBits); - const ConstantRange ValidRange = - ConstantRange::getNonEmpty(Sentinel + 1, Sentinel); - LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange - << ", and the signed range of " << *AR << " is " - << IVRange << "\n"); - // Ensure the induction variable does not wrap around by verifying that its - // range is fully contained within the valid range. - return ValidRange.contains(IVRange); + auto CheckRange = [&](bool IsSigned) { + const ConstantRange IVRange = + IsSigned ? SE.getSignedRange(AR) : SE.getUnsignedRange(AR); + unsigned NumBits = Ty->getIntegerBitWidth(); + const APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits) + : APInt::getMinValue(NumBits); + const ConstantRange ValidRange = + ConstantRange::getNonEmpty(Sentinel + 1, Sentinel); + LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange + << ", and the range of " << *AR << " is " << IVRange + << "\n"); + + // Ensure the induction variable does not wrap around by verifying that + // its range is fully contained within the valid range. + return ValidRange.contains(IVRange); + }; + if (CheckRange(true)) + return RecurKind::FindLastIVSMax; + if (CheckRange(false)) + return RecurKind::FindLastIVUMax; + return std::nullopt; }; // We are looking for selects of the form: // select(cmp(), phi, increasing_loop_induction) or // select(cmp(), increasing_loop_induction, phi) // TODO: Support for monotonically decreasing induction variable - if (!IsIncreasingLoopInduction(NonRdxPhi)) - return InstDesc(false, I); + if (auto RK = GetRecurKind(NonRdxPhi)) + return InstDesc(I, *RK); - return InstDesc(I, RecurKind::FindLastIV); + return InstDesc(false, I); } RecurrenceDescriptor::InstDesc @@ -985,8 +998,8 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, << "\n"); return true; } - if (AddReductionVar(Phi, RecurKind::FindLastIV, TheLoop, FMF, RedDes, DB, AC, - DT, SE)) { + if (AddReductionVar(Phi, RecurKind::FindLastIVSMax, TheLoop, FMF, RedDes, DB, + AC, DT, SE)) { LLVM_DEBUG(dbgs() << "Found a FindLastIV reduction PHI." << *Phi << "\n"); return true; } @@ -1137,7 +1150,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { case RecurKind::Mul: return Instruction::Mul; case RecurKind::AnyOf: - case RecurKind::FindLastIV: + case RecurKind::FindLastIVSMax: + case RecurKind::FindLastIVUMax: case RecurKind::Or: return Instruction::Or; case RecurKind::And: diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index cf6b183c78ac3..c50bb4a497c6a 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1224,9 +1224,11 @@ Value *llvm::createAnyOfReduction(IRBuilderBase &Builder, Value *Src, } Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src, - Value *Start, Value *Sentinel) { + RecurKind RdxKind, Value *Start, + Value *Sentinel) { + bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RdxKind); Value *MaxRdx = Src->getType()->isVectorTy() - ? Builder.CreateIntMaxReduce(Src, true) + ? Builder.CreateIntMaxReduce(Src, IsSigned) : Src; // Correct the final reduction result back to the start value if the maximum // reduction is sentinel value. diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4551a365a6967..106dba1bc6195 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -23171,7 +23171,8 @@ class HorizontalReduction { case RecurKind::FMul: case RecurKind::FMulAdd: case RecurKind::AnyOf: - case RecurKind::FindLastIV: + case RecurKind::FindLastIVSMax: + case RecurKind::FindLastIVUMax: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: case RecurKind::None: @@ -23305,7 +23306,8 @@ class HorizontalReduction { case RecurKind::FMul: case RecurKind::FMulAdd: case RecurKind::AnyOf: - case RecurKind::FindLastIV: + case RecurKind::FindLastIVSMax: + case RecurKind::FindLastIVUMax: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: case RecurKind::None: @@ -23404,7 +23406,8 @@ class HorizontalReduction { case RecurKind::FMul: case RecurKind::FMulAdd: case RecurKind::AnyOf: - case RecurKind::FindLastIV: + case RecurKind::FindLastIVSMax: + case RecurKind::FindLastIVUMax: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: case RecurKind::None: diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f3b5c8cfa9885..da9c2228761e3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -642,7 +642,7 @@ Value *VPInstruction::generate(VPTransformState &State) { auto *PhiR = cast(getOperand(0)); // Get its reduction variable descriptor. const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - [[maybe_unused]] RecurKind RK = RdxDesc.getRecurrenceKind(); + RecurKind RK = RdxDesc.getRecurrenceKind(); assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) && "Unexpected reduction kind"); assert(!PhiR->isInLoop() && @@ -652,14 +652,17 @@ Value *VPInstruction::generate(VPTransformState &State) { // sentinel value, followed by one operand for each part of the reduction. unsigned UF = getNumOperands() - 3; Value *ReducedPartRdx = State.get(getOperand(3)); - for (unsigned Part = 1; Part < UF; ++Part) { - ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, + RecurKind MinMaxKind = RecurrenceDescriptor::isSignedRecurrenceKind(RK) + ? RecurKind::SMax + : RecurKind::UMax; + for (unsigned Part = 1; Part < UF; ++Part) + ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx, State.get(getOperand(3 + Part))); - } Value *Start = State.get(getOperand(1), true); Value *Sentinel = getOperand(2)->getLiveInIRValue(); - return createFindLastIVReduction(Builder, ReducedPartRdx, Start, Sentinel); + return createFindLastIVReduction(Builder, ReducedPartRdx, RK, Start, + Sentinel); } case VPInstruction::ComputeReductionResult: { // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index e4597ebfe7dc8..6a2e3df50e699 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4 @@ -623,6 +623,207 @@ exit: ; preds = %for.body ret i32 %spec.select } +; The construct that are introduced by IndVarSimplify is: +; %1 = trunc i64 %iv to i32 +; The loop exit condition is a constant that overflows signed i32, +; but not unsigned i32: +; %exitcond.not = icmp eq i64 %inc, 4294967294 +; Hence, we can vectorize with the unsigned variant of FindLastIV. +define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { +; CHECK-VF4IC1-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range( +; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) { +; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC1: [[VECTOR_PH]]: +; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC1: [[VECTOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = add i64 2147483646, [[INDEX]] +; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-VF4IC1-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 +; CHECK-VF4IC1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP5]], 0 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP5]], i32 331 +; CHECK-VF4IC1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC1: [[SCALAR_PH]]: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967294, %[[MIDDLE_BLOCK]] ], [ 2147483646, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-VF4IC1: [[FOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 +; CHECK-VF4IC1-NEXT: [[CONV:%.*]] = trunc i64 [[IV1]] to i32 +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] +; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 +; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF4IC1: [[EXIT]]: +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; +; CHECK-VF4IC4-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range( +; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { +; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC4: [[VECTOR_PH]]: +; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC4: [[VECTOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 2147483646, [[INDEX]] +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD4]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD5]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD6]], splat (i32 3) +; CHECK-VF4IC4-NEXT: [[TMP9]] = select <4 x i1> [[TMP5]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]] +; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] +; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) +; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 +; CHECK-VF4IC4-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP11]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP12]]) +; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_MINMAX8]]) +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP14]], 0 +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP14]], i32 331 +; CHECK-VF4IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC4: [[SCALAR_PH]]: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967294, %[[MIDDLE_BLOCK]] ], [ 2147483646, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-VF4IC4: [[FOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 3 +; CHECK-VF4IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] +; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 +; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF4IC4: [[EXIT]]: +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; +; CHECK-VF1IC4-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range( +; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { +; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF1IC4: [[VECTOR_PH]]: +; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF1IC4: [[VECTOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 2147483646, [[INDEX]] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-VF1IC4-NEXT: [[OFFSET_IDX4:%.*]] = add i64 2147483646, [[INDEX]] +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = trunc i64 [[OFFSET_IDX4]] to i32 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 1 +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], 2 +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i32 [[TMP3]], 3 +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP11]], 3 +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP12]], 3 +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP13]], 3 +; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP14]], 3 +; CHECK-VF1IC4-NEXT: [[TMP19]] = select i1 [[TMP15]], i32 [[TMP3]], i32 [[VEC_PHI]] +; CHECK-VF1IC4-NEXT: [[TMP20]] = select i1 [[TMP16]], i32 [[TMP4]], i32 [[VEC_PHI1]] +; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI2]] +; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI3]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 +; CHECK-VF1IC4-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP19]], i32 [[TMP20]]) +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX]], i32 [[TMP21]]) +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX6:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX5]], i32 [[TMP22]]) +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX6]], 0 +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX6]], i32 331 +; CHECK-VF1IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF1IC4: [[SCALAR_PH]]: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967294, %[[MIDDLE_BLOCK]] ], [ 2147483646, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-VF1IC4: [[FOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 3 +; CHECK-VF1IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] +; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 +; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF1IC4: [[EXIT]]: +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 2147483646, %entry ], [ %inc, %for.body ] + %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp = icmp sgt i32 %0, 3 + %conv = trunc i64 %iv to i32 + %spec.select = select i1 %cmp, i32 %conv, i32 %rdx + %inc = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %inc, 4294967294 + br i1 %exitcond.not, label %exit, label %for.body + +exit: ; preds = %for.body + ret i32 %spec.select +} + ; Negative tests ; This test can theoretically be vectorized, but only with a runtime-check. @@ -844,7 +1045,7 @@ exit: ; preds = %for.body, %entry ; The construct that are introduced by IndVarSimplify is: ; %1 = trunc i64 %iv to i32 ; However, the loop exit condition is a constant that overflows i32: -; %exitcond.not = icmp eq i64 %inc, 4294967294 +; %exitcond.not = icmp eq i64 %inc, 9223372036854775806 ; Hence, the i32 will most certainly wrap and hit the sentinel value, and we ; cannot vectorize this case. define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) { @@ -853,7 +1054,7 @@ define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) { ; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC1: [[FOR_BODY]]: -; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 4294967294, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 @@ -861,7 +1062,7 @@ define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) { ; CHECK-VF4IC1-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] ; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 9223372036854775806 ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] ; CHECK-VF4IC1: [[EXIT]]: ; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] @@ -872,7 +1073,7 @@ define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) { ; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC4: [[FOR_BODY]]: -; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 4294967294, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 @@ -880,7 +1081,7 @@ define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) { ; CHECK-VF4IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] ; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 +; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 9223372036854775806 ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] ; CHECK-VF4IC4: [[EXIT]]: ; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] @@ -891,7 +1092,7 @@ define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) { ; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF1IC4: [[FOR_BODY]]: -; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 4294967294, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] ; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 @@ -899,7 +1100,7 @@ define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) { ; CHECK-VF1IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] ; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 +; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 9223372036854775806 ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] ; CHECK-VF1IC4: [[EXIT]]: ; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] @@ -909,7 +1110,7 @@ entry: br label %for.body for.body: ; preds = %entry, %for.body - %iv = phi i64 [ 2147483646, %entry ], [ %inc, %for.body ] + %iv = phi i64 [ 4294967294, %entry ], [ %inc, %for.body ] %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv %0 = load i32, ptr %arrayidx, align 4 @@ -917,7 +1118,7 @@ for.body: ; preds = %entry, %for.body %conv = trunc i64 %iv to i32 %spec.select = select i1 %cmp, i32 %conv, i32 %rdx %inc = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %inc, 4294967294 + %exitcond.not = icmp eq i64 %inc, 9223372036854775806 br i1 %exitcond.not, label %exit, label %for.body exit: ; preds = %for.body @@ -1210,31 +1411,3 @@ exit: ; preds = %for.body, %entry %rdx.0.lcssa = phi i16 [ %start, %entry ], [ %cond, %for.body ] ret i16 %rdx.0.lcssa } -;. -; CHECK-VF4IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK-VF4IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK-VF4IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-VF4IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; CHECK-VF4IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK-VF4IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; CHECK-VF4IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK-VF4IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -;. -; CHECK-VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK-VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK-VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; CHECK-VF4IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK-VF4IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; CHECK-VF4IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK-VF4IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -;. -; CHECK-VF1IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK-VF1IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK-VF1IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-VF1IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} -; CHECK-VF1IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK-VF1IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} -; CHECK-VF1IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK-VF1IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll index 07c720e9dcb17..eab5d5ea9b1f7 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4 @@ -1665,6 +1665,232 @@ exit: ; preds = %for.body ret i64 %cond } +define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) { +; CHECK-VF4IC1-LABEL: define i64 @select_icmp_unsigned_iv_range( +; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) { +; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC1: [[VECTOR_PH]]: +; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC1: [[VECTOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] +; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775804 +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP5]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP7]], i64 [[RDX_START]] +; CHECK-VF4IC1-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC1: [[SCALAR_PH]]: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -4, %[[MIDDLE_BLOCK]] ], [ -9223372036854775808, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 9223372036854775804, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-VF4IC1: [[FOR_BODY]]: +; CHECK-VF4IC1-NEXT: [[IV_J:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC3:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IV_I:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]] +; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]] +; CHECK-VF4IC1-NEXT: [[TMP10:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP9]], [[TMP10]] +; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]] +; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1 +; CHECK-VF4IC1-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 9223372036854775806 +; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-VF4IC1: [[EXIT]]: +; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]] +; +; CHECK-VF4IC4-LABEL: define i64 @select_icmp_unsigned_iv_range( +; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) { +; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF4IC4: [[VECTOR_PH]]: +; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF4IC4: [[VECTOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 4 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 8 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD7]] +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], [[WIDE_LOAD8]] +; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], [[WIDE_LOAD9]] +; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], [[WIDE_LOAD10]] +; CHECK-VF4IC4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] +; CHECK-VF4IC4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]] +; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775792 +; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[TMP14]], <4 x i64> [[TMP15]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP16]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[RDX_MINMAX11]], <4 x i64> [[TMP17]]) +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[RDX_MINMAX12]]) +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP19]], 0 +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP19]], i64 [[RDX_START]] +; CHECK-VF4IC4-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF4IC4: [[SCALAR_PH]]: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -16, %[[MIDDLE_BLOCK]] ], [ -9223372036854775808, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ 9223372036854775792, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-VF4IC4: [[FOR_BODY]]: +; CHECK-VF4IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC3:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]] +; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-VF4IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]] +; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP21]], [[TMP22]] +; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]] +; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1 +; CHECK-VF4IC4-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1 +; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 9223372036854775806 +; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-VF4IC4: [[EXIT]]: +; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]] +; +; CHECK-VF1IC4-LABEL: define i64 @select_icmp_unsigned_iv_range( +; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) { +; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-VF1IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-VF1IC4: [[VECTOR_PH]]: +; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-VF1IC4: [[VECTOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[IV_I:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 -9223372036854775808, [[IV_I]] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[IV_I]], 1 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = add i64 [[IV_I]], 2 +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i64 [[IV_I]], 3 +; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-VF1IC4-NEXT: [[TMP36:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-VF1IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]] +; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP5]] +; CHECK-VF1IC4-NEXT: [[TMP37:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP16]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP17]], align 8 +; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP36]], [[TMP37]] +; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = icmp sgt i64 [[TMP11]], [[TMP19]] +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP12]], [[TMP20]] +; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp sgt i64 [[TMP13]], [[TMP21]] +; CHECK-VF1IC4-NEXT: [[TMP26]] = select i1 [[CMP2]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]] +; CHECK-VF1IC4-NEXT: [[TMP27]] = select i1 [[TMP23]], i64 [[TMP0]], i64 [[VEC_PHI1]] +; CHECK-VF1IC4-NEXT: [[TMP28]] = select i1 [[TMP24]], i64 [[TMP1]], i64 [[VEC_PHI2]] +; CHECK-VF1IC4-NEXT: [[TMP29]] = select i1 [[TMP25]], i64 [[TMP2]], i64 [[VEC_PHI3]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV_I]], 4 +; CHECK-VF1IC4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775804 +; CHECK-VF1IC4-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP26]], i64 [[TMP27]]) +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.umax.i64(i64 [[RDX_MINMAX]], i64 [[TMP28]]) +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.umax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP29]]) +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 0 +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]] +; CHECK-VF1IC4-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-VF1IC4: [[SCALAR_PH]]: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -4, %[[MIDDLE_BLOCK]] ], [ -9223372036854775808, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ 9223372036854775804, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ] +; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-VF1IC4: [[FOR_BODY]]: +; CHECK-VF1IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC3:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[IV_I1:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I1]] +; CHECK-VF1IC4-NEXT: [[TMP31:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8 +; CHECK-VF1IC4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I1]] +; CHECK-VF1IC4-NEXT: [[TMP32:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 +; CHECK-VF1IC4-NEXT: [[CMP3:%.*]] = icmp sgt i64 [[TMP31]], [[TMP32]] +; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP3]], i64 [[IV_J]], i64 [[RDX]] +; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV_I1]], 1 +; CHECK-VF1IC4-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1 +; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 9223372036854775806 +; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-VF1IC4: [[EXIT]]: +; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv.j = phi i64 [ 9223372036854775808, %entry], [ %inc3, %for.body ] + %iv.i = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %rdx = phi i64 [ %rdx.start, %entry ], [ %cond, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv.i + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv.i + %1 = load i64, ptr %arrayidx1, align 8 + %cmp2 = icmp sgt i64 %0, %1 + %cond = select i1 %cmp2, i64 %iv.j, i64 %rdx + %inc = add nuw nsw i64 %iv.i, 1 + %inc3 = add nsw i64 %iv.j, 1 + %exitcond.not = icmp eq i64 %inc, 9223372036854775806 + br i1 %exitcond.not, label %exit, label %for.body + +exit: ; preds = %for.body + ret i64 %cond +} + ; Negative tests define float @not_vectorized_select_float_induction_icmp(ptr %a, ptr %b, float %rdx.start, i64 %n) { @@ -1927,61 +2153,3 @@ for.body: ; preds = %entry, %for.body exit: ; preds = %for.body ret i64 %cond } -;. -; CHECK-VF4IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK-VF4IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK-VF4IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-VF4IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; CHECK-VF4IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK-VF4IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; CHECK-VF4IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK-VF4IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -; CHECK-VF4IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK-VF4IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} -; CHECK-VF4IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} -; CHECK-VF4IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} -; CHECK-VF4IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} -; CHECK-VF4IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} -; CHECK-VF4IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} -; CHECK-VF4IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]} -; CHECK-VF4IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]} -; CHECK-VF4IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]} -;. -; CHECK-VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK-VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK-VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; CHECK-VF4IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK-VF4IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; CHECK-VF4IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK-VF4IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -; CHECK-VF4IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK-VF4IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} -; CHECK-VF4IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} -; CHECK-VF4IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} -; CHECK-VF4IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} -; CHECK-VF4IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} -; CHECK-VF4IC4: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} -; CHECK-VF4IC4: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]} -; CHECK-VF4IC4: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]} -; CHECK-VF4IC4: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]} -;. -; CHECK-VF1IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK-VF1IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK-VF1IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-VF1IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} -; CHECK-VF1IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK-VF1IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} -; CHECK-VF1IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK-VF1IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} -; CHECK-VF1IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK-VF1IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]} -; CHECK-VF1IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} -; CHECK-VF1IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]} -; CHECK-VF1IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} -; CHECK-VF1IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} -; CHECK-VF1IC4: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} -; CHECK-VF1IC4: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]]} -; CHECK-VF1IC4: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]} -; CHECK-VF1IC4: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]} -;.