From 158f82a37dff347ee6e58ab3c81207d5add06185 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 5 Aug 2025 23:44:36 +0800
Subject: [PATCH 1/8] [VPlan] Replace EVL branch condition with (branch-on-count AVLNext, 0)

This changes the branch condition to use the AVL's backedge value instead
of the EVL-based IV. This allows us to emit bnez on RISC-V and removes a
use of the trip count, which should reduce register pressure.

To help match the AVL's backedge value I've added some new pattern
matchers. The m_Phi matcher is variadic in the number of operands it
accepts, so I had to add a new template argument to Recipe_match to relax
the assertion that the number of operands must exactly match the template
operand types. I've also used m_Sub in a couple of other places that were
already pattern matching on subs; happy to split this out if reviewers
prefer.

Fixes #151459
---
 .../Transforms/Vectorize/VPlanPatternMatch.h  | 67 +++++++++++++----
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |  3 +-
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 37 ++++++++--
 .../Transforms/Vectorize/VPlanTransforms.h    |  7 +-
 .../RISCV/evl-compatible-loops.ll             |  4 +-
 .../LoopVectorize/RISCV/inloop-reduction.ll   | 10 +--
 .../RISCV/interleaved-masked-access.ll        |  4 +-
 .../Transforms/LoopVectorize/RISCV/pr88802.ll |  4 +-
 .../LoopVectorize/RISCV/scalable-tailfold.ll  | 14 ++--
 .../RISCV/tail-folding-bin-unary-ops-args.ll  | 72 +++++++++----------
 .../RISCV/tail-folding-call-intrinsics.ll     | 36 +++++-----
 .../RISCV/tail-folding-cast-intrinsics.ll     | 44 ++++++------
 .../RISCV/tail-folding-cond-reduction.ll      | 24 +++----
 .../LoopVectorize/RISCV/tail-folding-div.ll   | 16 ++---
 .../tail-folding-fixed-order-recurrence.ll    | 14 ++--
 .../RISCV/tail-folding-gather-scatter.ll      |  2 +-
 .../RISCV/tail-folding-inloop-reduction.ll    | 28 ++++----
 .../RISCV/tail-folding-interleave.ll          |  2 +-
 .../RISCV/tail-folding-intermediate-store.ll  |  4 +-
 .../LoopVectorize/RISCV/tail-folding-iv32.ll  |  2 +-
 .../RISCV/tail-folding-known-no-overflow.ll   |  6 +-
 .../RISCV/tail-folding-masked-loadstore.ll    |  4 +-
 .../RISCV/tail-folding-ordered-reduction.ll   |  2 +-
 .../RISCV/tail-folding-reduction.ll           | 56 +++++++--------
 .../RISCV/tail-folding-reverse-load-store.ll  |  6 +-
 .../RISCV/tail-folding-safe-dep-distance.ll   | 10 +--
 .../RISCV/tail-folding-uniform-store.ll       |  2 +-
 .../truncate-to-minimal-bitwidth-cost.ll      |  2 +-
 .../truncate-to-minimal-bitwidth-evl-crash.ll |  4 +-
 .../RISCV/type-info-cache-evl-crash.ll        |  4 +-
 .../LoopVectorize/RISCV/uniform-load-store.ll | 14 ++--
 .../RISCV/vectorize-vp-intrinsics.ll          |  4 +-
 32 files changed, 285 insertions(+), 223 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index d133610ef4f75..ed8f33d23f038 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -195,7 +195,7 @@ inline bind_ty m_VPValue(VPValue *&V) { return V; }
 /// Match a VPInstruction, capturing if we match.
inline bind_ty m_VPInstruction(VPInstruction *&V) { return V; } -template struct Recipe_match { Ops_t Ops; @@ -231,9 +231,12 @@ struct Recipe_match { if ((!matchRecipeAndOpcode(R) && ...)) return false; - assert(R->getNumOperands() == std::tuple_size::value && - "recipe with matched opcode does not have the expected number of " - "operands"); + if (R->getNumOperands() != std::tuple_size::value) { + assert(Variadic && "non-variadic recipe with matched opcode does not " + "have the expected number of " + "operands"); + return false; + } auto IdxSeq = std::make_index_sequence::value>(); if (all_of_tuple_elements(IdxSeq, [R](auto Op, unsigned Idx) { @@ -256,7 +259,9 @@ struct Recipe_match { std::is_same::value || std::is_same::value || std::is_same::value || - std::is_same::value) + std::is_same::value || + std::is_same::value || + std::is_same::value) return DefR; else return DefR && DefR->getOpcode() == Opcode; @@ -272,11 +277,11 @@ struct Recipe_match { template using ZeroOpRecipe_match = - Recipe_match, Opcode, false, RecipeTys...>; + Recipe_match, Opcode, false, false, RecipeTys...>; template using UnaryRecipe_match = - Recipe_match, Opcode, false, RecipeTys...>; + Recipe_match, Opcode, false, false, RecipeTys...>; template using UnaryVPInstruction_match = @@ -293,7 +298,8 @@ using AllUnaryRecipe_match = template using BinaryRecipe_match = - Recipe_match, Opcode, Commutative, RecipeTys...>; + Recipe_match, Opcode, Commutative, + /*Variadic*/ false, RecipeTys...>; template using BinaryVPInstruction_match = @@ -302,8 +308,9 @@ using BinaryVPInstruction_match = template -using TernaryRecipe_match = Recipe_match, - Opcode, Commutative, RecipeTys...>; +using TernaryRecipe_match = + Recipe_match, Opcode, Commutative, + /*Variadic*/ false, RecipeTys...>; template using TernaryVPInstruction_match = @@ -343,8 +350,9 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { template -using Recipe4Op_match = Recipe_match, - Opcode, Commutative, RecipeTys...>; +using Recipe4Op_match = + Recipe_match, Opcode, Commutative, + /*Variadic*/ false, RecipeTys...>; template @@ -378,6 +386,12 @@ m_Broadcast(const Op0_t &Op0) { return m_VPInstruction(Op0); } +template +inline UnaryVPInstruction_match +m_ExplicitVectorLength(const Op0_t &Op0) { + return m_VPInstruction(Op0); +} + template inline BinaryVPInstruction_match m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1) { @@ -418,6 +432,12 @@ m_ZExtOrSExt(const Op0_t &Op0) { return m_CombineOr(m_ZExt(Op0), m_SExt(Op0)); } +template +inline match_combine_or, Op0_t> +m_ZExtOrSelf(const Op0_t &Op0) { + return m_CombineOr(m_ZExt(Op0), Op0); +} + template inline AllBinaryRecipe_match @@ -431,6 +451,12 @@ m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) { return AllBinaryRecipe_match(Op0, Op1); } +template +inline AllBinaryRecipe_match +m_Sub(const Op0_t &Op0, const Op1_t &Op1) { + return m_Binary(Op0, Op1); +} + template inline AllBinaryRecipe_match m_Mul(const Op0_t &Op0, const Op1_t &Op1) { @@ -476,7 +502,8 @@ inline GEPLikeRecipe_match m_GetElementPtr(const Op0_t &Op0, template using AllTernaryRecipe_match = Recipe_match, Opcode, false, - VPReplicateRecipe, VPInstruction, VPWidenSelectRecipe>; + /*Variadic*/ false, VPReplicateRecipe, VPInstruction, + VPWidenSelectRecipe>; template inline AllTernaryRecipe_match @@ -524,7 +551,8 @@ m_ScalarIVSteps(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { template using VPDerivedIV_match = - Recipe_match, 0, false, VPDerivedIVRecipe>; + Recipe_match, 0, false, /*Variadic*/ false, + 
VPDerivedIVRecipe>; template inline VPDerivedIV_match @@ -532,6 +560,17 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { return VPDerivedIV_match({Op0, Op1, Op2}); } +template +using PhiLikeRecipe_match = + Recipe_match, Instruction::PHI, false, true, + VPWidenPHIRecipe, VPHeaderPHIRecipe, VPInstruction>; + +template +inline PhiLikeRecipe_match +m_Phi(const Op0_t &Op0, const Op1_t &Op1, const OpTys &...Ops) { + return PhiLikeRecipe_match(Op0, Op1, Ops...); +} + /// Match a call argument at a given argument index. template struct Argument_match { /// Call argument index to match. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 47a807794eb3d..f64ca6c6a449d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -328,8 +328,7 @@ VPPartialReductionRecipe::computeCost(ElementCount VF, // Pick out opcode, type/ext information and use sub side effects from a widen // recipe. auto HandleWiden = [&](VPWidenRecipe *Widen) { - if (match(Widen, - m_Binary(m_SpecificInt(0), m_VPValue(Op)))) { + if (match(Widen, m_Sub(m_SpecificInt(0), m_VPValue(Op)))) { Widen = dyn_cast(Op->getDefiningRecipe()); } Opcode = Widen->getOpcode(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a7965a053e6e3..ace5b35522516 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -740,8 +740,7 @@ static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) { // IVStep will be the negated step of the subtraction. Check if Step == -1 // * IVStep. VPValue *Step; - if (!match(VPV, - m_Binary(m_VPValue(), m_VPValue(Step))) || + if (!match(VPV, m_Sub(m_VPValue(), m_VPValue(Step))) || !Step->isLiveIn() || !IVStep->isLiveIn()) return false; auto *StepCI = dyn_cast(Step->getLiveInIRValue()); @@ -2386,19 +2385,38 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { // Find EVL loop entries by locating VPEVLBasedIVPHIRecipe. // There should be only one EVL PHI in the entire plan. VPEVLBasedIVPHIRecipe *EVLPhi = nullptr; + VPValue *AVLNext = nullptr; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(Plan.getEntry()))) - for (VPRecipeBase &R : VPBB->phis()) - if (auto *PhiR = dyn_cast(&R)) { + for (VPRecipeBase &R : VPBB->phis()) { + auto *PhiR = dyn_cast(&R); + if (!PhiR) + continue; + VPValue *Backedge; + if (auto *EVL = dyn_cast(PhiR)) { assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected"); - EVLPhi = PhiR; + EVLPhi = EVL; + continue; } + if (match(PhiR, + m_Phi(m_Specific(Plan.getTripCount()), m_VPValue(Backedge))) && + match(Backedge, m_Sub(m_Specific(PhiR), + m_ZExtOrSelf(m_ExplicitVectorLength(m_CombineOr( + m_Specific(PhiR), + // The AVL may be capped to a safe distance. + m_Select(m_VPValue(), m_Specific(PhiR), + m_VPValue()))))))) { + AVLNext = Backedge; + } + } // Early return if no EVL PHI is found. if (!EVLPhi) return; + assert(AVLNext && "Didn't find AVL backedge?"); + VPBasicBlock *HeaderVPBB = EVLPhi->getParent(); VPValue *EVLIncrement = EVLPhi->getBackedgeValue(); @@ -2425,7 +2443,7 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { // Replace the use of VectorTripCount in the latch-exiting block. 
// Before: (branch-on-count EVLIVInc, VectorTripCount) - // After: (branch-on-count EVLIVInc, TripCount) + // After: (branch-on-count AVLNext, 0) VPBasicBlock *LatchExiting = HeaderVPBB->getPredecessors()[1]->getEntryBasicBlock(); @@ -2438,7 +2456,12 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { m_BranchOnCount(m_VPValue(EVLIncrement), m_Specific(&Plan.getVectorTripCount()))) && "Unexpected terminator in EVL loop"); - LatchExitingBr->setOperand(1, Plan.getTripCount()); + + Type *AVLTy = VPTypeAnalysis(Plan).inferScalarType(AVLNext); + + LatchExitingBr->setOperand(0, AVLNext); + LatchExitingBr->setOperand( + 1, Plan.getOrAddLiveIn(ConstantInt::getNullValue(AVLTy))); } void VPlanTransforms::dropPoisonGeneratingRecipes( diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 5943684e17a76..f42850a719ed2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -216,9 +216,10 @@ struct VPlanTransforms { /// variable vector lengths instead of fixed lengths. This transformation: /// * Makes EVL-Phi concrete. // * Removes CanonicalIV and increment. - /// * Replaces fixed-length stepping (branch-on-cond CanonicalIVInc, - /// VectorTripCount) with variable-length stepping (branch-on-cond - /// EVLIVInc, TripCount). + /// * Replaces the exit condition from + /// (branch-on-cond CanonicalIVInc, VectorTripCount) + /// to + /// (branch-on-cond AVLNext, 0) static void canonicalizeEVLLoops(VPlan &Plan); /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll index 5f13089ff17fd..3f2c4d9f94a07 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll @@ -37,8 +37,8 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 6e2434aefce9d..d31ea53cde213 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -141,7 +141,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP5]]) ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP5]] -; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], 
label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) @@ -195,7 +195,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i32 [[TMP5]], [[TMP6]] -; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] @@ -362,8 +362,8 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP15]]) ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] @@ -410,7 +410,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] -; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll index 976ce77d2ba29..73d1b9c307ff4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll @@ -143,7 +143,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] -; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024 +; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 ; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_DATA-WITH-EVL: middle.block: ; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]] @@ -334,7 +334,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add 
nuw i32 [[TMP1]], [[EVL_BASED_IV]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] -; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 1024 +; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 ; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; PREDICATED_DATA-WITH-EVL: middle.block: ; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index 01df43618aad0..b4a164603b353 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -50,8 +50,8 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP11]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP11]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT8]] -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 9 -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT1:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index ed507961ef825..61ee4fa88d0a1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -33,7 +33,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] @@ -98,8 +98,8 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP12]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: scalar.ph: @@ -162,7 +162,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; CHECK-NEXT: 
[[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP11]]) @@ -230,7 +230,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] @@ -288,7 +288,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] @@ -409,7 +409,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll index 38e78321a4f36..b2d1fc0778d1d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll @@ -43,8 +43,8 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -169,8 +169,8 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], 
!llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -295,8 +295,8 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -421,8 +421,8 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -547,8 +547,8 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -673,8 +673,8 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -799,8 +799,8 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], 
[[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -925,8 +925,8 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1051,8 +1051,8 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1177,8 +1177,8 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1303,8 +1303,8 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1429,8 +1429,8 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; 
IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1555,8 +1555,8 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1684,8 +1684,8 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1812,8 +1812,8 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1940,8 +1940,8 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] ; IF-EVL: 
[[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -2068,8 +2068,8 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -2249,8 +2249,8 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 100 -; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FINISH_LOOPEXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll index f60474534ff3c..22144b3576645 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll @@ -50,8 +50,8 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -199,8 +199,8 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -348,8 +348,8 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], 
[[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -497,8 +497,8 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -640,8 +640,8 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -769,8 +769,8 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP13]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] -; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -900,8 +900,8 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] -; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1039,8 +1039,8 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], 
[[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] -; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1176,8 +1176,8 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index ce2b790cdbd4f..a1604ead7a089 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -43,8 +43,8 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] -; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -171,8 +171,8 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] -; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -299,8 +299,8 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] -; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -427,8 +427,8 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] -; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -555,8 +555,8 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] -; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -683,8 +683,8 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -811,8 +811,8 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -939,8 +939,8 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1067,8 +1067,8 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1195,8 +1195,8 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -1322,8 +1322,8 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] +; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll index d02d53b8e1207..da1f06420b46f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll @@ -47,8 +47,8 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP22]], [[EVL_BASED_IV1]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP22]] -; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT1]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP20]]) ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] @@ -99,7 +99,7 @@ define i32 
@cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP23]] -; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] @@ -275,8 +275,8 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP23]] -; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PREDPHI]]) ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] @@ -330,7 +330,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP23]] -; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] @@ -527,7 +527,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP20]], [[EVL_BASED_IV1]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT1]], [[N]] +; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP19]]) @@ -588,7 +588,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP19]], [[EVL_BASED_IV1]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT1]], [[N]] +; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] @@ -794,8 +794,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 
[[TMP25]], [[IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add [[VEC_IND2]], [[BROADCAST_SPLAT2]] -; IF-EVL-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP24]]) ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] @@ -858,8 +858,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] ; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-INLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-INLOOP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] ; IF-EVL-INLOOP: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll index ae047f5f63476..ba8526cdc3086 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll @@ -37,8 +37,8 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -158,8 +158,8 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -278,8 +278,8 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub 
nuw i64 [[AVL]], [[TMP14]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -398,8 +398,8 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] ; IF-EVL: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index 987f9460c2172..4da1e660c3330 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -44,8 +44,8 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] -; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]] -; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -182,8 +182,8 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP15]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP23]] -; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]] -; IF-EVL-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -342,8 +342,8 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP18]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP27]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP27]] -; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]] -; IF-EVL-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; 
IF-EVL-NEXT: [[TMP26:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] ; IF-EVL: [[SCALAR_PH]]: @@ -657,7 +657,7 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]] +; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll index 2aeb1d0b25b5d..a69510c69845a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll @@ -43,7 +43,7 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll index 3e23df78e0b68..0001102a12f0c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll @@ -35,7 +35,7 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -250,7 +250,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -358,7 +358,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 
[[AVL]], [[TMP9]] -; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -466,7 +466,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -574,7 +574,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -685,7 +685,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -796,7 +796,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -907,7 +907,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1018,7 +1018,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP10]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1234,7 +1234,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1347,7 +1347,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1678,7 +1678,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] -; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1795,7 +1795,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]]) @@ -1912,7 +1912,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]]) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index 8d987a94d383d..caa8faea7e6ca 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -42,7 +42,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext 
i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] -; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll index be6ae1d4a0f6b..2fe1ee2d200db 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll @@ -53,7 +53,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP21]] -; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP19]]) @@ -110,7 +110,7 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP13]], [[TMP23]] -; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-INLOOP: middle.block: ; IF-EVL-INLOOP-NEXT: store i32 [[TMP22]], ptr [[ADDR]], align 4, !alias.scope [[META7:![0-9]+]], !noalias [[META0]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll index d474a03b90ee9..f4ff85091543a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll @@ -31,7 +31,7 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP_LOAD]], ptr align 4 [[TMP16]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP12]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[TMP11]], [[TMP12]] -; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll index 06c6bfe64dd2f..8b0f054993f65 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll +++ 
b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll @@ -36,7 +36,7 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[UMAX]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] @@ -105,7 +105,7 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] @@ -174,7 +174,7 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC_ADD]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll index 5f407fcca259d..d66ae959513f1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll @@ -35,8 +35,8 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP9]], [[TMP21]] -; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll index 59d137011d288..f9c83ec7af9b0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll @@ -34,7 +34,7 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw 
i64 [[TMP9]], [[TMP15]] -; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll index 2d5718b24cb30..3b89995e91b8e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll @@ -34,8 +34,8 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -252,8 +252,8 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -363,8 +363,8 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -474,8 +474,8 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP11:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -587,8 +587,8 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -705,8 +705,8 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -823,8 +823,8 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -941,8 +941,8 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1057,8 +1057,8 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] -; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; 
IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1277,8 +1277,8 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1395,8 +1395,8 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1729,8 +1729,8 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] -; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP20:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP17]]) ; IF-EVL-NEXT: br label [[FOR_END:%.*]] @@ -1847,8 +1847,8 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 
@llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] @@ -1964,8 +1964,8 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index e2db28d54ac58..6c9437ae5ff1b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -48,7 +48,7 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] -; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 +; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[LOOPEND:%.*]] @@ -196,7 +196,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]] -; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 +; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[LOOPEND:%.*]] @@ -380,7 +380,7 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP27]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP27]] -; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll index 1c78b25e114c4..79412815e5462 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll @@ -36,8 +36,8 @@ define void @test(ptr %p) { ; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = 
add nuw i64 [[TMP13]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP5]], [[TMP13]] -; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 200 -; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: @@ -367,8 +367,8 @@ define void @trivial_due_max_vscale(ptr %p) { ; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP5]], [[TMP13]] -; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 200 -; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: @@ -475,7 +475,7 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] -; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 3002 +; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll index 687a2e7bf9312..dc8245f3f281c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll @@ -42,7 +42,7 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index 24649729f43ba..5b650da33fc6b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -171,7 +171,7 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i8.nxv4p0( zeroinitializer, align 1 [[BROADCAST_SPLAT2]], splat (i1 true), i32 [[TMP6]]) ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_EVL_NEXT]], 9 +; CHECK-NEXT: 
[[TMP7:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll index dfdc893570810..ece1d27836669 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll @@ -28,8 +28,8 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) { ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 9 -; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll index 6476373c08401..b7a818401b1df 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll @@ -48,8 +48,8 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count) ; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]] -; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 568aa953de511..bcd02e0dce5ce 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -109,7 +109,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] @@ -431,7 +431,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]] ; 
TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] @@ -581,7 +581,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64 ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] @@ -718,7 +718,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64 ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] @@ -882,7 +882,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] -; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] @@ -1060,7 +1060,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] -; TF-SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] @@ -1208,7 +1208,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64 ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll index 85116feab6a33..0907bae8b39ba 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll @@ -35,8 +35,8 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP20]] -; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] ; IF-EVL: scalar.ph: From 50ca2e5e181fa118a57604164b08344c86de0884 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 6 Aug 2025 16:28:59 +0800 Subject: [PATCH 2/8] Take out most pattern match changes --- .../Transforms/Vectorize/VPlanPatternMatch.h | 58 +++++-------------- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 3 +- .../Transforms/Vectorize/VPlanTransforms.cpp | 22 ++++--- 3 files changed, 30 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index ed8f33d23f038..857c7b633bbcb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -195,7 +195,7 @@ inline bind_ty m_VPValue(VPValue *&V) { return V; } /// Match a VPInstruction, capturing if we match. 
inline bind_ty m_VPInstruction(VPInstruction *&V) { return V; } -template struct Recipe_match { Ops_t Ops; @@ -232,9 +232,9 @@ struct Recipe_match { return false; if (R->getNumOperands() != std::tuple_size::value) { - assert(Variadic && "non-variadic recipe with matched opcode does not " - "have the expected number of " - "operands"); + assert(Opcode == Instruction::PHI && + "non-variadic recipe with matched opcode does not have the " + "expected number of operands"); return false; } @@ -259,9 +259,7 @@ struct Recipe_match { std::is_same::value || std::is_same::value || std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value) + std::is_same::value) return DefR; else return DefR && DefR->getOpcode() == Opcode; @@ -277,11 +275,11 @@ struct Recipe_match { template using ZeroOpRecipe_match = - Recipe_match, Opcode, false, false, RecipeTys...>; + Recipe_match, Opcode, false, RecipeTys...>; template using UnaryRecipe_match = - Recipe_match, Opcode, false, false, RecipeTys...>; + Recipe_match, Opcode, false, RecipeTys...>; template using UnaryVPInstruction_match = @@ -298,8 +296,7 @@ using AllUnaryRecipe_match = template using BinaryRecipe_match = - Recipe_match, Opcode, Commutative, - /*Variadic*/ false, RecipeTys...>; + Recipe_match, Opcode, Commutative, RecipeTys...>; template using BinaryVPInstruction_match = @@ -308,9 +305,8 @@ using BinaryVPInstruction_match = template -using TernaryRecipe_match = - Recipe_match, Opcode, Commutative, - /*Variadic*/ false, RecipeTys...>; +using TernaryRecipe_match = Recipe_match, + Opcode, Commutative, RecipeTys...>; template using TernaryVPInstruction_match = @@ -350,9 +346,8 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { template -using Recipe4Op_match = - Recipe_match, Opcode, Commutative, - /*Variadic*/ false, RecipeTys...>; +using Recipe4Op_match = Recipe_match, + Opcode, Commutative, RecipeTys...>; template @@ -386,12 +381,6 @@ m_Broadcast(const Op0_t &Op0) { return m_VPInstruction(Op0); } -template -inline UnaryVPInstruction_match -m_ExplicitVectorLength(const Op0_t &Op0) { - return m_VPInstruction(Op0); -} - template inline BinaryVPInstruction_match m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1) { @@ -451,12 +440,6 @@ m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) { return AllBinaryRecipe_match(Op0, Op1); } -template -inline AllBinaryRecipe_match -m_Sub(const Op0_t &Op0, const Op1_t &Op1) { - return m_Binary(Op0, Op1); -} - template inline AllBinaryRecipe_match m_Mul(const Op0_t &Op0, const Op1_t &Op1) { @@ -502,8 +485,7 @@ inline GEPLikeRecipe_match m_GetElementPtr(const Op0_t &Op0, template using AllTernaryRecipe_match = Recipe_match, Opcode, false, - /*Variadic*/ false, VPReplicateRecipe, VPInstruction, - VPWidenSelectRecipe>; + VPReplicateRecipe, VPInstruction, VPWidenSelectRecipe>; template inline AllTernaryRecipe_match @@ -551,8 +533,7 @@ m_ScalarIVSteps(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { template using VPDerivedIV_match = - Recipe_match, 0, false, /*Variadic*/ false, - VPDerivedIVRecipe>; + Recipe_match, 0, false, VPDerivedIVRecipe>; template inline VPDerivedIV_match @@ -560,17 +541,6 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { return VPDerivedIV_match({Op0, Op1, Op2}); } -template -using PhiLikeRecipe_match = - Recipe_match, Instruction::PHI, false, true, - VPWidenPHIRecipe, VPHeaderPHIRecipe, VPInstruction>; - -template -inline PhiLikeRecipe_match -m_Phi(const Op0_t &Op0, const Op1_t &Op1, const OpTys &...Ops) { - return 
PhiLikeRecipe_match(Op0, Op1, Ops...); -} - /// Match a call argument at a given argument index. template struct Argument_match { /// Call argument index to match. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f64ca6c6a449d..47a807794eb3d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -328,7 +328,8 @@ VPPartialReductionRecipe::computeCost(ElementCount VF, // Pick out opcode, type/ext information and use sub side effects from a widen // recipe. auto HandleWiden = [&](VPWidenRecipe *Widen) { - if (match(Widen, m_Sub(m_SpecificInt(0), m_VPValue(Op)))) { + if (match(Widen, + m_Binary(m_SpecificInt(0), m_VPValue(Op)))) { Widen = dyn_cast(Op->getDefiningRecipe()); } Opcode = Widen->getOpcode(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ace5b35522516..f5908d895de32 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -740,7 +740,8 @@ static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) { // IVStep will be the negated step of the subtraction. Check if Step == -1 // * IVStep. VPValue *Step; - if (!match(VPV, m_Sub(m_VPValue(), m_VPValue(Step))) || + if (!match(VPV, + m_Binary(m_VPValue(), m_VPValue(Step))) || !Step->isLiveIn() || !IVStep->isLiveIn()) return false; auto *StepCI = dyn_cast(Step->getLiveInIRValue()); @@ -2400,13 +2401,18 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { continue; } if (match(PhiR, - m_Phi(m_Specific(Plan.getTripCount()), m_VPValue(Backedge))) && - match(Backedge, m_Sub(m_Specific(PhiR), - m_ZExtOrSelf(m_ExplicitVectorLength(m_CombineOr( - m_Specific(PhiR), - // The AVL may be capped to a safe distance. - m_Select(m_VPValue(), m_Specific(PhiR), - m_VPValue()))))))) { + m_VPInstruction( + m_Specific(Plan.getTripCount()), m_VPValue(Backedge))) && + match(Backedge, + m_VPInstruction( + m_Specific(PhiR), + m_ZExtOrSelf( + m_VPInstruction( + m_CombineOr( + m_Specific(PhiR), + // The AVL may be capped to a safe distance. + m_Select(m_VPValue(), m_Specific(PhiR), + m_VPValue()))))))) { AVLNext = Backedge; } } From dc68b0ba2ce39f770762a4f6e16f6fe876bfcf9c Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 11 Aug 2025 18:41:11 +0800 Subject: [PATCH 3/8] Transform to branch-on-cond, update comments --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 12 +++++++----- llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 1899d1cc37dad..c76c5defd9e47 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2458,7 +2458,7 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { // Replace the use of VectorTripCount in the latch-exiting block. 
// Before: (branch-on-count EVLIVInc, VectorTripCount) - // After: (branch-on-count AVLNext, 0) + // After: (branch-on-cond eq AVLNext, 0) VPBasicBlock *LatchExiting = HeaderVPBB->getPredecessors()[1]->getEntryBasicBlock(); @@ -2473,10 +2473,12 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { "Unexpected terminator in EVL loop"); Type *AVLTy = VPTypeAnalysis(Plan).inferScalarType(AVLNext); - - LatchExitingBr->setOperand(0, AVLNext); - LatchExitingBr->setOperand( - 1, Plan.getOrAddLiveIn(ConstantInt::getNullValue(AVLTy))); + VPBuilder Builder(LatchExitingBr); + VPValue *Cmp = + Builder.createICmp(CmpInst::ICMP_EQ, AVLNext, + Plan.getOrAddLiveIn(ConstantInt::getNullValue(AVLTy))); + Builder.createNaryOp(VPInstruction::BranchOnCond, Cmp); + LatchExitingBr->eraseFromParent(); } void VPlanTransforms::dropPoisonGeneratingRecipes( diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 5b295d3af005b..ab6cfcc3d7773 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -225,9 +225,9 @@ struct VPlanTransforms { /// * Makes EVL-Phi concrete. // * Removes CanonicalIV and increment. /// * Replaces the exit condition from - /// (branch-on-cond CanonicalIVInc, VectorTripCount) + /// (branch-on-count CanonicalIVInc, VectorTripCount) /// to - /// (branch-on-cond AVLNext, 0) + /// (branch-on-cond eq AVLNext, 0) static void canonicalizeEVLLoops(VPlan &Plan); /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p From 428ce1604b18f0ffea230b13662a79d2bc33413c Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 14 Aug 2025 16:47:41 +0800 Subject: [PATCH 4/8] clang-format --- llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 15e65aee7cfee..ae40b36312564 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -353,8 +353,7 @@ m_ZExtOrSExt(const Op0_t &Op0) { } template -inline match_combine_or, - Op0_t> +inline match_combine_or, Op0_t> m_ZExtOrSelf(const Op0_t &Op0) { return m_CombineOr(m_ZExt(Op0), Op0); } From 2931aa7e0aff692fe4f02a93d2540b325a655c5e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 21 Aug 2025 18:51:17 +0800 Subject: [PATCH 5/8] Simplify matching --- .../Transforms/Vectorize/VPlanPatternMatch.h | 12 ++++ .../Transforms/Vectorize/VPlanTransforms.cpp | 59 ++++++++----------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 18ce84c5dcaed..a21f33089638c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -313,6 +313,12 @@ m_Broadcast(const Op0_t &Op0) { return m_VPInstruction(Op0); } +template +inline VPInstruction_match +m_EVL(const Op0_t &Op0) { + return m_VPInstruction(Op0); +} + template inline VPInstruction_match m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1) { @@ -370,6 +376,12 @@ m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) { return AllRecipe_commutative_match(Op0, Op1); } +template +inline AllRecipe_commutative_match +m_c_Add(const Op0_t &Op0, const Op1_t &Op1) { + return m_c_Binary(Op0, Op1); +} + template inline AllRecipe_match m_Sub(const Op0_t &Op0, const Op1_t &Op1) { diff --git 
a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index eb3bcee240935..8aa01493cbe3c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -741,8 +741,7 @@ static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) { VPValue *IVStep = WideIV->getStepValue(); switch (ID.getInductionOpcode()) { case Instruction::Add: - return match(VPV, m_c_Binary(m_Specific(WideIV), - m_Specific(IVStep))); + return match(VPV, m_c_Add(m_Specific(WideIV), m_Specific(IVStep))); case Instruction::FAdd: return match(VPV, m_c_Binary(m_Specific(WideIV), m_Specific(IVStep))); @@ -2231,9 +2230,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { assert(all_of(Plan.getVFxUF().users(), [&Plan](VPUser *U) { - return match(U, m_c_Binary( - m_Specific(Plan.getCanonicalIV()), - m_Specific(&Plan.getVFxUF()))) || + return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()), + m_Specific(&Plan.getVFxUF()))) || isa(U); }) && "Only users of VFxUF should be VPWidenPointerInductionRecipe and the " @@ -2446,45 +2444,37 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { // Find EVL loop entries by locating VPEVLBasedIVPHIRecipe. // There should be only one EVL PHI in the entire plan. VPEVLBasedIVPHIRecipe *EVLPhi = nullptr; - VPValue *AVLNext = nullptr; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(Plan.getEntry()))) - for (VPRecipeBase &R : VPBB->phis()) { - auto *PhiR = dyn_cast(&R); - if (!PhiR) - continue; - VPValue *Backedge; - if (auto *EVL = dyn_cast(PhiR)) { + for (VPRecipeBase &R : VPBB->phis()) + if (auto *PhiR = dyn_cast(&R)) { assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected"); - EVLPhi = EVL; - continue; - } - if (match(PhiR, - m_VPInstruction( - m_Specific(Plan.getTripCount()), m_VPValue(Backedge))) && - match(Backedge, - m_VPInstruction( - m_Specific(PhiR), - m_ZExtOrSelf( - m_VPInstruction( - m_CombineOr( - m_Specific(PhiR), - // The AVL may be capped to a safe distance. - m_Select(m_VPValue(), m_Specific(PhiR), - m_VPValue()))))))) { - AVLNext = Backedge; + EVLPhi = PhiR; } - } // Early return if no EVL PHI is found. if (!EVLPhi) return; - assert(AVLNext && "Didn't find AVL backedge?"); - VPBasicBlock *HeaderVPBB = EVLPhi->getParent(); VPValue *EVLIncrement = EVLPhi->getBackedgeValue(); + VPValue *AVL, *EVL; + if (!match(EVLIncrement, + m_c_Add(m_ZExtOrSelf(m_VPValue(EVL)), m_Specific(EVLPhi))) || + !match(EVL, m_EVL(m_VPValue(AVL)))) + llvm_unreachable("Didn't find EVL?"); + + // The AVL may be capped to a safe distance. + VPValue *SafeAVL; + if (match(AVL, m_Select(m_VPValue(), m_VPValue(SafeAVL), m_VPValue()))) + AVL = SafeAVL; + + VPValue *AVLNext; + if (!match(AVL, m_VPInstruction( + m_Specific(Plan.getTripCount()), m_VPValue(AVLNext))) || + !match(AVLNext, m_Sub(m_Specific(AVL), m_ZExtOrSelf(m_Specific(EVL))))) + llvm_unreachable("Didn't find AVL backedge?"); // Convert EVLPhi to concrete recipe. auto *ScalarR = @@ -2496,9 +2486,8 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { // Replace CanonicalIVInc with EVL-PHI increment. 
auto *CanonicalIV = cast(&*HeaderVPBB->begin()); VPValue *Backedge = CanonicalIV->getIncomingValue(1); - assert(match(Backedge, - m_c_Binary(m_Specific(CanonicalIV), - m_Specific(&Plan.getVFxUF()))) && + assert(match(Backedge, m_c_Add(m_Specific(CanonicalIV), + m_Specific(&Plan.getVFxUF()))) && "Unexpected canonical iv"); Backedge->replaceAllUsesWith(EVLIncrement); From b24bfdba5ae1069fd1fd845712e74a7a52c1d86f Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 21 Aug 2025 20:21:44 +0800 Subject: [PATCH 6/8] Simplify matches --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8aa01493cbe3c..f6379ae733974 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2459,11 +2459,10 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { VPBasicBlock *HeaderVPBB = EVLPhi->getParent(); VPValue *EVLIncrement = EVLPhi->getBackedgeValue(); - VPValue *AVL, *EVL; + VPValue *AVL; if (!match(EVLIncrement, - m_c_Add(m_ZExtOrSelf(m_VPValue(EVL)), m_Specific(EVLPhi))) || - !match(EVL, m_EVL(m_VPValue(AVL)))) - llvm_unreachable("Didn't find EVL?"); + m_c_Add(m_ZExtOrSelf(m_EVL(m_VPValue(AVL))), m_Specific(EVLPhi)))) + llvm_unreachable("Didn't find AVL?"); // The AVL may be capped to a safe distance. VPValue *SafeAVL; @@ -2472,8 +2471,7 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { VPValue *AVLNext; if (!match(AVL, m_VPInstruction( - m_Specific(Plan.getTripCount()), m_VPValue(AVLNext))) || - !match(AVLNext, m_Sub(m_Specific(AVL), m_ZExtOrSelf(m_Specific(EVL))))) + m_Specific(Plan.getTripCount()), m_VPValue(AVLNext)))) llvm_unreachable("Didn't find AVL backedge?"); // Convert EVLPhi to concrete recipe. From d8b01f9e3ceb8a23ca0f97c154c75e6654a6e3ac Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 21 Aug 2025 22:51:52 +0800 Subject: [PATCH 7/8] Use [[maybe_unused]] + assert --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index f6379ae733974..8528fbd9bc3fb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2460,9 +2460,10 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { VPBasicBlock *HeaderVPBB = EVLPhi->getParent(); VPValue *EVLIncrement = EVLPhi->getBackedgeValue(); VPValue *AVL; - if (!match(EVLIncrement, - m_c_Add(m_ZExtOrSelf(m_EVL(m_VPValue(AVL))), m_Specific(EVLPhi)))) - llvm_unreachable("Didn't find AVL?"); + [[maybe_unused]] bool FoundAVL = + match(EVLIncrement, + m_c_Add(m_ZExtOrSelf(m_EVL(m_VPValue(AVL))), m_Specific(EVLPhi))); + assert(FoundAVL && "Didn't find AVL?"); // The AVL may be capped to a safe distance. VPValue *SafeAVL; @@ -2470,9 +2471,10 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { AVL = SafeAVL; VPValue *AVLNext; - if (!match(AVL, m_VPInstruction( - m_Specific(Plan.getTripCount()), m_VPValue(AVLNext)))) - llvm_unreachable("Didn't find AVL backedge?"); + [[maybe_unused]] bool FoundAVLNext = + match(AVL, m_VPInstruction( + m_Specific(Plan.getTripCount()), m_VPValue(AVLNext))); + assert(FoundAVLNext && "Didn't find AVL backedge?"); // Convert EVLPhi to concrete recipe. 
auto *ScalarR = From a5d3767b1d704b74e0f5db299ee94742f3a62af2 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 26 Aug 2025 18:50:04 +0800 Subject: [PATCH 8/8] Update test --- llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll index 2eca655804ee1..032e821e5934b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll @@ -18,7 +18,6 @@ define void @pr154103(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalia ; CHECK-NEXT: [[INDUCTION:%.*]] = add splat (i64 1), [[TMP1]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ -7905747460161236406, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) @@ -45,10 +44,9 @@ define void @pr154103(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalia ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i16.nxv4p0( [[TMP15]], align 2 [[BROADCAST_SPLAT2]], splat (i1 true), i32 [[TMP2]]) ; CHECK-NEXT: store i32 0, ptr [[D]], align 4 ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP2]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], -7905747460161236406 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]]
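
The pr154103.ll update above shows the end state of the series: the EVL-based IV phi and its add disappear, and the latch compare tests the AVL backedge value against zero instead of comparing an EVL-based IV against the trip count. A minimal standalone sketch of that latch shape, using hypothetical function and value names (not taken from any test in this series), could look like:

; Sketch only: the AVL phi counts down by the zero-extended EVL, and the
; exit test is an eq-with-zero compare on the AVL backedge value.
define void @avl_countdown_sketch(i64 %n) {
entry:
  br label %vector.body

vector.body:
  %avl = phi i64 [ %n, %entry ], [ %avl.next, %vector.body ]
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 4, i1 true)
  ; ... EVL-predicated vector work using %evl goes here ...
  %evl.zext = zext i32 %evl to i64
  %avl.next = sub nuw i64 %avl, %evl.zext
  ; Previously the exit test was: icmp eq i64 %index.evl.next, <trip count>
  %done = icmp eq i64 %avl.next, 0
  br i1 %done, label %exit, label %vector.body

exit:
  ret void
}

declare i32 @llvm.experimental.get.vector.length.i64(i64, i32, i1)

As in the pr154103.ll diff, the only value live across the latch for the exit decision is %avl.next, and the compare no longer references the trip count.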