From 8dec7cf95478273269ecbc2062b7a3254b485d5e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Sat, 12 Jul 2025 01:40:59 +0800 Subject: [PATCH 01/16] [VPlan] Allow generating vectors with VPInstruction::ptradd. NFC Currently a ptradd can only generate a scalar, or a series of scalars per-lane. In an upcoming patch to expand VPWidenPointerRecipe into smaller recipes, we need to be able to generate a vector ptradd, which currently we can't do. This adds support for generating vectors by checking to see if the offset operand is a vector: If it isn't, it will generate per-lane scalars as per usual. --- llvm/lib/Transforms/Vectorize/VPlan.h | 8 +++++--- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 14 +++++--------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 9a6e4b36397b3..0d9af0210a393 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -958,8 +958,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, ExtractPenultimateElement, LogicalAnd, // Non-poison propagating logical And. // Add an offset in bytes (second operand) to a base pointer (first - // operand). Only generates scalar values (either for the first lane only or - // for all lanes, depending on its uses). + // operand). The base pointer must be scalar, but the offset can be a + // scalar, multiple scalars, or a vector. If the offset is multiple scalars + // then it will generate multiple scalar values (either for the first lane + // only or for all lanes, depending on its uses). PtrAdd, // Returns a scalar boolean value, which is true if any lane of its // (boolean) vector operands is true. It produces the reduced value across @@ -998,7 +1000,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, /// values per all lanes, stemming from an original ingredient. 
This method /// identifies the (rare) cases of VPInstructions that do so as well, w/o an /// underlying ingredient. - bool doesGeneratePerAllLanes() const; + bool doesGeneratePerAllLanes(VPTransformState &State) const; /// Returns true if we can generate a scalar for the first lane only if /// needed. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 75ade13b09d9c..4b7d21edbb48a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -494,8 +494,9 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { } #endif -bool VPInstruction::doesGeneratePerAllLanes() const { - return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this); +bool VPInstruction::doesGeneratePerAllLanes(VPTransformState &State) const { + return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this) && + !State.hasVectorValue(getOperand(1)); } bool VPInstruction::canGenerateScalarForFirstLane() const { @@ -848,10 +849,8 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreateLogicalAnd(A, B, Name); } case VPInstruction::PtrAdd: { - assert(vputils::onlyFirstLaneUsed(this) && - "can only generate first lane for PtrAdd"); Value *Ptr = State.get(getOperand(0), VPLane(0)); - Value *Addend = State.get(getOperand(1), VPLane(0)); + Value *Addend = State.get(getOperand(1), vputils::onlyFirstLaneUsed(this)); return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); } case VPInstruction::AnyOf: { @@ -911,9 +910,6 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, } } - assert(!doesGeneratePerAllLanes() && - "Should only generate a vector value or single scalar, not scalars " - "for all lanes."); return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind); } @@ -1001,7 +997,7 @@ void VPInstruction::execute(VPTransformState &State) { bool GeneratesPerFirstLaneOnly = 
canGenerateScalarForFirstLane() && (vputils::onlyFirstLaneUsed(this) || isVectorToScalar() || isSingleScalar()); - bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); + bool GeneratesPerAllLanes = doesGeneratePerAllLanes(State); if (GeneratesPerAllLanes) { for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue(); Lane != NumLanes; ++Lane) { From 61e4aec21dc1e0eaee695bf9c759db9072b1488a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Sat, 12 Jul 2025 02:23:24 +0800 Subject: [PATCH 02/16] [VPlan] Expand VPWidenPointerInductionRecipe into separate recipes Stacked on #148273 to be able to use VPInstruction::PtrAdd. This is the VPWidenPointerInductionRecipe equivalent of #118638, with the motivation of allowing us to use the EVL as the induction step. Most of the new VPlan transformation is a straightforward translation of the existing execute code. VPUnrollPartAccessor unfortunately doesn't work outside of VPlanRecipes.cpp so here the operands are just manually checked to see if they're unrolled. 
--- llvm/lib/Transforms/Vectorize/VPlan.cpp | 15 --- llvm/lib/Transforms/Vectorize/VPlan.h | 16 +-- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 86 -------------- .../Transforms/Vectorize/VPlanTransforms.cpp | 107 ++++++++++++++++++ .../LoopVectorize/AArch64/sve-widen-gep.ll | 17 ++- .../ARM/mve-reg-pressure-vmla.ll | 18 +-- .../LoopVectorize/ARM/pointer_iv.ll | 30 ++--- .../LoopVectorize/RISCV/strided-accesses.ll | 24 ++-- .../Transforms/LoopVectorize/X86/pr48340.ll | 4 +- .../LoopVectorize/pointer-induction.ll | 4 +- 10 files changed, 156 insertions(+), 165 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 40a55656bfa7e..2ca2e273392db 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1043,21 +1043,6 @@ void VPlan::execute(VPTransformState *State) { if (isa(&R)) continue; - if (auto *WidenPhi = dyn_cast(&R)) { - assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) && - "recipe generating only scalars should have been replaced"); - auto *GEP = cast(State->get(WidenPhi)); - PHINode *Phi = cast(GEP->getPointerOperand()); - - Phi->setIncomingBlock(1, VectorLatchBB); - - // Move the last step to the end of the latch block. This ensures - // consistent placement of all induction updates. - Instruction *Inc = cast(Phi->getIncomingValue(1)); - Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator())); - continue; - } - auto *PhiR = cast(&R); // VPInstructions currently model scalar Phis only. 
bool NeedsScalar = isa(PhiR) || diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 0d9af0210a393..6d658287fe738 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2066,8 +2066,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { } }; -class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, - public VPUnrollPartAccessor<4> { +class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe { bool IsScalarAfterVectorization; public: @@ -2095,19 +2094,14 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC) - /// Generate vector values for the pointer induction. - void execute(VPTransformState &State) override; + void execute(VPTransformState &State) override { + llvm_unreachable("cannot execute this recipe, should be expanded via " + "expandVPWidenIntOrFpInductionRecipe"); + }; /// Returns true if only scalar values will be generated. bool onlyScalarsGenerated(bool IsScalable); - /// Returns the VPValue representing the value of this induction at - /// the first unrolled part, if it exists. Returns itself if unrolling did not - /// take place. - VPValue *getFirstUnrolledPartOperand() { - return getUnrollPart(*this) == 0 ? this : getOperand(3); - } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. 
void print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 4b7d21edbb48a..1feb45abaa193 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3686,87 +3686,6 @@ bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) { (!IsScalable || vputils::onlyFirstLaneUsed(this)); } -void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { - assert(getInductionDescriptor().getKind() == - InductionDescriptor::IK_PtrInduction && - "Not a pointer induction according to InductionDescriptor!"); - assert(State.TypeAnalysis.inferScalarType(this)->isPointerTy() && - "Unexpected type."); - assert(!onlyScalarsGenerated(State.VF.isScalable()) && - "Recipe should have been replaced"); - - unsigned CurrentPart = getUnrollPart(*this); - - // Build a pointer phi - Value *ScalarStartValue = getStartValue()->getLiveInIRValue(); - Type *ScStValueType = ScalarStartValue->getType(); - - BasicBlock *VectorPH = - State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0)); - PHINode *NewPointerPhi = nullptr; - if (CurrentPart == 0) { - IRBuilder<>::InsertPointGuard Guard(State.Builder); - if (State.Builder.GetInsertPoint() != - State.Builder.GetInsertBlock()->getFirstNonPHIIt()) - State.Builder.SetInsertPoint( - State.Builder.GetInsertBlock()->getFirstNonPHIIt()); - NewPointerPhi = State.Builder.CreatePHI(ScStValueType, 2, "pointer.phi"); - NewPointerPhi->addIncoming(ScalarStartValue, VectorPH); - NewPointerPhi->setDebugLoc(getDebugLoc()); - } else { - // The recipe has been unrolled. In that case, fetch the single pointer phi - // shared among all unrolled parts of the recipe. 
- auto *GEP = - cast(State.get(getFirstUnrolledPartOperand())); - NewPointerPhi = cast(GEP->getPointerOperand()); - } - - // A pointer induction, performed by using a gep - BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint(); - Value *ScalarStepValue = State.get(getStepValue(), VPLane(0)); - Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue()); - Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF); - // Add induction update using an incorrect block temporarily. The phi node - // will be fixed after VPlan execution. Note that at this point the latch - // block cannot be used, as it does not exist yet. - // TODO: Model increment value in VPlan, by turning the recipe into a - // multi-def and a subclass of VPHeaderPHIRecipe. - if (CurrentPart == 0) { - // The recipe represents the first part of the pointer induction. Create the - // GEP to increment the phi across all unrolled parts. - Value *NumUnrolledElems = State.get(getOperand(2), true); - - Value *InductionGEP = GetElementPtrInst::Create( - State.Builder.getInt8Ty(), NewPointerPhi, - State.Builder.CreateMul( - ScalarStepValue, - State.Builder.CreateTrunc(NumUnrolledElems, PhiType)), - "ptr.ind", InductionLoc); - - NewPointerPhi->addIncoming(InductionGEP, VectorPH); - } - - // Create actual address geps that use the pointer phi as base and a - // vectorized version of the step value () as offset. - Type *VecPhiType = VectorType::get(PhiType, State.VF); - Value *StartOffsetScalar = State.Builder.CreateMul( - RuntimeVF, ConstantInt::get(PhiType, CurrentPart)); - Value *StartOffset = - State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar); - // Create a vector of consecutive numbers from zero to VF. 
- StartOffset = State.Builder.CreateAdd( - StartOffset, State.Builder.CreateStepVector(VecPhiType)); - - assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) && - "scalar step must be the same across all parts"); - Value *GEP = State.Builder.CreateGEP( - State.Builder.getInt8Ty(), NewPointerPhi, - State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat( - State.VF, ScalarStepValue)), - "vector.gep"); - State.set(this, GEP); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -3925,11 +3844,6 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) { Value *Op0 = State.get(getOperand(0)); Type *VecTy = Op0->getType(); Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name); - // Manually move it with the other PHIs in case PHI recipes above this one - // also inserted non-phi instructions. - // TODO: Remove once VPWidenPointerInductionRecipe is also expanded in - // convertToConcreteRecipes. - VecPhi->moveBefore(State.Builder.GetInsertBlock()->getFirstNonPHIIt()); State.set(this, VecPhi); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 581af67c88bf9..b96ac9f36bcd3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2675,6 +2675,107 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, WidenIVR->replaceAllUsesWith(WidePHI); } +/// Expand a VPWidenPointerInductionRecipe into executable recipes, for the +/// initial value, phi and backedge value. In the following example: +/// +/// vector loop: { +/// vector.body: +/// EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION %start, %step, %vf +/// ... +/// EMIT branch-on-count ... 
+/// } +/// +/// WIDEN-POINTER-INDUCTION will get expanded to: +/// +/// vector loop: { +/// vector.body: +/// EMIT-SCALAR %pointer.phi = phi %start, %ptr.ind +/// EMIT %mul = mul %stepvector, %step +/// EMIT %vector.gep = ptradd %pointer.phi, %mul +/// ... +/// EMIT %ptr.ind = ptradd %pointer.phi, %vf +/// EMIT branch-on-count ... +/// } +static void +expandVPWidenPointerInductionRecipe(VPWidenPointerInductionRecipe *R, + VPTypeAnalysis &TypeInfo) { + VPlan *Plan = R->getParent()->getPlan(); + + assert(R->getInductionDescriptor().getKind() == + InductionDescriptor::IK_PtrInduction && + "Not a pointer induction according to InductionDescriptor!"); + assert(TypeInfo.inferScalarType(R)->isPointerTy() && "Unexpected type."); + assert(!R->onlyScalarsGenerated(Plan->hasScalableVF()) && + "Recipe should have been replaced"); + + unsigned CurrentPart = 0; + if (R->getNumOperands() > 3) + CurrentPart = + cast(R->getOperand(4)->getLiveInIRValue())->getZExtValue(); + + VPBuilder Builder(R); + DebugLoc DL = R->getDebugLoc(); + + // Build a pointer phi + VPPhi *Phi; + if (CurrentPart == 0) { + Phi = Builder.createScalarPhi({R->getStartValue()}, R->getDebugLoc(), + "pointer.phi"); + } else { + // The recipe has been unrolled. In that case, fetch the single pointer phi + // shared among all unrolled parts of the recipe. + auto *PtrAdd = cast(R->getOperand(3)); + Phi = cast(PtrAdd->getOperand(0)->getDefiningRecipe()); + } + + Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi()); + + // A pointer induction, performed by using a gep + Type *PhiType = TypeInfo.inferScalarType(R->getStepValue()); + VPValue *RuntimeVF = Builder.createScalarZExtOrTrunc( + &Plan->getVF(), PhiType, TypeInfo.inferScalarType(&Plan->getVF()), DL); + if (CurrentPart == 0) { + // The recipe represents the first part of the pointer induction. Create the + // GEP to increment the phi across all unrolled parts. 
+ VPValue *NumUnrolledElems = Builder.createScalarZExtOrTrunc( + R->getOperand(2), PhiType, TypeInfo.inferScalarType(R->getOperand(2)), + DL); + VPValue *Offset = Builder.createNaryOp( + Instruction::Mul, {R->getStepValue(), NumUnrolledElems}); + + VPBuilder::InsertPointGuard Guard(Builder); + VPBasicBlock *ExitingBB = + Plan->getVectorLoopRegion()->getExitingBasicBlock(); + Builder.setInsertPoint(ExitingBB, + ExitingBB->getTerminator()->getIterator()); + + VPValue *InductionGEP = Builder.createPtrAdd(Phi, Offset, DL, "ptr.ind"); + Phi->addOperand(InductionGEP); + } + + VPValue *CurrentPartV = + Plan->getOrAddLiveIn(ConstantInt::get(PhiType, CurrentPart)); + + // Create actual address geps that use the pointer phi as base and a + // vectorized version of the step value () as offset. + VPValue *StartOffsetScalar = + Builder.createNaryOp(Instruction::Mul, {RuntimeVF, CurrentPartV}); + VPValue *StartOffset = + Builder.createNaryOp(VPInstruction::Broadcast, StartOffsetScalar); + // Create a vector of consecutive numbers from zero to VF. + StartOffset = Builder.createNaryOp( + Instruction::Add, + {StartOffset, + Builder.createNaryOp(VPInstruction::StepVector, {}, PhiType)}); + + VPValue *PtrAdd = Builder.createPtrAdd( + Phi, + Builder.createNaryOp(Instruction::Mul, {StartOffset, R->getStepValue()}), + DL, "vector.gep"); + + R->replaceAllUsesWith(PtrAdd); +} + void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) { // Replace loop regions with explicity CFG. 
SmallVector LoopRegions; @@ -2711,6 +2812,12 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, continue; } + if (auto *WidenIVR = dyn_cast(&R)) { + expandVPWidenPointerInductionRecipe(WidenIVR, TypeInfo); + ToRemove.push_back(WidenIVR); + continue; + } + if (auto *Expr = dyn_cast(&R)) { Expr->decompose(); ToRemove.push_back(Expr); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index e58ea655d6098..5aee65fd1c59d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -67,10 +67,8 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP6]], 0 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv2i64() @@ -159,17 +157,16 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = 
mul i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP13:%.*]] = add [[DOTSPLAT]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = mul [[TMP13]], splat (i64 1) -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[VECTOR_GEP]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement [[DOTSPLAT]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP16]], align 1 ; CHECK-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD]], splat (i8 1) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll index 4c29a3a0d1d01..6e16003f11757 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll @@ -29,14 +29,14 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[IN]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[OUT]], %[[VECTOR_PH]] ], [ [[PTR_IND3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[IN]], %[[VECTOR_PH]] ], [ [[PTR_IND3:%.*]], 
%[[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[OUT]], %[[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI2]], <4 x i32> ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]]) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[VECTOR_GEP]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 1 +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[VECTOR_GEP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 2 ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0]] ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_MASKED_GATHER]] to <4 x i32> @@ -66,14 +66,14 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 { ; CHECK-NEXT: [[TMP27:%.*]] = add nuw <4 x i32> [[TMP26]], [[TMP24]] ; CHECK-NEXT: [[TMP28:%.*]] = lshr <4 x i32> [[TMP27]], splat (i32 16) ; CHECK-NEXT: [[TMP29:%.*]] = trunc <4 x i32> [[TMP28]] to <4 x i8> -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, <4 x 
ptr> [[VECTOR_GEP4]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP13]], <4 x ptr> [[VECTOR_GEP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 2 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 1 +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP13]], <4 x ptr> [[VECTOR_GEP]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 2 ; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP21]], <4 x ptr> [[TMP30]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP29]], <4 x ptr> [[TMP31]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12 ; CHECK-NEXT: [[PTR_IND3]] = getelementptr i8, ptr [[POINTER_PHI2]], i32 12 +; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12 ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index fa03e29ae0620..c62dcba7c1302 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -106,8 +106,8 @@ define hidden void @pointer_phi_v4i32_add3(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> 
[[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] @@ -539,8 +539,8 @@ define hidden void @pointer_phi_v4f32_add3(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] @@ -743,10 +743,10 @@ define hidden void @pointer_phi_v4i32_uf2(ptr noalias nocapture readonly %A, ptr ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; 
CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) @@ -808,12 +808,12 @@ define hidden void @pointer_phi_v4i32_uf4(ptr noalias nocapture readonly %A, ptr ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = 
getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) @@ -880,11 +880,11 @@ define hidden void @mult_ptr_iv(ptr noalias nocapture readonly %x, ptr noalias n ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ], [ [[X]], [[ENTRY]] ] ; CHECK-NEXT: [[POINTER_PHI5:%.*]] = phi ptr [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ], [ [[Z]], [[ENTRY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 1 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP0]], i32 1, <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 2 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 25dac366ef73e..86536b705261f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -170,10 +170,8 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP8]], 0 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() @@ -764,10 +762,8 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4 ; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP13]] -; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 0 +; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP13]], 0 ; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; STRIDED-NEXT: [[TMP19:%.*]] = call @llvm.stepvector.nxv4i64() @@ -775,23 
+771,21 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector [[DOTSPLATINSERT9]], poison, zeroinitializer ; STRIDED-NEXT: [[TMP21:%.*]] = mul [[TMP20]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP21]] -; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 4 +; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[TMP21]] ; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP13]] -; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0 +; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP13]], 0 ; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement poison, i64 [[TMP26]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector [[DOTSPLATINSERT13]], poison, zeroinitializer ; STRIDED-NEXT: [[TMP27:%.*]] = call @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[TMP28:%.*]] = add [[DOTSPLAT14]], [[TMP27]] ; STRIDED-NEXT: [[TMP29:%.*]] = mul [[TMP28]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[TMP29]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[VECTOR_GEP]], i32 4, splat (i1 true), poison), !alias.scope [[META15:![0-9]+]] +; STRIDED-NEXT: [[VECTOR_GEP11:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP29]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[VECTOR_GEP11]], i32 4, splat (i1 true), poison), !alias.scope [[META15:![0-9]+]] ; STRIDED-NEXT: [[TMP30:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) -; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP30]], [[VECTOR_GEP17]], i32 4, splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] +; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP30]], 
[[VECTOR_GEP]], i32 4, splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] -; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]] -; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]] +; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP25]] +; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP17]] ; STRIDED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; STRIDED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll index 8acf89abae2d7..6c7a882567a64 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll @@ -27,8 +27,8 @@ define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> +; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 0 ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 @@ -85,8 +85,8 @@ define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> +; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 0 ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index a4f2b077cb066..9531802f830fa 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -317,8 +317,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; DEFAULT-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; DEFAULT-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16 +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; DEFAULT-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DEFAULT: middle.block: @@ -373,8 +373,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) { ; STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; STRIDED-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; STRIDED-NEXT: 
[[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16 +; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; STRIDED: middle.block: From ecb3fa59f4df8dd56ff3dfad983e2e11b7865409 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Sat, 12 Jul 2025 02:51:25 +0800 Subject: [PATCH 03/16] Rename method --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b96ac9f36bcd3..43c05b5f60fcf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2696,9 +2696,8 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, /// EMIT %ptr.ind = ptradd %pointer.phi, %vf /// EMIT branch-on-count ... 
/// } -static void -expandVPWidenPointerInductionRecipe(VPWidenPointerInductionRecipe *R, - VPTypeAnalysis &TypeInfo) { +static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, + VPTypeAnalysis &TypeInfo) { VPlan *Plan = R->getParent()->getPlan(); assert(R->getInductionDescriptor().getKind() == @@ -2813,7 +2812,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, } if (auto *WidenIVR = dyn_cast(&R)) { - expandVPWidenPointerInductionRecipe(WidenIVR, TypeInfo); + expandVPWidenPointerInduction(WidenIVR, TypeInfo); ToRemove.push_back(WidenIVR); continue; } From 3b38332a2efa44a59e3061775d3f6eea41f0e766 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 14 Jul 2025 22:50:06 +0800 Subject: [PATCH 04/16] Add new separate opcode, VPInstruction::WidePtrAdd --- .../Vectorize/LoopVectorizationPlanner.h | 7 +++++ llvm/lib/Transforms/Vectorize/VPlan.h | 11 ++++---- .../Transforms/Vectorize/VPlanAnalysis.cpp | 1 + .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 27 +++++++++++++++---- .../Transforms/Vectorize/VPlanTransforms.cpp | 3 ++- 5 files changed, 38 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 11853859484e3..ffdf4b4492b24 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -254,6 +254,13 @@ class VPBuilder { new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset}, GEPNoWrapFlags::inBounds(), DL, Name)); } + VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset, + DebugLoc DL = DebugLoc::getUnknown(), + const Twine &Name = "") { + return tryInsertInstruction( + new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset}, + GEPNoWrapFlags::none(), DL, Name)); + } VPPhi *createScalarPhi(ArrayRef IncomingValues, DebugLoc DL, const Twine &Name = "") { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h 
index 6d658287fe738..f11441db662db 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -958,11 +958,12 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, ExtractPenultimateElement, LogicalAnd, // Non-poison propagating logical And. // Add an offset in bytes (second operand) to a base pointer (first - // operand). The base pointer must be scalar, but the offset can be a - // scalar, multiple scalars, or a vector. If the offset is multiple scalars - // then it will generate multiple scalar values (either for the first lane - // only or for all lanes, depending on its uses). + // operand). Only generates scalar values (either for the first lane only or + // for all lanes, depending on its uses). PtrAdd, + // Add a vector offset in bytes (second operand) to a scalar base pointer + // (first operand). + WidePtrAdd, // Returns a scalar boolean value, which is true if any lane of its // (boolean) vector operands is true. It produces the reduced value across // all unrolled iterations. Unrolling will add all copies of its original @@ -1000,7 +1001,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, /// values per all lanes, stemming from an original ingredient. This method /// identifies the (rare) cases of VPInstructions that do so as well, w/o an /// underlying ingredient. - bool doesGeneratePerAllLanes(VPTransformState &State) const; + bool doesGeneratePerAllLanes() const; /// Returns true if we can generate a scalar for the first lane only if /// needed. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 92db9674ef42b..bc11dba244afe 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -126,6 +126,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { return IntegerType::get(Ctx, 1); case VPInstruction::Broadcast: case VPInstruction::PtrAdd: + case VPInstruction::WidePtrAdd: // Return the type based on first operand. return inferScalarType(R->getOperand(0)); case VPInstruction::BranchOnCond: diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1feb45abaa193..6dd8574a159e6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -475,6 +475,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { case VPInstruction::FirstOrderRecurrenceSplice: case VPInstruction::LogicalAnd: case VPInstruction::PtrAdd: + case VPInstruction::WidePtrAdd: case VPInstruction::WideIVStep: return 2; case Instruction::Select: @@ -494,9 +495,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { } #endif -bool VPInstruction::doesGeneratePerAllLanes(VPTransformState &State) const { - return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this) && - !State.hasVectorValue(getOperand(1)); +bool VPInstruction::doesGeneratePerAllLanes() const { + return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this); } bool VPInstruction::canGenerateScalarForFirstLane() const { @@ -514,6 +514,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::PtrAdd: + case VPInstruction::WidePtrAdd: case VPInstruction::ExplicitVectorLength: case VPInstruction::AnyOf: return true; @@ -849,7 +850,14 @@ Value 
*VPInstruction::generate(VPTransformState &State) { return Builder.CreateLogicalAnd(A, B, Name); } case VPInstruction::PtrAdd: { + assert(vputils::onlyFirstLaneUsed(this) && + "can only generate first lane for PtrAdd"); Value *Ptr = State.get(getOperand(0), VPLane(0)); + Value *Addend = State.get(getOperand(1), VPLane(0)); + return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); + } + case VPInstruction::WidePtrAdd: { + Value *Ptr = State.get(getOperand(0), true); Value *Addend = State.get(getOperand(1), vputils::onlyFirstLaneUsed(this)); return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); } @@ -910,6 +918,9 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, } } + assert(!doesGeneratePerAllLanes() && + "Should only generate a vector value or single scalar, not scalars " + "for all lanes."); return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind); } @@ -997,7 +1008,7 @@ void VPInstruction::execute(VPTransformState &State) { bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && (vputils::onlyFirstLaneUsed(this) || isVectorToScalar() || isSingleScalar()); - bool GeneratesPerAllLanes = doesGeneratePerAllLanes(State); + bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); if (GeneratesPerAllLanes) { for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue(); Lane != NumLanes; ++Lane) { @@ -1041,6 +1052,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::LogicalAnd: case VPInstruction::Not: case VPInstruction::PtrAdd: + case VPInstruction::WidePtrAdd: case VPInstruction::WideIVStep: case VPInstruction::StepVector: case VPInstruction::ReductionStartVector: @@ -1078,6 +1090,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::ReductionStartVector: return true; case VPInstruction::PtrAdd: + case VPInstruction::WidePtrAdd: return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); case VPInstruction::ComputeAnyOfResult: case 
VPInstruction::ComputeFindIVResult: @@ -1181,6 +1194,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::PtrAdd: O << "ptradd"; break; + case VPInstruction::WidePtrAdd: + O << "wide-ptradd"; + break; case VPInstruction::AnyOf: O << "any-of"; break; @@ -1765,7 +1781,8 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const { return Opcode == Instruction::AShr; case OperationType::GEPOp: return Opcode == Instruction::GetElementPtr || - Opcode == VPInstruction::PtrAdd; + Opcode == VPInstruction::PtrAdd || + Opcode == VPInstruction::WidePtrAdd; case OperationType::FPMathOp: return Opcode == Instruction::FAdd || Opcode == Instruction::FMul || Opcode == Instruction::FSub || Opcode == Instruction::FNeg || diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 43c05b5f60fcf..e8aa2678339a7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -973,6 +973,7 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode, RFlags.getGEPNoWrapFlags()); } case VPInstruction::PtrAdd: + case VPInstruction::WidePtrAdd: return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0], Ops[1], cast(R).getGEPNoWrapFlags()); @@ -2767,7 +2768,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, {StartOffset, Builder.createNaryOp(VPInstruction::StepVector, {}, PhiType)}); - VPValue *PtrAdd = Builder.createPtrAdd( + VPValue *PtrAdd = Builder.createWidePtrAdd( Phi, Builder.createNaryOp(Instruction::Mul, {StartOffset, R->getStepValue()}), DL, "vector.gep"); From 0315ec12d33e431e4d019cca2361f7964755354e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 17 Jul 2025 19:32:05 +0800 Subject: [PATCH 05/16] Maintain previous behaviour by always generating vector for WidePtrAdd --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 +++--- 
.../Transforms/LoopVectorize/AArch64/sve-widen-gep.ll | 9 ++++----- llvm/test/Transforms/LoopVectorize/X86/pr48340.ll | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 6dd8574a159e6..4f7a8741a72a0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -514,7 +514,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::PtrAdd: - case VPInstruction::WidePtrAdd: case VPInstruction::ExplicitVectorLength: case VPInstruction::AnyOf: return true; @@ -858,7 +857,7 @@ Value *VPInstruction::generate(VPTransformState &State) { } case VPInstruction::WidePtrAdd: { Value *Ptr = State.get(getOperand(0), true); - Value *Addend = State.get(getOperand(1), vputils::onlyFirstLaneUsed(this)); + Value *Addend = State.get(getOperand(1)); return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); } case VPInstruction::AnyOf: { @@ -1090,8 +1089,9 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::ReductionStartVector: return true; case VPInstruction::PtrAdd: - case VPInstruction::WidePtrAdd: return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); + case VPInstruction::WidePtrAdd: + return Op == getOperand(0); case VPInstruction::ComputeAnyOfResult: case VPInstruction::ComputeFindIVResult: return Op == getOperand(1); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 5aee65fd1c59d..4fa2cd7ae93b8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -162,11 +162,10 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP20:%.*]] = extractelement [[DOTSPLAT]], i32 0 -; CHECK-NEXT: [[TMP21:%.*]] = extractelement [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP13:%.*]] = add [[DOTSPLAT]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = mul [[TMP13]], splat (i64 1) +; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP16]], align 1 ; CHECK-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD]], splat (i8 1) diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll index 6c7a882567a64..e5400fb8c7cb7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll @@ -28,7 +28,7 @@ define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> -; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 0 +; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = 
add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 @@ -86,7 +86,7 @@ define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> -; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 0 +; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 From 3adca50ded00e70967f7405b1d26cc3a09eeaa47 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 22 Jul 2025 22:25:37 +0800 Subject: [PATCH 06/16] Use VPUnrollPartAccessor --- llvm/lib/Transforms/Vectorize/VPlan.h | 13 +++++++++++-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 + llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7 ++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index f11441db662db..35dd79d4c9f2a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -873,7 +873,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { /// Helper to access the operand that contains the unroll part for this recipe /// after unrolling. template class LLVM_ABI_FOR_TEST VPUnrollPartAccessor { -protected: +public: /// Return the VPValue operand containing the unroll part or null if there is /// no such operand. 
VPValue *getUnrollPartOperand(VPUser &U) const; @@ -2067,7 +2067,8 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { } }; -class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe { +class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, + public VPUnrollPartAccessor<4> { bool IsScalarAfterVectorization; public: @@ -2095,6 +2096,7 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe { VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC) + /// Generate vector values for the pointer induction. void execute(VPTransformState &State) override { llvm_unreachable("cannot execute this recipe, should be expanded via " "expandVPWidenIntOrFpInductionRecipe"); @@ -2103,6 +2105,13 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe { /// Returns true if only scalar values will be generated. bool onlyScalarsGenerated(bool IsScalable); + /// Returns the VPValue representing the value of this induction at + /// the first unrolled part, if it exists. Returns itself if unrolling did not + /// take place. + VPValue *getFirstUnrolledPartOperand() { + return getUnrollPart(*this) == 0 ? this : getOperand(3); + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. 
void print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 4f7a8741a72a0..8c91567ddb05f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -427,6 +427,7 @@ unsigned VPUnrollPartAccessor::getUnrollPart(VPUser &U) const { namespace llvm { template class VPUnrollPartAccessor<2>; template class VPUnrollPartAccessor<3>; +template class VPUnrollPartAccessor<4>; } VPInstruction::VPInstruction(unsigned Opcode, ArrayRef Operands, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index e8aa2678339a7..88bbcd73f6498 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2708,10 +2708,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, assert(!R->onlyScalarsGenerated(Plan->hasScalableVF()) && "Recipe should have been replaced"); - unsigned CurrentPart = 0; - if (R->getNumOperands() > 3) - CurrentPart = - cast(R->getOperand(4)->getLiveInIRValue())->getZExtValue(); + unsigned CurrentPart = R->getUnrollPart(*R); VPBuilder Builder(R); DebugLoc DL = R->getDebugLoc(); @@ -2724,7 +2721,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, } else { // The recipe has been unrolled. In that case, fetch the single pointer phi // shared among all unrolled parts of the recipe. 
- auto *PtrAdd = cast(R->getOperand(3)); + auto *PtrAdd = cast(R->getFirstUnrolledPartOperand()); Phi = cast(PtrAdd->getOperand(0)->getDefiningRecipe()); } From 628c26b80d95f7287dbf3cba3cf19ffcd4852538 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 22 Jul 2025 22:27:53 +0800 Subject: [PATCH 07/16] Use DL --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 88bbcd73f6498..5191233d01f85 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2716,8 +2716,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, // Build a pointer phi VPPhi *Phi; if (CurrentPart == 0) { - Phi = Builder.createScalarPhi({R->getStartValue()}, R->getDebugLoc(), - "pointer.phi"); + Phi = Builder.createScalarPhi({R->getStartValue()}, DL, "pointer.phi"); } else { // The recipe has been unrolled. In that case, fetch the single pointer phi // shared among all unrolled parts of the recipe. 
From 50454c5cdb12fce7771db4e2350a8d4e5051de8e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 22 Jul 2025 22:29:32 +0800 Subject: [PATCH 08/16] Fix add -> mul typo in comments --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 5191233d01f85..c36b1758c1ac3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2692,7 +2692,7 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, /// vector.body: /// EMIT-SCALAR %pointer.phi = phi %start, %ptr.ind /// EMIT %mul = mul %stepvector, %step -/// EMIT %vector.gep = ptradd %pointer.phi, %add +/// EMIT %vector.gep = ptradd %pointer.phi, %mul /// ... /// EMIT %ptr.ind = ptradd %pointer.phi, %vf /// EMIT branch-on-count ... From 5bf2d79235acd0b49c559f188406f7dc72a71151 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 28 Jul 2025 10:16:06 +0800 Subject: [PATCH 09/16] Don't make VPUnrollPart public, add method for VPWidenPointerInductionRecipe::getCurrentPart --- llvm/lib/Transforms/Vectorize/VPlan.h | 5 ++++- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 35dd79d4c9f2a..73c1723383620 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -873,7 +873,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { /// Helper to access the operand that contains the unroll part for this recipe /// after unrolling. template class LLVM_ABI_FOR_TEST VPUnrollPartAccessor { -public: +protected: /// Return the VPValue operand containing the unroll part or null if there is /// no such operand. 
VPValue *getUnrollPartOperand(VPUser &U) const; @@ -2105,6 +2105,9 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, /// Returns true if only scalar values will be generated. bool onlyScalarsGenerated(bool IsScalable); + /// Returns the unroll part. + unsigned getCurrentPart() { return getUnrollPart(*this); } + /// Returns the VPValue representing the value of this induction at /// the first unrolled part, if it exists. Returns itself if unrolling did not /// take place. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index c36b1758c1ac3..a7f9f7230e7ee 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2708,7 +2708,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, assert(!R->onlyScalarsGenerated(Plan->hasScalableVF()) && "Recipe should have been replaced"); - unsigned CurrentPart = R->getUnrollPart(*R); + unsigned CurrentPart = R->getCurrentPart(); VPBuilder Builder(R); DebugLoc DL = R->getDebugLoc(); From e1ca024ed686afe9af6258c34d8ddcacdb4e825f Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 28 Jul 2025 18:30:37 +0800 Subject: [PATCH 10/16] Address comments --- .../Transforms/Vectorize/VPlanTransforms.cpp | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b9bd30ee08b27..b4059634dd4d0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2717,20 +2717,21 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, VPBuilder Builder(R); DebugLoc DL = R->getDebugLoc(); - // Build a pointer phi - VPPhi *Phi; + // Build a pointer phi. 
+ VPPhi *ScalarPtrPhi; if (CurrentPart == 0) { - Phi = Builder.createScalarPhi({R->getStartValue()}, DL, "pointer.phi"); + ScalarPtrPhi = + Builder.createScalarPhi({R->getStartValue()}, DL, "pointer.phi"); } else { // The recipe has been unrolled. In that case, fetch the single pointer phi // shared among all unrolled parts of the recipe. auto *PtrAdd = cast(R->getFirstUnrolledPartOperand()); - Phi = cast(PtrAdd->getOperand(0)->getDefiningRecipe()); + ScalarPtrPhi = cast(PtrAdd->getOperand(0)->getDefiningRecipe()); } Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi()); - // A pointer induction, performed by using a gep + // A pointer induction, performed by using a gep. Type *PhiType = TypeInfo.inferScalarType(R->getStepValue()); VPValue *RuntimeVF = Builder.createScalarZExtOrTrunc( &Plan->getVF(), PhiType, TypeInfo.inferScalarType(&Plan->getVF()), DL); @@ -2749,8 +2750,9 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator()); - VPValue *InductionGEP = Builder.createPtrAdd(Phi, Offset, DL, "ptr.ind"); - Phi->addOperand(InductionGEP); + VPValue *InductionGEP = + Builder.createPtrAdd(ScalarPtrPhi, Offset, DL, "ptr.ind"); + ScalarPtrPhi->addOperand(InductionGEP); } VPValue *CurrentPartV = @@ -2762,14 +2764,14 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, Builder.createNaryOp(Instruction::Mul, {RuntimeVF, CurrentPartV}); VPValue *StartOffset = Builder.createNaryOp(VPInstruction::Broadcast, StartOffsetScalar); - // Create a vector of consecutive numbers from zero to VF. + // Create a vector of consecutive numbers from StartOffset to StartOffset+VF. 
StartOffset = Builder.createNaryOp( Instruction::Add, {StartOffset, Builder.createNaryOp(VPInstruction::StepVector, {}, PhiType)}); VPValue *PtrAdd = Builder.createWidePtrAdd( - Phi, + ScalarPtrPhi, Builder.createNaryOp(Instruction::Mul, {StartOffset, R->getStepValue()}), DL, "vector.gep"); From 4a25f751d6db4a0eadfa23a9bfce47c6bb30a77a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 28 Jul 2025 18:31:16 +0800 Subject: [PATCH 11/16] Rename PhiType -> OffsetTy --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b4059634dd4d0..56aca1e668950 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2732,14 +2732,14 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi()); // A pointer induction, performed by using a gep. - Type *PhiType = TypeInfo.inferScalarType(R->getStepValue()); + Type *OffsetTy = TypeInfo.inferScalarType(R->getStepValue()); VPValue *RuntimeVF = Builder.createScalarZExtOrTrunc( - &Plan->getVF(), PhiType, TypeInfo.inferScalarType(&Plan->getVF()), DL); + &Plan->getVF(), OffsetTy, TypeInfo.inferScalarType(&Plan->getVF()), DL); if (CurrentPart == 0) { // The recipe represents the first part of the pointer induction. Create the // GEP to increment the phi across all unrolled parts. 
VPValue *NumUnrolledElems = Builder.createScalarZExtOrTrunc( - R->getOperand(2), PhiType, TypeInfo.inferScalarType(R->getOperand(2)), + R->getOperand(2), OffsetTy, TypeInfo.inferScalarType(R->getOperand(2)), DL); VPValue *Offset = Builder.createNaryOp( Instruction::Mul, {R->getStepValue(), NumUnrolledElems}); @@ -2756,7 +2756,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, } VPValue *CurrentPartV = - Plan->getOrAddLiveIn(ConstantInt::get(PhiType, CurrentPart)); + Plan->getOrAddLiveIn(ConstantInt::get(OffsetTy, CurrentPart)); // Create actual address geps that use the pointer phi as base and a // vectorized version of the step value () as offset. @@ -2768,7 +2768,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, StartOffset = Builder.createNaryOp( Instruction::Add, {StartOffset, - Builder.createNaryOp(VPInstruction::StepVector, {}, PhiType)}); + Builder.createNaryOp(VPInstruction::StepVector, {}, OffsetTy)}); VPValue *PtrAdd = Builder.createWidePtrAdd( ScalarPtrPhi, From 03369e207c9a822cd1a42a11667f4574726ff2dd Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 28 Jul 2025 21:59:59 +0800 Subject: [PATCH 12/16] Reuse VPUnroll infrastructure --- llvm/lib/Transforms/Vectorize/VPlan.h | 26 +----- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 +- .../Transforms/Vectorize/VPlanTransforms.cpp | 82 ++++++------------- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 33 ++++---- .../LoopVectorize/AArch64/sve-widen-gep.ll | 12 +-- .../LoopVectorize/ARM/pointer_iv.ll | 8 +- .../LoopVectorize/RISCV/strided-accesses.ll | 18 +--- .../Transforms/LoopVectorize/X86/pr48340.ll | 8 +- 8 files changed, 63 insertions(+), 128 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8833e9f597492..c0501fda77342 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1982,6 +1982,9 @@ class VPWidenInductionRecipe : public 
VPHeaderPHIRecipe { /// Update the step value of the recipe. void setStepValue(VPValue *V) { setOperand(1, V); } + VPValue *getVFValue() { return getOperand(2); } + const VPValue *getVFValue() const { return getOperand(2); } + /// Returns the number of incoming values, also number of incoming blocks. /// Note that at the moment, VPWidenPointerInductionRecipe only has a single /// incoming value, its start value. @@ -2071,9 +2074,6 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { VPSlotTracker &SlotTracker) const override; #endif - VPValue *getVFValue() { return getOperand(2); } - const VPValue *getVFValue() const { return getOperand(2); } - VPValue *getSplatVFValue() { // If the recipe has been unrolled return the VPValue for the induction // increment. @@ -2100,17 +2100,9 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { return Trunc ? Trunc->getType() : getStartValue()->getLiveInIRValue()->getType(); } - - /// Returns the VPValue representing the value of this induction at - /// the last unrolled part, if it exists. Returns itself if unrolling did not - /// take place. - VPValue *getLastUnrolledPartOperand() { - return isUnrolled() ? getOperand(getNumOperands() - 1) : this; - } }; -class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, - public VPUnrollPartAccessor<4> { +class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe { bool IsScalarAfterVectorization; public: @@ -2147,16 +2139,6 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, /// Returns true if only scalar values will be generated. bool onlyScalarsGenerated(bool IsScalable); - /// Returns the unroll part. - unsigned getCurrentPart() { return getUnrollPart(*this); } - - /// Returns the VPValue representing the value of this induction at - /// the first unrolled part, if it exists. Returns itself if unrolling did not - /// take place. 
- VPValue *getFirstUnrolledPartOperand() { - return getUnrollPart(*this) == 0 ? this : getOperand(3); - } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index d368d70917228..64a2b52fb429c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -430,7 +430,6 @@ namespace llvm { template class VPUnrollPartAccessor<1>; template class VPUnrollPartAccessor<2>; template class VPUnrollPartAccessor<3>; -template class VPUnrollPartAccessor<4>; } VPInstruction::VPInstruction(unsigned Opcode, ArrayRef Operands, @@ -861,7 +860,8 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); } case VPInstruction::WidePtrAdd: { - Value *Ptr = State.get(getOperand(0), true); + Value *Ptr = + State.get(getOperand(0), vputils::isSingleScalar(getOperand(0))); Value *Addend = State.get(getOperand(1)); return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 56aca1e668950..423a7b71dd010 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2696,7 +2696,7 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, /// vector.body: /// EMIT-SCALAR %pointer.phi = phi %start, %ptr.ind /// EMIT %mul = mul %stepvector, %step -/// EMIT %vector.gep = ptradd %pointer.phi, %mul +/// EMIT %vector.gep = wide-ptradd %pointer.phi, %mul /// ... /// EMIT %ptr.ind = ptradd %pointer.phi, %vf /// EMIT branch-on-count ... 
@@ -2704,6 +2704,9 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, VPTypeAnalysis &TypeInfo) { VPlan *Plan = R->getParent()->getPlan(); + VPValue *Start = R->getStartValue(); + VPValue *Step = R->getStepValue(); + VPValue *VF = R->getVFValue(); assert(R->getInductionDescriptor().getKind() == InductionDescriptor::IK_PtrInduction && @@ -2712,70 +2715,33 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, assert(!R->onlyScalarsGenerated(Plan->hasScalableVF()) && "Recipe should have been replaced"); - unsigned CurrentPart = R->getCurrentPart(); - VPBuilder Builder(R); DebugLoc DL = R->getDebugLoc(); - // Build a pointer phi. - VPPhi *ScalarPtrPhi; - if (CurrentPart == 0) { - ScalarPtrPhi = - Builder.createScalarPhi({R->getStartValue()}, DL, "pointer.phi"); - } else { - // The recipe has been unrolled. In that case, fetch the single pointer phi - // shared among all unrolled parts of the recipe. - auto *PtrAdd = cast(R->getFirstUnrolledPartOperand()); - ScalarPtrPhi = cast(PtrAdd->getOperand(0)->getDefiningRecipe()); - } - - Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi()); - - // A pointer induction, performed by using a gep. - Type *OffsetTy = TypeInfo.inferScalarType(R->getStepValue()); - VPValue *RuntimeVF = Builder.createScalarZExtOrTrunc( - &Plan->getVF(), OffsetTy, TypeInfo.inferScalarType(&Plan->getVF()), DL); - if (CurrentPart == 0) { - // The recipe represents the first part of the pointer induction. Create the - // GEP to increment the phi across all unrolled parts. 
- VPValue *NumUnrolledElems = Builder.createScalarZExtOrTrunc( - R->getOperand(2), OffsetTy, TypeInfo.inferScalarType(R->getOperand(2)), - DL); - VPValue *Offset = Builder.createNaryOp( - Instruction::Mul, {R->getStepValue(), NumUnrolledElems}); - - VPBuilder::InsertPointGuard Guard(Builder); - VPBasicBlock *ExitingBB = - Plan->getVectorLoopRegion()->getExitingBasicBlock(); - Builder.setInsertPoint(ExitingBB, - ExitingBB->getTerminator()->getIterator()); - - VPValue *InductionGEP = - Builder.createPtrAdd(ScalarPtrPhi, Offset, DL, "ptr.ind"); - ScalarPtrPhi->addOperand(InductionGEP); - } - - VPValue *CurrentPartV = - Plan->getOrAddLiveIn(ConstantInt::get(OffsetTy, CurrentPart)); + // Build a scalar pointer phi. + VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi"); // Create actual address geps that use the pointer phi as base and a // vectorized version of the step value () as offset. - VPValue *StartOffsetScalar = - Builder.createNaryOp(Instruction::Mul, {RuntimeVF, CurrentPartV}); - VPValue *StartOffset = - Builder.createNaryOp(VPInstruction::Broadcast, StartOffsetScalar); - // Create a vector of consecutive numbers from StartOffset to StartOffset+VF. - StartOffset = Builder.createNaryOp( - Instruction::Add, - {StartOffset, - Builder.createNaryOp(VPInstruction::StepVector, {}, OffsetTy)}); - - VPValue *PtrAdd = Builder.createWidePtrAdd( - ScalarPtrPhi, - Builder.createNaryOp(Instruction::Mul, {StartOffset, R->getStepValue()}), - DL, "vector.gep"); - + Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi()); + Type *StepTy = TypeInfo.inferScalarType(Step); + VPValue *Offset = Builder.createNaryOp(VPInstruction::StepVector, {}, StepTy); + Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step}); + VPValue *PtrAdd = Builder.createNaryOp( + VPInstruction::WidePtrAdd, {ScalarPtrPhi, Offset}, DL, "vector.gep"); R->replaceAllUsesWith(PtrAdd); + + // Create the backedge value for the scalar pointer phi. 
+ Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi()); + VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF), + DL); + VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF}); + + VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock(); + Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator()); + VPValue *InductionGEP = + Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind"); + ScalarPtrPhi->addOperand(InductionGEP); } void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 871e37ef3966a..fc072de8ff78e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -65,7 +65,7 @@ class UnrollState { /// Unroll a widen induction recipe \p IV. This introduces recipes to compute /// the induction steps for each part. - void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV, + void unrollWidenInductionByUF(VPWidenInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi); VPValue *getConstantVPV(unsigned Part) { @@ -148,7 +148,7 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) { } void UnrollState::unrollWidenInductionByUF( - VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) { + VPWidenInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) { VPBasicBlock *PH = cast( IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor()); Type *IVTy = TypeInfo.inferScalarType(IV); @@ -159,9 +159,11 @@ void UnrollState::unrollWidenInductionByUF( VPValue *ScalarStep = IV->getStepValue(); VPBuilder Builder(PH); + Type *VectorStepTy = + IVTy->isPointerTy() ? 
TypeInfo.inferScalarType(ScalarStep) : IVTy; VPInstruction *VectorStep = Builder.createNaryOp( - VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, IVTy, Flags, - IV->getDebugLoc()); + VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy, + Flags, IV->getDebugLoc()); ToSkip.insert(VectorStep); @@ -169,8 +171,8 @@ void UnrollState::unrollWidenInductionByUF( // remains the header phi. Parts > 0 are computed by adding Step to the // previous part. The header phi recipe will get 2 new operands: the step // value for a single part and the last part, used to compute the backedge - // value during VPWidenIntOrFpInductionRecipe::execute. %Part.0 = - // VPWidenIntOrFpInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3 + // value during VPWidenInductionRecipe::execute. + // %Part.0 = VPWidenInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3 // %Part.1 = %Part.0 + %VectorStep // %Part.2 = %Part.1 + %VectorStep // %Part.3 = %Part.2 + %VectorStep @@ -179,8 +181,13 @@ void UnrollState::unrollWidenInductionByUF( // again. VPValue *Prev = IV; Builder.setInsertPoint(IV->getParent(), InsertPtForPhi); - unsigned AddOpc = - IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add; + unsigned AddOpc; + if (IVTy->isPointerTy()) + AddOpc = VPInstruction::WidePtrAdd; + else if (IVTy->isFloatingPointTy()) + AddOpc = ID.getInductionOpcode(); + else + AddOpc = Instruction::Add; for (unsigned Part = 1; Part != UF; ++Part) { std::string Name = Part > 1 ? "step.add." + std::to_string(Part) : "step.add"; @@ -207,7 +214,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R, return; // Generate step vectors for each unrolled part. 
- if (auto *IV = dyn_cast(R)) { + if (auto *IV = dyn_cast(R)) { unrollWidenInductionByUF(IV, InsertPtForPhi); return; } @@ -221,10 +228,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R, VPRecipeBase *Copy = R->clone(); Copy->insertBefore(*R->getParent(), InsertPt); addRecipeForPart(R, Copy, Part); - if (isa(R)) { - Copy->addOperand(R); - Copy->addOperand(getConstantVPV(Part)); - } else if (RdxPhi) { + if (RdxPhi) { // If the start value is a ReductionStartVector, use the identity value // (second operand) for unrolled parts. If the scaling factor is > 1, // create a new ReductionStartVector with the scale factor and both @@ -450,8 +454,7 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) { Unroller.remapOperand(&H, 1, UF - 1); continue; } - if (Unroller.contains(H.getVPSingleValue()) || - isa(&H)) { + if (Unroller.contains(H.getVPSingleValue())) { Part = 1; continue; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index c45b24fb64e30..9929f35d47dac 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -36,12 +36,8 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP6]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP14:%.*]] = add [[DOTSPLAT]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = mul [[TMP14]], splat 
(i64 1) +; CHECK-NEXT: [[TMP15:%.*]] = mul [[TMP13]], splat (i64 1) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP15]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]] @@ -124,12 +120,8 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP13:%.*]] = add [[DOTSPLAT]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = mul [[TMP13]], splat (i64 1) +; CHECK-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i64 1) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP14]] ; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP15]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index c62dcba7c1302..a13256139d7cd 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -745,8 +745,8 @@ define hidden void @pointer_phi_v4i32_uf2(ptr noalias nocapture readonly %A, ptr ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 
x i32> ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, <4 x ptr> [[TMP0]], i32 96 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) @@ -810,10 +810,10 @@ define hidden void @pointer_phi_v4i32_uf4(ptr noalias nocapture readonly %A, ptr ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, <4 x ptr> [[TMP0]], i32 96 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <4 x ptr> [[TMP0]], i32 192 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <4 x ptr> [[TMP0]], i32 288 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index d4281f655786e..d602262b38bc5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -171,12 +171,8 @@ define void 
@single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP8]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP13]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP15:%.*]] = add [[DOTSPLAT]], [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = mul [[TMP15]], splat (i64 8) +; CHECK-NEXT: [[TMP16:%.*]] = mul [[TMP14]], splat (i64 8) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP16]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[VECTOR_GEP]], i32 0 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP17]], align 4 @@ -758,22 +754,14 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP13]] -; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP13]], 0 -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP18]], i64 0 -; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; STRIDED-NEXT: [[TMP19:%.*]] = call @llvm.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP20:%.*]] = add [[DOTSPLAT]], [[TMP19]] ; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector [[DOTSPLATINSERT9]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP21:%.*]] = mul [[TMP20]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[TMP21:%.*]] = mul 
[[TMP19]], [[DOTSPLAT10]] ; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[TMP21]] ; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP13]] -; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP13]], 0 -; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement poison, i64 [[TMP26]], i64 0 -; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector [[DOTSPLATINSERT13]], poison, zeroinitializer ; STRIDED-NEXT: [[TMP27:%.*]] = call @llvm.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP28:%.*]] = add [[DOTSPLAT14]], [[TMP27]] -; STRIDED-NEXT: [[TMP29:%.*]] = mul [[TMP28]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[TMP29:%.*]] = mul [[TMP27]], [[DOTSPLAT10]] ; STRIDED-NEXT: [[VECTOR_GEP11:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP29]] ; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[VECTOR_GEP11]], i32 4, splat (i1 true), poison), !alias.scope [[META15:![0-9]+]] ; STRIDED-NEXT: [[TMP30:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll index e5400fb8c7cb7..6e7c6fbd6fa02 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll @@ -27,8 +27,10 @@ define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> +; CHECK-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, <4 x ptr> [[VECTOR_GEP4]], <4 x i64> splat (i64 4096) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = getelementptr i8, <4 x ptr> [[STEP_ADD]], <4 x i64> splat (i64 4096) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, <4 x 
ptr> [[STEP_ADD_2]], <4 x i64> splat (i64 4096) ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 @@ -85,8 +87,10 @@ define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> ; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> +; CHECK-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, <4 x ptr> [[VECTOR_GEP4]], <4 x i64> splat (i64 4096) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = getelementptr i8, <4 x ptr> [[STEP_ADD]], <4 x i64> splat (i64 4096) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, <4 x ptr> [[STEP_ADD_2]], <4 x i64> splat (i64 4096) ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384 From 1c9e58a6caa5f5d23a09cf43977bfdc0a15f0b83 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 28 Jul 2025 22:24:25 +0800 Subject: [PATCH 13/16] Add back method accidentally removed --- llvm/lib/Transforms/Vectorize/VPlan.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c0501fda77342..d5a564d929ab4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2100,6 +2100,13 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { 
return Trunc ? Trunc->getType() : getStartValue()->getLiveInIRValue()->getType(); } + + /// Returns the VPValue representing the value of this induction at + /// the last unrolled part, if it exists. Returns itself if unrolling did not + /// take place. + VPValue *getLastUnrolledPartOperand() { + return isUnrolled() ? getOperand(getNumOperands() - 1) : this; + } }; class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe { From 73973fe2004c46cd52f4554508e96bf645a0c22e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 28 Jul 2025 22:25:23 +0800 Subject: [PATCH 14/16] Undo some regenerated names in tests --- .../LoopVectorize/ARM/pointer_iv.ll | 28 +++++++++---------- .../LoopVectorize/RISCV/strided-accesses.ll | 10 +++---- .../Transforms/LoopVectorize/X86/pr48340.ll | 8 +++--- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index a13256139d7cd..e8811253847be 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -62,8 +62,8 @@ define hidden void @pointer_phi_v4i32_add2(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[NEXT_GEP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.body: ; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ] @@ 
-116,8 +116,8 @@ define hidden void @pointer_phi_v4i32_add3(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 48 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 -; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 +; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.body: ; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ] @@ -212,8 +212,8 @@ define hidden void @pointer_phi_v8i16_add2(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[NEXT_GEP5]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: for.body: ; CHECK-NEXT: [[A_ADDR_011:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ] @@ -495,8 +495,8 @@ define hidden void @pointer_phi_v4f32_add2(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: [[TMP0:%.*]] = fadd fast <4 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[NEXT_GEP5]], align 4 ; CHECK-NEXT: 
[[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: for.body: ; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ] @@ -549,8 +549,8 @@ define hidden void @pointer_phi_v4f32_add3(ptr noalias nocapture readonly %A, pt ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 48 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 -; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 +; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.body: ; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ] @@ -642,8 +642,8 @@ define hidden void @pointer_phi_v4half_add2(ptr noalias nocapture readonly %A, p ; CHECK-NEXT: [[TMP0:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store <8 x half> [[TMP0]], ptr [[NEXT_GEP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 
[[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: for.body: ; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ] @@ -696,8 +696,8 @@ define hidden void @pointer_phi_v4half_add3(ptr noalias nocapture readonly %A, p ; CHECK-NEXT: [[TMP0:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: store <8 x half> [[TMP0]], ptr [[NEXT_GEP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: for.body: ; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index d602262b38bc5..9e492c62a5577 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -757,13 +757,13 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP19:%.*]] = call @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector [[DOTSPLATINSERT9]], poison, zeroinitializer -; STRIDED-NEXT: [[TMP21:%.*]] = mul [[TMP19]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr 
[[POINTER_PHI11]], [[TMP21]] +; STRIDED-NEXT: [[TMP18:%.*]] = mul [[TMP19]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[TMP18]] ; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP13]] ; STRIDED-NEXT: [[TMP27:%.*]] = call @llvm.stepvector.nxv4i64() -; STRIDED-NEXT: [[TMP29:%.*]] = mul [[TMP27]], [[DOTSPLAT10]] -; STRIDED-NEXT: [[VECTOR_GEP11:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP29]] -; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[VECTOR_GEP11]], i32 4, splat (i1 true), poison), !alias.scope [[META15:![0-9]+]] +; STRIDED-NEXT: [[TMP21:%.*]] = mul [[TMP27]], [[DOTSPLAT10]] +; STRIDED-NEXT: [[VECTOR_GEP7:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP21]] +; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[VECTOR_GEP7]], i32 4, splat (i1 true), poison), !alias.scope [[META15:![0-9]+]] ; STRIDED-NEXT: [[TMP30:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP30]], [[VECTOR_GEP]], i32 4, splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll index 6e7c6fbd6fa02..b6acf387fb658 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll @@ -27,8 +27,8 @@ define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> -; CHECK-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, <4 x ptr> [[VECTOR_GEP4]], <4 x 
i64> splat (i64 4096) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> +; CHECK-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, <4 x ptr> [[TMP5]], <4 x i64> splat (i64 4096) ; CHECK-NEXT: [[STEP_ADD_2:%.*]] = getelementptr i8, <4 x ptr> [[STEP_ADD]], <4 x i64> splat (i64 4096) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, <4 x ptr> [[STEP_ADD_2]], <4 x i64> splat (i64 4096) ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) @@ -87,8 +87,8 @@ define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> -; CHECK-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, <4 x ptr> [[VECTOR_GEP4]], <4 x i64> splat (i64 4096) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> +; CHECK-NEXT: [[STEP_ADD:%.*]] = getelementptr i8, <4 x ptr> [[TMP5]], <4 x i64> splat (i64 4096) ; CHECK-NEXT: [[STEP_ADD_2:%.*]] = getelementptr i8, <4 x ptr> [[STEP_ADD]], <4 x i64> splat (i64 4096) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, <4 x ptr> [[STEP_ADD_2]], <4 x i64> splat (i64 4096) ; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison) From 564ede882a8af1925920a57fa5f522f0b5933a9e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 4 Aug 2025 09:39:28 +0800 Subject: [PATCH 15/16] Fix typo in comment --- llvm/lib/Transforms/Vectorize/VPlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index d5a564d929ab4..358055021dceb 100644 --- 
a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2140,7 +2140,7 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe { /// Generate vector values for the pointer induction. void execute(VPTransformState &State) override { llvm_unreachable("cannot execute this recipe, should be expanded via " - "expandVPWidenIntOrFpInductionRecipe"); + "expandVPWidenPointerInduction"); }; /// Returns true if only scalar values will be generated. From 3b1f70923a574f8728be3308438a2915822cdf38 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 5 Aug 2025 16:15:59 +0800 Subject: [PATCH 16/16] Fix case order --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 90ccf8ec6d1fc..47a807794eb3d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1091,8 +1091,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::LogicalAnd: case VPInstruction::Not: case VPInstruction::PtrAdd: - case VPInstruction::WidePtrAdd: case VPInstruction::WideIVStep: + case VPInstruction::WidePtrAdd: case VPInstruction::StepVector: case VPInstruction::ReductionStartVector: return false;