Skip to content

Commit 61e4aec

Browse files
committed
[VPlan] Expand VPWidenPointerInductionRecipe into separate recipes
Stacked on llvm#148273 to be able to use VPInstruction::PtrAdd. This is the VPWidenPointerInductionRecipe equivalent of llvm#118638, with the motivation of allowing us to use the EVL as the induction step. Most of the new VPlan transformation is a straightforward translation of the existing execute code. VPUnrollPartAccessor unfortunately doesn't work outside of VPlanRecipes.cpp so here the operands are just manually checked to see if they're unrolled.
1 parent 8dec7cf commit 61e4aec

File tree

10 files changed

+156
-165
lines changed

10 files changed

+156
-165
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,21 +1043,6 @@ void VPlan::execute(VPTransformState *State) {
10431043
if (isa<VPWidenPHIRecipe>(&R))
10441044
continue;
10451045

1046-
if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1047-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1048-
"recipe generating only scalars should have been replaced");
1049-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1050-
PHINode *Phi = cast<PHINode>(GEP->getPointerOperand());
1051-
1052-
Phi->setIncomingBlock(1, VectorLatchBB);
1053-
1054-
// Move the last step to the end of the latch block. This ensures
1055-
// consistent placement of all induction updates.
1056-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1057-
Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
1058-
continue;
1059-
}
1060-
10611046
auto *PhiR = cast<VPSingleDefRecipe>(&R);
10621047
// VPInstructions currently model scalar Phis only.
10631048
bool NeedsScalar = isa<VPInstruction>(PhiR) ||

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2066,8 +2066,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
20662066
}
20672067
};
20682068

2069-
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
2070-
public VPUnrollPartAccessor<4> {
2069+
class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
20712070
bool IsScalarAfterVectorization;
20722071

20732072
public:
@@ -2095,19 +2094,14 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
20952094

20962095
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
20972096

2098-
/// Generate vector values for the pointer induction.
2099-
void execute(VPTransformState &State) override;
2097+
/// Not executable: this recipe is expected to be expanded into concrete
/// recipes by expandVPWidenPointerInductionRecipe before execution.
void execute(VPTransformState &State) override {
2098+
llvm_unreachable("cannot execute this recipe, should be expanded via "
2099+
"expandVPWidenPointerInductionRecipe");
2100+
}
21002101

21012102
/// Returns true if only scalar values will be generated.
21022103
bool onlyScalarsGenerated(bool IsScalable);
21032104

2104-
/// Returns the VPValue representing the value of this induction at
2105-
/// the first unrolled part, if it exists. Returns itself if unrolling did not
2106-
/// take place.
2107-
VPValue *getFirstUnrolledPartOperand() {
2108-
return getUnrollPart(*this) == 0 ? this : getOperand(3);
2109-
}
2110-
21112105
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
21122106
/// Print the recipe.
21132107
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -3686,87 +3686,6 @@ bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
36863686
(!IsScalable || vputils::onlyFirstLaneUsed(this));
36873687
}
36883688

3689-
void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
3690-
assert(getInductionDescriptor().getKind() ==
3691-
InductionDescriptor::IK_PtrInduction &&
3692-
"Not a pointer induction according to InductionDescriptor!");
3693-
assert(State.TypeAnalysis.inferScalarType(this)->isPointerTy() &&
3694-
"Unexpected type.");
3695-
assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
3696-
"Recipe should have been replaced");
3697-
3698-
unsigned CurrentPart = getUnrollPart(*this);
3699-
3700-
// Build a pointer phi
3701-
Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
3702-
Type *ScStValueType = ScalarStartValue->getType();
3703-
3704-
BasicBlock *VectorPH =
3705-
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
3706-
PHINode *NewPointerPhi = nullptr;
3707-
if (CurrentPart == 0) {
3708-
IRBuilder<>::InsertPointGuard Guard(State.Builder);
3709-
if (State.Builder.GetInsertPoint() !=
3710-
State.Builder.GetInsertBlock()->getFirstNonPHIIt())
3711-
State.Builder.SetInsertPoint(
3712-
State.Builder.GetInsertBlock()->getFirstNonPHIIt());
3713-
NewPointerPhi = State.Builder.CreatePHI(ScStValueType, 2, "pointer.phi");
3714-
NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
3715-
NewPointerPhi->setDebugLoc(getDebugLoc());
3716-
} else {
3717-
// The recipe has been unrolled. In that case, fetch the single pointer phi
3718-
// shared among all unrolled parts of the recipe.
3719-
auto *GEP =
3720-
cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
3721-
NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
3722-
}
3723-
3724-
// A pointer induction, performed by using a gep
3725-
BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
3726-
Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
3727-
Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
3728-
Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
3729-
// Add induction update using an incorrect block temporarily. The phi node
3730-
// will be fixed after VPlan execution. Note that at this point the latch
3731-
// block cannot be used, as it does not exist yet.
3732-
// TODO: Model increment value in VPlan, by turning the recipe into a
3733-
// multi-def and a subclass of VPHeaderPHIRecipe.
3734-
if (CurrentPart == 0) {
3735-
// The recipe represents the first part of the pointer induction. Create the
3736-
// GEP to increment the phi across all unrolled parts.
3737-
Value *NumUnrolledElems = State.get(getOperand(2), true);
3738-
3739-
Value *InductionGEP = GetElementPtrInst::Create(
3740-
State.Builder.getInt8Ty(), NewPointerPhi,
3741-
State.Builder.CreateMul(
3742-
ScalarStepValue,
3743-
State.Builder.CreateTrunc(NumUnrolledElems, PhiType)),
3744-
"ptr.ind", InductionLoc);
3745-
3746-
NewPointerPhi->addIncoming(InductionGEP, VectorPH);
3747-
}
3748-
3749-
// Create actual address geps that use the pointer phi as base and a
3750-
// vectorized version of the step value (<step*0, ..., step*N>) as offset.
3751-
Type *VecPhiType = VectorType::get(PhiType, State.VF);
3752-
Value *StartOffsetScalar = State.Builder.CreateMul(
3753-
RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
3754-
Value *StartOffset =
3755-
State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
3756-
// Create a vector of consecutive numbers from zero to VF.
3757-
StartOffset = State.Builder.CreateAdd(
3758-
StartOffset, State.Builder.CreateStepVector(VecPhiType));
3759-
3760-
assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
3761-
"scalar step must be the same across all parts");
3762-
Value *GEP = State.Builder.CreateGEP(
3763-
State.Builder.getInt8Ty(), NewPointerPhi,
3764-
State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
3765-
State.VF, ScalarStepValue)),
3766-
"vector.gep");
3767-
State.set(this, GEP);
3768-
}
3769-
37703689
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
37713690
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
37723691
VPSlotTracker &SlotTracker) const {
@@ -3925,11 +3844,6 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
39253844
Value *Op0 = State.get(getOperand(0));
39263845
Type *VecTy = Op0->getType();
39273846
Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
3928-
// Manually move it with the other PHIs in case PHI recipes above this one
3929-
// also inserted non-phi instructions.
3930-
// TODO: Remove once VPWidenPointerInductionRecipe is also expanded in
3931-
// convertToConcreteRecipes.
3932-
VecPhi->moveBefore(State.Builder.GetInsertBlock()->getFirstNonPHIIt());
39333847
State.set(this, VecPhi);
39343848
}
39353849

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2675,6 +2675,107 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
26752675
WidenIVR->replaceAllUsesWith(WidePHI);
26762676
}
26772677

2678+
/// Expand a VPWidenPointerInductionRecipe into executable recipes, for the
2679+
/// initial value, phi and backedge value. In the following example:
2680+
///
2681+
/// <x1> vector loop: {
2682+
/// vector.body:
2683+
/// EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION %start, %step, %vf
2684+
/// ...
2685+
/// EMIT branch-on-count ...
2686+
/// }
2687+
///
2688+
/// WIDEN-POINTER-INDUCTION will get expanded to:
2689+
///
2690+
/// <x1> vector loop: {
2691+
/// vector.body:
2692+
/// EMIT-SCALAR %pointer.phi = phi %start, %ptr.ind
2693+
/// EMIT %mul = mul %stepvector, %step
2694+
/// EMIT %vector.gep = ptradd %pointer.phi, %mul
2695+
/// ...
2696+
/// EMIT %ptr.ind = ptradd %pointer.phi, %vf
2697+
/// EMIT branch-on-count ...
2698+
/// }
///
/// \p R is the recipe to expand; all of its uses are redirected to the new
/// "vector.gep" value, and the caller is responsible for erasing \p R.
/// \p TypeInfo is used to infer the scalar types of the step and VF operands.
2699+
static void
2700+
expandVPWidenPointerInductionRecipe(VPWidenPointerInductionRecipe *R,
2701+
VPTypeAnalysis &TypeInfo) {
2702+
VPlan *Plan = R->getParent()->getPlan();
2703+

2704+
assert(R->getInductionDescriptor().getKind() ==
2705+
InductionDescriptor::IK_PtrInduction &&
2706+
"Not a pointer induction according to InductionDescriptor!");
2707+
assert(TypeInfo.inferScalarType(R)->isPointerTy() && "Unexpected type.");
2708+
assert(!R->onlyScalarsGenerated(Plan->hasScalableVF()) &&
2709+
"Recipe should have been replaced");
2710+

2711+
unsigned CurrentPart = 0;
2712+
// Unrolled copies (parts > 0) carry two extra operands: the value of the
// first part (operand 3) and the constant part number (operand 4). Require
// exactly five operands so that operand 4 is guaranteed to exist before it
// is read.
if (R->getNumOperands() == 5)
2713+
CurrentPart =
2714+
cast<ConstantInt>(R->getOperand(4)->getLiveInIRValue())->getZExtValue();
2715+

2716+
VPBuilder Builder(R);
2717+
DebugLoc DL = R->getDebugLoc();
2718+

2719+
// Build a pointer phi
2720+
VPPhi *Phi;
2721+
if (CurrentPart == 0) {
2722+
Phi = Builder.createScalarPhi({R->getStartValue()}, DL,
2723+
"pointer.phi");
2724+
} else {
2725+
// The recipe has been unrolled. In that case, fetch the single pointer phi
2726+
// shared among all unrolled parts of the recipe.
// Operand 3 is expected to already be the "vector.gep" ptradd that replaced
// the first part, whose base operand is that shared phi.
2727+
auto *PtrAdd = cast<VPInstruction>(R->getOperand(3));
2728+
Phi = cast<VPPhi>(PtrAdd->getOperand(0)->getDefiningRecipe());
2729+
}
2730+

2731+
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
2732+

2733+
// A pointer induction, performed by using a gep
2734+
Type *PhiType = TypeInfo.inferScalarType(R->getStepValue());
2735+
VPValue *RuntimeVF = Builder.createScalarZExtOrTrunc(
2736+
&Plan->getVF(), PhiType, TypeInfo.inferScalarType(&Plan->getVF()), DL);
2737+
if (CurrentPart == 0) {
2738+
// The recipe represents the first part of the pointer induction. Create the
2739+
// GEP to increment the phi across all unrolled parts.
2740+
VPValue *NumUnrolledElems = Builder.createScalarZExtOrTrunc(
2741+
R->getOperand(2), PhiType, TypeInfo.inferScalarType(R->getOperand(2)),
2742+
DL);
2743+
VPValue *Offset = Builder.createNaryOp(
2744+
Instruction::Mul, {R->getStepValue(), NumUnrolledElems});
2745+

2746+
// Emit the increment just before the terminator of the loop's exiting
// block; the guard restores the previous insert point afterwards.
VPBuilder::InsertPointGuard Guard(Builder);
2747+
VPBasicBlock *ExitingBB =
2748+
Plan->getVectorLoopRegion()->getExitingBasicBlock();
2749+
Builder.setInsertPoint(ExitingBB,
2750+
ExitingBB->getTerminator()->getIterator());
2751+

2752+
VPValue *InductionGEP = Builder.createPtrAdd(Phi, Offset, DL, "ptr.ind");
2753+
Phi->addOperand(InductionGEP);
2754+
}
2755+

2756+
VPValue *CurrentPartV =
2757+
Plan->getOrAddLiveIn(ConstantInt::get(PhiType, CurrentPart));
2758+

2759+
// Create actual address geps that use the pointer phi as base and a
2760+
// vectorized version of the step value (<step*0, ..., step*N>) as offset.
2761+
VPValue *StartOffsetScalar =
2762+
Builder.createNaryOp(Instruction::Mul, {RuntimeVF, CurrentPartV});
2763+
VPValue *StartOffset =
2764+
Builder.createNaryOp(VPInstruction::Broadcast, StartOffsetScalar);
2765+
// Create a vector of consecutive numbers from zero to VF.
2766+
StartOffset = Builder.createNaryOp(
2767+
Instruction::Add,
2768+
{StartOffset,
2769+
Builder.createNaryOp(VPInstruction::StepVector, {}, PhiType)});
2770+

2771+
VPValue *PtrAdd = Builder.createPtrAdd(
2772+
Phi,
2773+
Builder.createNaryOp(Instruction::Mul, {StartOffset, R->getStepValue()}),
2774+
DL, "vector.gep");
2775+

2776+
R->replaceAllUsesWith(PtrAdd);
2777+
}
2778+
26782779
void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
26792780
// Replace loop regions with explicit CFG.
26802781
SmallVector<VPRegionBlock *> LoopRegions;
@@ -2711,6 +2812,12 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
27112812
continue;
27122813
}
27132814

2815+
if (auto *WidenIVR = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
2816+
expandVPWidenPointerInductionRecipe(WidenIVR, TypeInfo);
2817+
ToRemove.push_back(WidenIVR);
2818+
continue;
2819+
}
2820+
27142821
if (auto *Expr = dyn_cast<VPExpressionRecipe>(&R)) {
27152822
Expr->decompose();
27162823
ToRemove.push_back(Expr);

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,8 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
6767
; CHECK: vector.body:
6868
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6969
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
70-
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
71-
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
7270
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP6]]
73-
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP10]], 0
71+
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP6]], 0
7472
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0
7573
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
7674
; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -159,17 +157,16 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
159157
; CHECK: vector.body:
160158
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
161159
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
162-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
163-
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
164160
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP6]]
165-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
161+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
166162
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
167163
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
168164
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
169-
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP12]]
170-
; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], splat (i64 1)
171-
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]]
172-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0
165+
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 2 x i64> [[DOTSPLAT]], i32 0
166+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i64> [[TMP12]], i32 0
167+
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP20]], [[TMP21]]
168+
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1
169+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP14]]
173170
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0
174171
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP16]], align 1
175172
; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1)

llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,14 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 {
2929
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
3030
; CHECK: [[VECTOR_BODY]]:
3131
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
32-
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[IN]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ]
33-
; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[OUT]], %[[VECTOR_PH]] ], [ [[PTR_IND3:%.*]], %[[VECTOR_BODY]] ]
32+
; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[IN]], %[[VECTOR_PH]] ], [ [[PTR_IND3:%.*]], %[[VECTOR_BODY]] ]
33+
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[OUT]], %[[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], %[[VECTOR_BODY]] ]
3434
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
3535
; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI2]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
3636
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
37-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 1
38-
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[VECTOR_GEP]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0:![0-9]+]]
39-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 2
37+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 1
38+
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[VECTOR_GEP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0:![0-9]+]]
39+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 2
4040
; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0]]
4141
; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0]]
4242
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_MASKED_GATHER]] to <4 x i32>
@@ -66,14 +66,14 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 {
6666
; CHECK-NEXT: [[TMP27:%.*]] = add nuw <4 x i32> [[TMP26]], [[TMP24]]
6767
; CHECK-NEXT: [[TMP28:%.*]] = lshr <4 x i32> [[TMP27]], splat (i32 16)
6868
; CHECK-NEXT: [[TMP29:%.*]] = trunc <4 x i32> [[TMP28]] to <4 x i8>
69-
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 1
70-
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP13]], <4 x ptr> [[VECTOR_GEP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
71-
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 2
69+
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 1
70+
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP13]], <4 x ptr> [[VECTOR_GEP]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
71+
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 2
7272
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP21]], <4 x ptr> [[TMP30]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]]
7373
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP29]], <4 x ptr> [[TMP31]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]]
7474
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
75-
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12
7675
; CHECK-NEXT: [[PTR_IND3]] = getelementptr i8, ptr [[POINTER_PHI2]], i32 12
76+
; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12
7777
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
7878
; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
7979
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)