Skip to content

Commit 0d6e069

Browse files
committed
[VPlan] Model VF as operand in VectorPointerRecipe
Similar to how the runtime VF is modeled as an operand in VectorEndPointerRecipe, model it as an operand in VectorPointerRecipe as well.
1 parent 1abb055 commit 0d6e069

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+805
-1030
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7607,10 +7607,10 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
76077607
Ptr, &Plan.getVF(), getLoadStoreType(I),
76087608
/*Stride*/ -1, Flags, VPI->getDebugLoc());
76097609
} else {
7610-
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
7611-
GEP ? GEP->getNoWrapFlags()
7612-
: GEPNoWrapFlags::none(),
7613-
VPI->getDebugLoc());
7610+
VectorPtr = new VPVectorPointerRecipe(
7611+
Ptr, &Plan.getVF(), getLoadStoreType(I),
7612+
GEP ? GEP->getNoWrapFlags() : GEPNoWrapFlags::none(),
7613+
VPI->getDebugLoc());
76147614
}
76157615
Builder.insert(VectorPtr);
76167616
Ptr = VectorPtr;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1977,18 +1977,20 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
19771977

19781978
/// A recipe to compute the pointers for widened memory accesses of IndexTy.
19791979
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
1980-
public VPUnrollPartAccessor<1> {
1980+
public VPUnrollPartAccessor<2> {
19811981
Type *SourceElementTy;
19821982

19831983
public:
1984-
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1984+
VPVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
19851985
GEPNoWrapFlags GEPFlags, DebugLoc DL)
1986-
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1987-
GEPFlags, DL),
1986+
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, {Ptr, VF}, GEPFlags, DL),
19881987
SourceElementTy(SourceElementTy) {}
19891988

19901989
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
19911990

1991+
VPValue *getVFValue() { return getOperand(1); }
1992+
const VPValue *getVFValue() const { return getOperand(1); }
1993+
19921994
void execute(VPTransformState &State) override;
19931995

19941996
Type *getSourceElementType() const { return SourceElementTy; }
@@ -2008,8 +2010,9 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
20082010
}
20092011

20102012
VPVectorPointerRecipe *clone() override {
2011-
return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2012-
getGEPNoWrapFlags(), getDebugLoc());
2013+
return new VPVectorPointerRecipe(getOperand(0), getVFValue(),
2014+
SourceElementTy, getGEPNoWrapFlags(),
2015+
getDebugLoc());
20132016
}
20142017

20152018
/// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2662,7 +2662,12 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
26622662
/*IsUnitStride*/ true, CurrentPart, Builder);
26632663
Value *Ptr = State.get(getOperand(0), VPLane(0));
26642664

2665-
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2665+
Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
2666+
RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, IndexTy);
2667+
Value *Increment =
2668+
CurrentPart == 1 ? RuntimeVF
2669+
: Builder.CreateNUWMul(
2670+
RuntimeVF, ConstantInt::get(IndexTy, CurrentPart));
26662671
Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Increment,
26672672
"", getGEPNoWrapFlags());
26682673

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2740,10 +2740,11 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
27402740
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
27412741
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
27422742

2743-
assert(all_of(Plan.getVF().users(),
2744-
IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
2745-
VPWidenIntOrFpInductionRecipe>) &&
2746-
"User of VF that we can't transform to EVL.");
2743+
assert(
2744+
all_of(Plan.getVF().users(),
2745+
IsaPred<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
2746+
VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>) &&
2747+
"User of VF that we can't transform to EVL.");
27472748
Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
27482749
return isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe>(U);
27492750
});

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
2121
; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
2222
; CHECK: [[VECTOR_PH]]:
2323
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
24-
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
24+
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2
25+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP11]], 2
2526
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
2627
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2728
; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]]
@@ -36,9 +37,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
3637
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP26]]
3738
; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64
3839
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]]
39-
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
40-
; CHECK-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 1
41-
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP38]]
40+
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP11]]
4241
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP34]], align 8
4342
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8
4443
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
3030
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
3131
; DEFAULT: [[VECTOR_PH]]:
3232
; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
33-
; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16
33+
; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8
34+
; DEFAULT-NEXT: [[TMP10:%.*]] = mul i64 [[TMP13]], 2
3435
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]]
3536
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
3637
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[X]], i64 0
@@ -40,9 +41,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
4041
; DEFAULT: [[VECTOR_BODY]]:
4142
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
4243
; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
43-
; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
44-
; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3
45-
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP14]]
44+
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP13]]
4645
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
4746
; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1
4847
; DEFAULT-NEXT: [[TMP16:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>
@@ -56,9 +55,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
5655
; DEFAULT-NEXT: [[TMP24:%.*]] = trunc <vscale x 8 x i16> [[TMP22]] to <vscale x 8 x i8>
5756
; DEFAULT-NEXT: [[TMP25:%.*]] = trunc <vscale x 8 x i16> [[TMP23]] to <vscale x 8 x i8>
5857
; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
59-
; DEFAULT-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
60-
; DEFAULT-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 3
61-
; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP26]], i64 [[TMP28]]
58+
; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP26]], i64 [[TMP13]]
6259
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP24]], ptr [[TMP26]], align 1
6360
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP25]], ptr [[TMP29]], align 1
6461
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]

llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
144144
; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
145145
; INTERLEAVE-4-VLA: vector.ph:
146146
; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
147-
; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
147+
; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
148+
; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 4
148149
; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
149150
; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
150151
; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -155,14 +156,10 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
155156
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
156157
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
157158
; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
158-
; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
159-
; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
160-
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP6]]
161-
; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
162-
; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
159+
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]]
160+
; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP5]], 2
163161
; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]]
164-
; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
165-
; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 12
162+
; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP5]], 3
166163
; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP12]]
167164
; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 1
168165
; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 1

0 commit comments

Comments
 (0)