Skip to content

Commit e398f4f

Browse files
committed
[VPlan] Model VF as operand in VectorPointerRecipe
Similar to how the runtime VF is modeled as an operand in VectorEndPointerRecipe, model it as an operand in VectorPointerRecipe as well.
1 parent 76f1949 commit e398f4f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed: +805 additions, −1030 deletions

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7608,10 +7608,10 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
76087608
Ptr, &Plan.getVF(), getLoadStoreType(I),
76097609
/*Stride*/ -1, Flags, VPI->getDebugLoc());
76107610
} else {
7611-
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
7612-
GEP ? GEP->getNoWrapFlags()
7613-
: GEPNoWrapFlags::none(),
7614-
VPI->getDebugLoc());
7611+
VectorPtr = new VPVectorPointerRecipe(
7612+
Ptr, &Plan.getVF(), getLoadStoreType(I),
7613+
GEP ? GEP->getNoWrapFlags() : GEPNoWrapFlags::none(),
7614+
VPI->getDebugLoc());
76157615
}
76167616
Builder.insert(VectorPtr);
76177617
Ptr = VectorPtr;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1974,18 +1974,20 @@ class VPVectorEndPointerRecipe : public VPRecipeWithIRFlags,
19741974

19751975
/// A recipe to compute the pointers for widened memory accesses of IndexTy.
19761976
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
1977-
public VPUnrollPartAccessor<1> {
1977+
public VPUnrollPartAccessor<2> {
19781978
Type *SourceElementTy;
19791979

19801980
public:
1981-
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1981+
VPVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
19821982
GEPNoWrapFlags GEPFlags, DebugLoc DL)
1983-
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1984-
GEPFlags, DL),
1983+
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, {Ptr, VF}, GEPFlags, DL),
19851984
SourceElementTy(SourceElementTy) {}
19861985

19871986
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
19881987

1988+
VPValue *getVFValue() { return getOperand(1); }
1989+
const VPValue *getVFValue() const { return getOperand(1); }
1990+
19891991
void execute(VPTransformState &State) override;
19901992

19911993
Type *getSourceElementType() const { return SourceElementTy; }
@@ -2005,8 +2007,9 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
20052007
}
20062008

20072009
VPVectorPointerRecipe *clone() override {
2008-
return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2009-
getGEPNoWrapFlags(), getDebugLoc());
2010+
return new VPVectorPointerRecipe(getOperand(0), getVFValue(),
2011+
SourceElementTy, getGEPNoWrapFlags(),
2012+
getDebugLoc());
20102013
}
20112014

20122015
/// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2651,7 +2651,12 @@ void VPVectorPointerRecipe::execute(VPTransformState &State) {
26512651
/*IsUnitStride*/ true, CurrentPart, Builder);
26522652
Value *Ptr = State.get(getOperand(0), VPLane(0));
26532653

2654-
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2654+
Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
2655+
RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, IndexTy);
2656+
Value *Increment =
2657+
CurrentPart == 1 ? RuntimeVF
2658+
: Builder.CreateNUWMul(
2659+
RuntimeVF, ConstantInt::get(IndexTy, CurrentPart));
26552660
Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Increment,
26562661
"", getGEPNoWrapFlags());
26572662

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2710,10 +2710,11 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
27102710
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
27112711
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
27122712

2713-
assert(all_of(Plan.getVF().users(),
2714-
IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
2715-
VPWidenIntOrFpInductionRecipe>) &&
2716-
"User of VF that we can't transform to EVL.");
2713+
assert(
2714+
all_of(Plan.getVF().users(),
2715+
IsaPred<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
2716+
VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>) &&
2717+
"User of VF that we can't transform to EVL.");
27172718
Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
27182719
return isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe>(U);
27192720
});

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
2121
; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
2222
; CHECK: [[VECTOR_PH]]:
2323
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
24-
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
24+
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2
25+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP11]], 2
2526
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
2627
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2728
; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]]
@@ -36,9 +37,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
3637
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP26]]
3738
; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64
3839
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]]
39-
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
40-
; CHECK-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 1
41-
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP38]]
40+
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP11]]
4241
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP34]], align 8
4342
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8
4443
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
3030
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
3131
; DEFAULT: [[VECTOR_PH]]:
3232
; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
33-
; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16
33+
; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8
34+
; DEFAULT-NEXT: [[TMP10:%.*]] = mul i64 [[TMP13]], 2
3435
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]]
3536
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
3637
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[X]], i64 0
@@ -40,9 +41,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
4041
; DEFAULT: [[VECTOR_BODY]]:
4142
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
4243
; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
43-
; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
44-
; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3
45-
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP14]]
44+
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP13]]
4645
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
4746
; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1
4847
; DEFAULT-NEXT: [[TMP16:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>
@@ -56,9 +55,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
5655
; DEFAULT-NEXT: [[TMP24:%.*]] = trunc <vscale x 8 x i16> [[TMP22]] to <vscale x 8 x i8>
5756
; DEFAULT-NEXT: [[TMP25:%.*]] = trunc <vscale x 8 x i16> [[TMP23]] to <vscale x 8 x i8>
5857
; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
59-
; DEFAULT-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
60-
; DEFAULT-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 3
61-
; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP26]], i64 [[TMP28]]
58+
; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP26]], i64 [[TMP13]]
6259
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP24]], ptr [[TMP26]], align 1
6360
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP25]], ptr [[TMP29]], align 1
6461
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]

llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
144144
; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
145145
; INTERLEAVE-4-VLA: vector.ph:
146146
; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
147-
; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
147+
; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
148+
; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul i64 [[TMP5]], 4
148149
; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
149150
; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
150151
; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -155,14 +156,10 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
155156
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
156157
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
157158
; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
158-
; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
159-
; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
160-
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP6]]
161-
; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
162-
; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
159+
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]]
160+
; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP5]], 2
163161
; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]]
164-
; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
165-
; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 12
162+
; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP5]], 3
166163
; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP12]]
167164
; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 1
168165
; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 1

0 commit comments

Comments (0)