Skip to content

Commit 76eaea6

Browse files
committed
[VPlan] Use DL index type consistently for GEPs
In preparation for stripping VPUnrollPartAccessor and unrolling recipes directly, remove the unnecessary complication in getGEPIndexTy, as the unroll part will no longer be available in follow-ups. Narrowing the GEP index type conditionally does not yield any benefit, and the change is non-functional in terms of emitted assembly.
1 parent 97732dd commit 76eaea6

File tree

127 files changed

+1517
-1413
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

127 files changed

+1517
-1413
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2593,22 +2593,15 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
25932593
}
25942594
#endif
25952595

2596-
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride,
2597-
unsigned CurrentPart, IRBuilderBase &Builder) {
2598-
// Use i32 for the gep index type when the value is constant,
2599-
// or query DataLayout for a more suitable index type otherwise.
2596+
static Type *getGEPIndexTy(IRBuilderBase &Builder) {
26002597
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2601-
return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
2602-
? DL.getIndexType(Builder.getPtrTy(0))
2603-
: Builder.getInt32Ty();
2598+
return DL.getIndexType(Builder.getPtrTy(0));
26042599
}
26052600

26062601
void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
26072602
auto &Builder = State.Builder;
26082603
unsigned CurrentPart = getUnrollPart(*this);
2609-
bool IsUnitStride = Stride == 1 || Stride == -1;
2610-
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2611-
IsUnitStride, CurrentPart, Builder);
2604+
Type *IndexTy = getGEPIndexTy(Builder);
26122605

26132606
// The wide store needs to start at the last vector element.
26142607
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
@@ -2644,8 +2637,7 @@ void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
26442637
void VPVectorPointerRecipe::execute(VPTransformState &State) {
26452638
auto &Builder = State.Builder;
26462639
unsigned CurrentPart = getUnrollPart(*this);
2647-
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2648-
/*IsUnitStride*/ true, CurrentPart, Builder);
2640+
Type *IndexTy = getGEPIndexTy(Builder);
26492641
Value *Ptr = State.get(getOperand(0), VPLane(0));
26502642

26512643
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
1414
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1515
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ <i64 poison, i64 0>, %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ]
1616
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
17-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
17+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
1818
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
1919
; CHECK-NEXT: [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8
2020
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32> <i32 1, i32 2>
2121
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[WIDE_LOAD]], <2 x i64> [[WIDE_LOAD1]], <2 x i32> <i32 1, i32 2>
2222
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1))
2323
; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1))
2424
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
25-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 2
25+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 2
2626
; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP10]], align 8
2727
; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP13]], align 8
2828
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) {
6363
; DEFAULT: [[VECTOR_BODY]]:
6464
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
6565
; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
66-
; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i32 2
66+
; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i64 2
6767
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
6868
; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
6969
; DEFAULT-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]])
@@ -259,7 +259,7 @@ define void @latch_branch_cost(ptr %dst) {
259259
; DEFAULT: [[VECTOR_BODY]]:
260260
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
261261
; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
262-
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 16
262+
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 16
263263
; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 1
264264
; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1
265265
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32

llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
137137
; CHECK: [[VECTOR_BODY]]:
138138
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
139139
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]]
140-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16
140+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16
141141
; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1
142142
; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1
143143
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
@@ -210,7 +210,7 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
210210
; CHECK: [[VECTOR_BODY]]:
211211
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
212212
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]]
213-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16
213+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16
214214
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 1
215215
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1
216216
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
@@ -279,15 +279,15 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr %
279279
; CHECK: [[VECTOR_BODY]]:
280280
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
281281
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
282-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
282+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
283283
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
284284
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
285285
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
286286
; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
287287
; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP0]], [[TMP3]]
288288
; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP0]], [[TMP4]]
289289
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
290-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 16
290+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i64 16
291291
; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2
292292
; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP8]], align 2
293293
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
@@ -413,7 +413,7 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
413413
; CHECK: [[VECTOR_BODY]]:
414414
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
415415
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]]
416-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4
416+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4
417417
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
418418
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
419419
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[WIDE_LOAD]] to <4 x i1>
@@ -427,15 +427,16 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
427427
; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP8]] to <4 x i32>
428428
; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
429429
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
430-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 4
430+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 4
431431
; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP12]], align 4
432432
; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4
433433
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
434434
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
435435
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
436436
; CHECK: [[MIDDLE_BLOCK]]:
437-
; CHECK-NEXT: br [[EXIT:label %.*]]
438-
; CHECK: [[SCALAR_PH:.*:]]
437+
; CHECK-NEXT: br label %[[EXIT:.*]]
438+
; CHECK: [[EXIT]]:
439+
; CHECK-NEXT: ret void
439440
;
440441
entry:
441442
br label %loop

llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
2727
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
2828
; CHECK-NEXT: [[INDEX4:%.*]] = trunc i32 [[INDEX]] to i8
2929
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[INDEX4]]
30-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16
30+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
3131
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP8]], align 8
3232
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP7]], align 8
3333
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3)

0 commit comments

Comments
 (0)