Skip to content

Commit 2d4a8da

Browse files
authored
[VPlan] Use DL index type consistently for GEPs (#169396)
In preparation to strip VPUnrollPartAccessor and unroll recipes directly, strip unnecessary complication in getGEPIndexTy, as the unroll part will no longer be available in follow-ups (see #168886 for instance). The patch also helps by doing a mass test update up-front. Narrowing the GEP index type conditionally does not yield any benefit, and the change is non-functional in terms of emitted assembly. While at it, avoid hard-coding address-space 0, and use the pointer operand's address space to get the GEP index type.
1 parent 04bddda commit 2d4a8da

File tree

128 files changed

+1455
-1402
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+1455
-1402
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2593,22 +2593,11 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
25932593
}
25942594
#endif
25952595

2596-
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride,
2597-
unsigned CurrentPart, IRBuilderBase &Builder) {
2598-
// Use i32 for the gep index type when the value is constant,
2599-
// or query DataLayout for a more suitable index type otherwise.
2600-
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2601-
return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
2602-
? DL.getIndexType(Builder.getPtrTy(0))
2603-
: Builder.getInt32Ty();
2604-
}
2605-
26062596
void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
26072597
auto &Builder = State.Builder;
26082598
unsigned CurrentPart = getUnrollPart(*this);
2609-
bool IsUnitStride = Stride == 1 || Stride == -1;
2610-
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
2611-
IsUnitStride, CurrentPart, Builder);
2599+
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2600+
Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this));
26122601

26132602
// The wide store needs to start at the last vector element.
26142603
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
@@ -2644,8 +2633,8 @@ void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
26442633
void VPVectorPointerRecipe::execute(VPTransformState &State) {
26452634
auto &Builder = State.Builder;
26462635
unsigned CurrentPart = getUnrollPart(*this);
2647-
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2648-
/*IsUnitStride*/ true, CurrentPart, Builder);
2636+
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
2637+
Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this));
26492638
Value *Ptr = State.get(getOperand(0), VPLane(0));
26502639

26512640
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
1414
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1515
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ <i64 poison, i64 0>, %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ]
1616
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
17-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
17+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
1818
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
1919
; CHECK-NEXT: [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8
2020
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32> <i32 1, i32 2>
2121
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[WIDE_LOAD]], <2 x i64> [[WIDE_LOAD1]], <2 x i32> <i32 1, i32 2>
2222
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1))
2323
; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1))
2424
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
25-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 2
25+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 2
2626
; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP10]], align 8
2727
; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP13]], align 8
2828
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) {
6363
; DEFAULT: [[VECTOR_BODY]]:
6464
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
6565
; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
66-
; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i32 2
66+
; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i64 2
6767
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
6868
; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
6969
; DEFAULT-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]])
@@ -259,7 +259,7 @@ define void @latch_branch_cost(ptr %dst) {
259259
; DEFAULT: [[VECTOR_BODY]]:
260260
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
261261
; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
262-
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 16
262+
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 16
263263
; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 1
264264
; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1
265265
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32

llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
137137
; CHECK: [[VECTOR_BODY]]:
138138
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
139139
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]]
140-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16
140+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16
141141
; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1
142142
; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1
143143
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
@@ -210,7 +210,7 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
210210
; CHECK: [[VECTOR_BODY]]:
211211
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
212212
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]]
213-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16
213+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16
214214
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 1
215215
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1
216216
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
@@ -279,15 +279,15 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr %
279279
; CHECK: [[VECTOR_BODY]]:
280280
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
281281
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
282-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
282+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
283283
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
284284
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
285285
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
286286
; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
287287
; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP0]], [[TMP3]]
288288
; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP0]], [[TMP4]]
289289
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
290-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 16
290+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i64 16
291291
; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2
292292
; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP8]], align 2
293293
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
@@ -413,7 +413,7 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
413413
; CHECK: [[VECTOR_BODY]]:
414414
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
415415
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]]
416-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4
416+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4
417417
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
418418
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
419419
; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[WIDE_LOAD]] to <4 x i1>
@@ -427,15 +427,16 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
427427
; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP8]] to <4 x i32>
428428
; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
429429
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
430-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 4
430+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 4
431431
; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP12]], align 4
432432
; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4
433433
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
434434
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
435435
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
436436
; CHECK: [[MIDDLE_BLOCK]]:
437-
; CHECK-NEXT: br [[EXIT:label %.*]]
438-
; CHECK: [[SCALAR_PH:.*:]]
437+
; CHECK-NEXT: br label %[[EXIT:.*]]
438+
; CHECK: [[EXIT]]:
439+
; CHECK-NEXT: ret void
439440
;
440441
entry:
441442
br label %loop

llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
2727
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
2828
; CHECK-NEXT: [[INDEX4:%.*]] = trunc i32 [[INDEX]] to i8
2929
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[INDEX4]]
30-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16
30+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
3131
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP8]], align 8
3232
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP7]], align 8
3333
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3)

0 commit comments

Comments
 (0)