Skip to content

Commit 055bfc0

Browse files
authored
[InstCombine] Strip leading zero indices from GEP (llvm#155415)
GEPs are often in the form `gep [N x %T], ptr %p, i64 0, i64 %idx`. Canonicalize these to `gep %T, ptr %p, i64 %idx`. This enables transforms that only support one GEP index to work and improves CSE. Various transforms were recently hardened to make sure they still work without the leading index.
1 parent e82abde commit 055bfc0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+364
-363
lines changed

clang/test/CodeGen/attr-counted-by.c

Lines changed: 94 additions & 106 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/union-tbaa1.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,17 @@ void bar(vect32 p[][2]);
1717
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARR]], i32 [[TMP0]]
1818
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
1919
// CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP1]], [[NUM]]
20-
// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x [2 x %union.vect32]], ptr [[TMP]], i32 0, i32 [[TMP0]]
20+
// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP0]]
2121
// CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA6:![0-9]+]]
2222
// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARR]], i32 [[TMP0]], i32 1
2323
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !tbaa [[TBAA2]]
2424
// CHECK-NEXT: [[MUL6:%.*]] = mul i32 [[TMP2]], [[NUM]]
25-
// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [4 x [2 x %union.vect32]], ptr [[TMP]], i32 0, i32 [[TMP0]], i32 1
25+
// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP0]], i32 1
2626
// CHECK-NEXT: store i32 [[MUL6]], ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA6]]
2727
// CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[MUL]], 16
2828
// CHECK-NEXT: store i32 [[TMP3]], ptr [[VEC]], align 4, !tbaa [[TBAA2]]
2929
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa [[TBAA2]]
30-
// CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [4 x [2 x %union.vect32]], ptr [[TMP]], i32 0, i32 [[TMP4]], i32 1
30+
// CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP4]], i32 1
3131
// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX14]], i32 2
3232
// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2, !tbaa [[TBAA6]]
3333
// CHECK-NEXT: [[CONV16:%.*]] = zext i16 [[TMP5]] to i32

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3238,6 +3238,19 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
32383238
drop_end(Indices), "", GEP.getNoWrapFlags()));
32393239
}
32403240

3241+
// Strip leading zero indices.
3242+
auto *FirstIdx = dyn_cast<Constant>(Indices.front());
3243+
if (FirstIdx && FirstIdx->isNullValue() &&
3244+
!FirstIdx->getType()->isVectorTy()) {
3245+
gep_type_iterator GTI = gep_type_begin(GEP);
3246+
++GTI;
3247+
if (!GTI.isStruct())
3248+
return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
3249+
GEP.getPointerOperand(),
3250+
drop_begin(Indices), "",
3251+
GEP.getNoWrapFlags()));
3252+
}
3253+
32413254
// Scalarize vector operands; prefer splat-of-gep as canonical form.
32423255
// Note that this loses information about undef lanes; we run it after
32433256
// demanded bits to partially mitigate that loss.

llvm/test/CodeGen/AMDGPU/vector-alloca-addrspacecast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
; OPT: store i32 0, ptr addrspace(5) %alloca, align 4
88
; OPT: store i32 1, ptr addrspace(5) %a1, align 4
99
; OPT: store i32 2, ptr addrspace(5) %a2, align 4
10-
; OPT: %tmp = getelementptr [3 x i32], ptr addrspace(5) %alloca, i64 0, i64 %index
10+
; OPT: %tmp = getelementptr i32, ptr addrspace(5) %alloca, i64 %index
1111
; OPT: %ac = addrspacecast ptr addrspace(5) %tmp to ptr
1212
; OPT: %data = load i32, ptr %ac, align 4
1313
define amdgpu_kernel void @vector_addrspacecast(ptr addrspace(1) %out, i64 %index) {

llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ define i1 @print_pgm_cond_true(i32 %tmp12.reload, ptr %tmp16.out) {
1616
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP16_OUT]], align 4
1717
; CHECK-NEXT: ret i1 false
1818
; CHECK: cond_true:
19-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [17 x i32], ptr @r, i32 0, i32 [[TMP12_RELOAD:%.*]]
19+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr @r, i32 [[TMP12_RELOAD:%.*]]
2020
; CHECK-NEXT: [[TMP16]] = load i32, ptr [[TMP15]], align 4
2121
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[TMP16]], -32
2222
; CHECK-NEXT: [[BOTHCOND:%.*]] = icmp ult i32 [[TMP0]], -63
@@ -53,7 +53,7 @@ define i1 @print_pgm_cond_true_logical(i32 %tmp12.reload, ptr %tmp16.out) {
5353
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP16_OUT]], align 4
5454
; CHECK-NEXT: ret i1 false
5555
; CHECK: cond_true:
56-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [17 x i32], ptr @r, i32 0, i32 [[TMP12_RELOAD:%.*]]
56+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr @r, i32 [[TMP12_RELOAD:%.*]]
5757
; CHECK-NEXT: [[TMP16]] = load i32, ptr [[TMP15]], align 4
5858
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[TMP16]], -32
5959
; CHECK-NEXT: [[BOTHCOND:%.*]] = icmp ult i32 [[TMP0]], -63

llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
define i8 @memcpy_constant_arg_ptr_to_alloca(ptr addrspace(4) noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
99
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca(
1010
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
11-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(4) [[ARG:%.*]], i64 0, i64 [[TMP1]]
11+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARG:%.*]], i64 [[TMP1]]
1212
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(4) [[GEP]], align 1
1313
; CHECK-NEXT: ret i8 [[LOAD]]
1414
;
@@ -22,7 +22,7 @@ define i8 @memcpy_constant_arg_ptr_to_alloca(ptr addrspace(4) noalias readonly a
2222
define i8 @memcpy_constant_arg_ptr_to_alloca_load_metadata(ptr addrspace(4) noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
2323
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_metadata(
2424
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
25-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(4) [[ARG:%.*]], i64 0, i64 [[TMP1]]
25+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARG:%.*]], i64 [[TMP1]]
2626
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(4) [[GEP]], align 1, !noalias [[META0:![0-9]+]]
2727
; CHECK-NEXT: ret i8 [[LOAD]]
2828
;
@@ -36,7 +36,7 @@ define i8 @memcpy_constant_arg_ptr_to_alloca_load_metadata(ptr addrspace(4) noal
3636
define i64 @memcpy_constant_arg_ptr_to_alloca_load_alignment(ptr addrspace(4) noalias readonly align 4 dereferenceable(256) %arg, i32 %idx) {
3737
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_alignment(
3838
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
39-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i64], ptr addrspace(4) [[ARG:%.*]], i64 0, i64 [[TMP1]]
39+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr addrspace(4) [[ARG:%.*]], i64 [[TMP1]]
4040
; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(4) [[GEP]], align 16
4141
; CHECK-NEXT: ret i64 [[LOAD]]
4242
;
@@ -51,7 +51,7 @@ define i64 @memcpy_constant_arg_ptr_to_alloca_load_atomic(ptr addrspace(4) noali
5151
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_load_atomic(
5252
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i64], align 8, addrspace(5)
5353
; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 8 dereferenceable(256) [[ALLOCA]], ptr addrspace(4) noundef align 8 dereferenceable(256) [[ARG:%.*]], i64 256, i1 false)
54-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i64], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
54+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr addrspace(5) [[ALLOCA]], i32 [[IDX:%.*]]
5555
; CHECK-NEXT: [[LOAD:%.*]] = load atomic i64, ptr addrspace(5) [[GEP]] syncscope("somescope") acquire, align 8
5656
; CHECK-NEXT: ret i64 [[LOAD]]
5757
;
@@ -66,7 +66,7 @@ define i64 @memcpy_constant_arg_ptr_to_alloca_load_atomic(ptr addrspace(4) noali
6666
define i8 @memmove_constant_arg_ptr_to_alloca(ptr addrspace(4) noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
6767
; CHECK-LABEL: @memmove_constant_arg_ptr_to_alloca(
6868
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
69-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(4) [[ARG:%.*]], i64 0, i64 [[TMP1]]
69+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARG:%.*]], i64 [[TMP1]]
7070
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(4) [[GEP]], align 1
7171
; CHECK-NEXT: ret i8 [[LOAD]]
7272
;
@@ -81,7 +81,7 @@ define i8 @memmove_constant_arg_ptr_to_alloca(ptr addrspace(4) noalias readonly
8181
define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca(ptr addrspace(4) noalias readonly align 4 byref([32 x i8]) %arg, ptr addrspace(1) %out, i32 %idx) {
8282
; CHECK-LABEL: @memcpy_constant_byref_arg_ptr_to_alloca(
8383
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
84-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(4) [[ARG:%.*]], i64 0, i64 [[TMP1]]
84+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARG:%.*]], i64 [[TMP1]]
8585
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(4) [[GEP]], align 1
8686
; CHECK-NEXT: store i8 [[LOAD]], ptr addrspace(1) [[OUT:%.*]], align 1
8787
; CHECK-NEXT: ret void
@@ -99,7 +99,7 @@ define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca_too_many_byte
9999
; CHECK-LABEL: @memcpy_constant_byref_arg_ptr_to_alloca_too_many_bytes(
100100
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
101101
; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 4 dereferenceable(31) [[ALLOCA]], ptr addrspace(4) noundef align 4 dereferenceable(31) [[ARG:%.*]], i64 31, i1 false)
102-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
102+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], i32 [[IDX:%.*]]
103103
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(5) [[GEP]], align 1
104104
; CHECK-NEXT: store i8 [[LOAD]], ptr addrspace(1) [[OUT:%.*]], align 1
105105
; CHECK-NEXT: ret void
@@ -118,7 +118,7 @@ define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(ptr addrspace
118118
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
119119
; CHECK-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call align 16 dereferenceable(32) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
120120
; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 4 dereferenceable(32) [[ALLOCA]], ptr addrspace(4) noundef align 16 dereferenceable(32) [[KERNARG_SEGMENT_PTR]], i64 32, i1 false)
121-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
121+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], i32 [[IDX:%.*]]
122122
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(5) [[GEP]], align 1
123123
; CHECK-NEXT: store i8 [[LOAD]], ptr addrspace(1) [[OUT:%.*]], align 1
124124
; CHECK-NEXT: ret void
@@ -136,7 +136,7 @@ define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(ptr addrspace
136136
define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat(ptr addrspace(4) noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
137137
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat(
138138
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
139-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(4) [[ARG:%.*]], i64 0, i64 [[TMP1]]
139+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARG:%.*]], i64 [[TMP1]]
140140
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(4) [[GEP]], align 1
141141
; CHECK-NEXT: ret i8 [[LOAD]]
142142
;
@@ -153,7 +153,7 @@ define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat2(ptr addrspac
153153
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat2(
154154
; CHECK-NEXT: [[ALLOCA_CAST_ASC:%.*]] = addrspacecast ptr addrspace(4) [[ARG:%.*]] to ptr
155155
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
156-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], ptr [[ALLOCA_CAST_ASC]], i64 0, i64 [[TMP1]]
156+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA_CAST_ASC]], i64 [[TMP1]]
157157
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
158158
; CHECK-NEXT: ret i8 [[LOAD]]
159159
;

llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ define ptr @xzy(i64 %x, i64 %y, i64 %z) {
3535
; CHECK-LABEL: define ptr @xzy(
3636
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]) {
3737
; CHECK-NEXT: entry:
38-
; CHECK-NEXT: [[GEP_SPLIT:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds nuw (i8, ptr @glob, i64 40), i64 0, i64 [[X]]
39-
; CHECK-NEXT: [[GEP_SPLIT1:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[GEP_SPLIT]], i64 0, i64 [[Z]]
40-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x i32], ptr [[GEP_SPLIT1]], i64 0, i64 [[Y]]
38+
; CHECK-NEXT: [[GEP_SPLIT:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr getelementptr inbounds nuw (i8, ptr @glob, i64 40), i64 [[X]]
39+
; CHECK-NEXT: [[GEP_SPLIT1:%.*]] = getelementptr inbounds [10 x i32], ptr [[GEP_SPLIT]], i64 [[Z]]
40+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[GEP_SPLIT1]], i64 [[Y]]
4141
; CHECK-NEXT: ret ptr [[GEP]]
4242
;
4343
entry:
@@ -49,7 +49,7 @@ define ptr @zerox(i64 %x) {
4949
; CHECK-LABEL: define ptr @zerox(
5050
; CHECK-SAME: i64 [[X:%.*]]) {
5151
; CHECK-NEXT: entry:
52-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x i32], ptr getelementptr inbounds nuw (i8, ptr @glob, i64 32), i64 0, i64 [[X]]
52+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr getelementptr inbounds nuw (i8, ptr @glob, i64 32), i64 [[X]]
5353
; CHECK-NEXT: ret ptr [[GEP]]
5454
;
5555
entry:

llvm/test/Transforms/InstCombine/cast.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ lpad:
150150

151151
define ptr @test13(i64 %A) {
152152
; ALL-LABEL: @test13(
153-
; ALL-NEXT: [[C:%.*]] = getelementptr [0 x i8], ptr @inbuf, i64 0, i64 [[A:%.*]]
153+
; ALL-NEXT: [[C:%.*]] = getelementptr i8, ptr @inbuf, i64 [[A:%.*]]
154154
; ALL-NEXT: ret ptr [[C]]
155155
;
156156
%c = getelementptr [0 x i8], ptr @inbuf, i64 0, i64 %A

llvm/test/Transforms/InstCombine/cast_phi.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ define void @MainKernel(i32 %iNumSteps, i32 %tid, i32 %base) {
99
; CHECK-NEXT: [[CALLB:%.*]] = alloca [258 x float], align 4
1010
; CHECK-NEXT: [[CONV_I:%.*]] = uitofp i32 [[INUMSTEPS:%.*]] to float
1111
; CHECK-NEXT: [[CONV_I12:%.*]] = zext i32 [[TID:%.*]] to i64
12-
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [258 x float], ptr [[CALLA]], i64 0, i64 [[CONV_I12]]
12+
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[CALLA]], i64 [[CONV_I12]]
1313
; CHECK-NEXT: store float [[CONV_I]], ptr [[ARRAYIDX3]], align 4
14-
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [258 x float], ptr [[CALLB]], i64 0, i64 [[CONV_I12]]
14+
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[CALLB]], i64 [[CONV_I12]]
1515
; CHECK-NEXT: store float [[CONV_I]], ptr [[ARRAYIDX6]], align 4
1616
; CHECK-NEXT: [[CMP7:%.*]] = icmp eq i32 [[TID]], 0
1717
; CHECK-NEXT: br i1 [[CMP7]], label [[DOTBB1:%.*]], label [[DOTBB2:%.*]]
@@ -31,8 +31,8 @@ define void @MainKernel(i32 %iNumSteps, i32 %tid, i32 %base) {
3131
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[I12_06]], [[BASE:%.*]]
3232
; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[I12_06]], 1
3333
; CHECK-NEXT: [[CONV_I9:%.*]] = sext i32 [[ADD]] to i64
34-
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLA]], i64 0, i64 [[CONV_I9]]
35-
; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLB]], i64 0, i64 [[CONV_I9]]
34+
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds float, ptr [[CALLA]], i64 [[CONV_I9]]
35+
; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, ptr [[CALLB]], i64 [[CONV_I9]]
3636
; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[I12_06]], [[BASE]]
3737
; CHECK-NEXT: br i1 [[TMP3]], label [[DOTBB4:%.*]], label [[DOTBB5:%.*]]
3838
; CHECK: .bb4:

llvm/test/Transforms/InstCombine/gep-addrspace.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ define void @func(ptr addrspace(1) nocapture %p) nounwind {
2424

2525
define void @keep_necessary_addrspacecast(i64 %i, ptr %out0, ptr %out1) {
2626
; CHECK-LABEL: @keep_necessary_addrspacecast(
27-
; CHECK-NEXT: [[T0:%.*]] = getelementptr [256 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 0, i64 [[I:%.*]]
28-
; CHECK-NEXT: [[T1:%.*]] = getelementptr [0 x float], ptr addrspacecast (ptr addrspace(3) @scalar to ptr), i64 0, i64 [[I]]
29-
; CHECK-NEXT: store ptr [[T0]], ptr [[OUT0:%.*]], align 4
27+
; CHECK-NEXT: [[T1:%.*]] = getelementptr float, ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 [[I:%.*]]
28+
; CHECK-NEXT: [[T2:%.*]] = getelementptr float, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), i64 [[I]]
3029
; CHECK-NEXT: store ptr [[T1]], ptr [[OUT1:%.*]], align 4
30+
; CHECK-NEXT: store ptr [[T2]], ptr [[OUT2:%.*]], align 4
3131
; CHECK-NEXT: ret void
3232
;
3333
%t0 = getelementptr [256 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 0, i64 %i

0 commit comments

Comments
 (0)