Skip to content

Commit e1b0dc5

Browse files
committed
[Attributor] Use getAssumedAddrSpace to get address space for AllocaInst
1 parent 2d75ec2 commit e1b0dc5

10 files changed

+1870
-616
lines changed

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12603,6 +12603,18 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
1260312603
auto CheckAddressSpace = [&](Value &Obj) {
1260412604
if (isa<UndefValue>(&Obj))
1260512605
return true;
12606+
// Some targets relax the requirement for alloca to be in an exact address
12607+
// space, allowing it in certain other address spaces instead. These
12608+
// targets later lower alloca to the correct address space in the
12609+
// pipeline. Therefore, we need to query TTI to determine the appropriate
12610+
// address space.
12611+
if (auto *AI = dyn_cast<AllocaInst>(&Obj)) {
12612+
Function *Fn = AI->getFunction();
12613+
auto *TTI =
12614+
A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
12615+
*Fn);
12616+
return takeAddressSpace(TTI->getAssumedAddrSpace(AI));
12617+
}
1260612618
// If an argument in flat address space only has addrspace cast uses, and
1260712619
// those casts are same, then we take the dst addrspace.
1260812620
if (auto *Arg = dyn_cast<Argument>(&Obj)) {

llvm/test/Transforms/OpenMP/custom_state_machines.ll

Lines changed: 590 additions & 190 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll

Lines changed: 774 additions & 174 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/OpenMP/nested_parallelism.ll

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l13
6464
; CHECK-NEXT: br label [[_Z3FOOI_INTERNALIZED_EXIT]]
6565
; CHECK: _Z3fooi.internalized.exit:
6666
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
67-
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
67+
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
68+
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr addrspace(5) [[TMP4]], align 8
6869
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
6970
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
7071
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -109,7 +110,8 @@ define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
109110
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
110111
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
111112
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
112-
; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
113+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
114+
; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
113115
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
114116
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
115117
; CHECK-NEXT: ret void
@@ -141,7 +143,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l16
141143
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
142144
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
143145
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
144-
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
146+
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
147+
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr addrspace(5) [[TMP2]], align 8
145148
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
146149
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
147150
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -175,7 +178,8 @@ define hidden void @_Z4foo1i(i32 noundef %i1) local_unnamed_addr #1 {
175178
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
176179
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
177180
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
178-
; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
181+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
182+
; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
179183
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
180184
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
181185
; CHECK-NEXT: ret void
@@ -202,7 +206,8 @@ define internal void @__omp_outlined__(ptr noalias nocapture readnone %.global_t
202206
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
203207
; CHECK-NEXT: [[I_I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
204208
; CHECK-NEXT: store i32 [[TMP0]], ptr [[I_I]], align 16
205-
; CHECK-NEXT: store ptr [[I_I]], ptr [[CAPTURED_VARS_ADDRS_I]], align 8
209+
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
210+
; CHECK-NEXT: store ptr [[I_I]], ptr addrspace(5) [[TMP2]], align 8
206211
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
207212
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I]], i64 4) #[[ATTR2]]
208213
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
@@ -228,15 +233,17 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #5 {
228233
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I_I:%.*]] = alloca [1 x ptr], align 8
229234
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
230235
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
231-
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
236+
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
237+
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
232238
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
233239
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
234240
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
235-
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
241+
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
236242
; CHECK-NEXT: [[I_I_I:%.*]] = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
237243
; CHECK-NEXT: store i32 [[TMP4]], ptr [[I_I_I]], align 16
238-
; CHECK-NEXT: store ptr [[I_I_I]], ptr [[CAPTURED_VARS_ADDRS_I_I]], align 8
239-
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
244+
; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I_I]] to ptr addrspace(5)
245+
; CHECK-NEXT: store ptr [[I_I_I]], ptr addrspace(5) [[TMP7]], align 8
246+
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
240247
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I_I]], i64 4) #[[ATTR2]]
241248
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
242249
; CHECK-NEXT: ret void
@@ -287,7 +294,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #5 {
287294
; CHECK-NEXT: entry:
288295
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
289296
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
290-
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
297+
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
298+
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
291299
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
292300
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
293301
; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP4]], 1

llvm/test/Transforms/OpenMP/remove_globalization.ll

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -163,17 +163,22 @@ define internal void @convert_and_move_alloca() {
163163
; CHECK-NEXT: entry:
164164
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
165165
; CHECK-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
166+
; CHECK-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
167+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
166168
; CHECK-NEXT: br label [[INITLOOP:%.*]]
167169
; CHECK: initloop:
168-
; CHECK-NEXT: store i32 0, ptr [[IV_PTR]], align 4
170+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
171+
; CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
169172
; CHECK-NEXT: br label [[LOOPBODY:%.*]]
170173
; CHECK: loopbody:
171-
; CHECK-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
172-
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
173-
; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
174+
; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
175+
; CHECK-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
176+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
177+
; CHECK-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
174178
; CHECK: loopinc:
175179
; CHECK-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
176-
; CHECK-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
180+
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
181+
; CHECK-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
177182
; CHECK-NEXT: br label [[LOOPBODY]]
178183
; CHECK: exit:
179184
; CHECK-NEXT: ret void
@@ -183,17 +188,22 @@ define internal void @convert_and_move_alloca() {
183188
; CHECK-DISABLED-NEXT: entry:
184189
; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
185190
; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
191+
; CHECK-DISABLED-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
192+
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
186193
; CHECK-DISABLED-NEXT: br label [[INITLOOP:%.*]]
187194
; CHECK-DISABLED: initloop:
188-
; CHECK-DISABLED-NEXT: store i32 0, ptr [[IV_PTR]], align 4
195+
; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
196+
; CHECK-DISABLED-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
189197
; CHECK-DISABLED-NEXT: br label [[LOOPBODY:%.*]]
190198
; CHECK-DISABLED: loopbody:
191-
; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
192-
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
193-
; CHECK-DISABLED-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
199+
; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
200+
; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
201+
; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
202+
; CHECK-DISABLED-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
194203
; CHECK-DISABLED: loopinc:
195204
; CHECK-DISABLED-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
196-
; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
205+
; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
206+
; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
197207
; CHECK-DISABLED-NEXT: br label [[LOOPBODY]]
198208
; CHECK-DISABLED: exit:
199209
; CHECK-DISABLED-NEXT: ret void

0 commit comments

Comments
 (0)