@@ -34,10 +34,11 @@ int foo() {
 }
 
 void bar () {
-#pragma omp target
-  ;
 #pragma omp parallel
   ;
+#pragma omp parallel for
+  for (int i = 0 ; i < 1 ; ++i)
+    ;
 }
 
 void baz (int *p) {
@@ -56,6 +57,7 @@ int qux() {
 // AMDGCN: @c = addrspace(4) constant i32 0, align 4
 // AMDGCN: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
 // AMDGCN: @[[GLOB1:[0-9]+]] = private unnamed_addr addrspace(1) constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
+// AMDGCN: @[[GLOB2:[0-9]+]] = private unnamed_addr addrspace(1) constant %struct.ident_t { i32 0, i32 514, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
 // AMDGCN: @d = global i32 0, align 4
 // AMDGCN: @g = global i32 0, align 4
 // AMDGCN: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
@@ -65,6 +67,7 @@ int qux() {
 // NVPTX: @c = addrspace(4) constant i32 0, align 4
 // NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
 // NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
+// NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
 //.
 // AMDGCN-LABEL: define dso_local noundef i32 @_Z3foov(
 // AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -82,9 +85,12 @@ int qux() {
 // AMDGCN-SAME: ) #[[ATTR0]] {
 // AMDGCN-NEXT: [[ENTRY:.*:]]
 // AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
 // AMDGCN-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
 // AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// AMDGCN-NEXT: [[CAPTURED_VARS_ADDRS1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
 // AMDGCN-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
+// AMDGCN-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined.1, ptr @_Z3barv_omp_outlined.1_wrapper, ptr [[CAPTURED_VARS_ADDRS1_ASCAST]], i64 0)
 // AMDGCN-NEXT: ret void
 //
 //
@@ -119,6 +125,111 @@ int qux() {
 // AMDGCN-NEXT: ret void
 //
 //
+// AMDGCN-LABEL: define internal void @_Z3barv_omp_outlined.1(
+// AMDGCN-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// AMDGCN-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// AMDGCN-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// AMDGCN-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// AMDGCN-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// AMDGCN-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// AMDGCN-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// AMDGCN-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// AMDGCN-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// AMDGCN-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// AMDGCN-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// AMDGCN-NEXT: store i32 0, ptr [[DOTOMP_UB_ASCAST]], align 4
+// AMDGCN-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// AMDGCN-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// AMDGCN-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// AMDGCN-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// AMDGCN-NEXT: br label %[[OMP_DISPATCH_COND:.*]]
+// AMDGCN: [[OMP_DISPATCH_COND]]:
+// AMDGCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// AMDGCN-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0
+// AMDGCN-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// AMDGCN: [[COND_TRUE]]:
+// AMDGCN-NEXT: br label %[[COND_END:.*]]
+// AMDGCN: [[COND_FALSE]]:
+// AMDGCN-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// AMDGCN-NEXT: br label %[[COND_END]]
+// AMDGCN: [[COND_END]]:
+// AMDGCN-NEXT: [[COND:%.*]] = phi i32 [ 0, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ]
+// AMDGCN-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// AMDGCN-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// AMDGCN-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
+// AMDGCN-NEXT: br i1 [[CMP1]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_END:.*]]
+// AMDGCN: [[OMP_DISPATCH_BODY]]:
+// AMDGCN-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
+// AMDGCN: [[OMP_INNER_FOR_COND]]:
+// AMDGCN-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// AMDGCN-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// AMDGCN-NEXT: br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// AMDGCN: [[OMP_INNER_FOR_BODY]]:
+// AMDGCN-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// AMDGCN-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// AMDGCN-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// AMDGCN-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// AMDGCN-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
+// AMDGCN: [[OMP_BODY_CONTINUE]]:
+// AMDGCN-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
+// AMDGCN: [[OMP_INNER_FOR_INC]]:
+// AMDGCN-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// AMDGCN-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
+// AMDGCN-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// AMDGCN-NEXT: br label %[[OMP_INNER_FOR_COND]]
+// AMDGCN: [[OMP_INNER_FOR_END]]:
+// AMDGCN-NEXT: br label %[[OMP_DISPATCH_INC:.*]]
+// AMDGCN: [[OMP_DISPATCH_INC]]:
+// AMDGCN-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// AMDGCN-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// AMDGCN-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// AMDGCN-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// AMDGCN-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// AMDGCN-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// AMDGCN-NEXT: br label %[[OMP_DISPATCH_COND]]
+// AMDGCN: [[OMP_DISPATCH_END]]:
+// AMDGCN-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP1]])
+// AMDGCN-NEXT: ret void
+//
+//
+// AMDGCN-LABEL: define internal void @_Z3barv_omp_outlined.1_wrapper(
+// AMDGCN-SAME: i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// AMDGCN-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// AMDGCN-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// AMDGCN-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// AMDGCN-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// AMDGCN-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// AMDGCN-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// AMDGCN-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// AMDGCN-NEXT: call void @_Z3barv_omp_outlined.1(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3]]
+// AMDGCN-NEXT: ret void
+//
+//
 // AMDGCN-LABEL: define dso_local void @_Z3bazPi(
 // AMDGCN-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
 // AMDGCN-NEXT: [[ENTRY:.*:]]
@@ -151,8 +262,10 @@ int qux() {
 // NVPTX-SAME: ) #[[ATTR0]] {
 // NVPTX-NEXT: [[ENTRY:.*:]]
 // NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+// NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 // NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 // NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined1, ptr @_Z3barv_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 // NVPTX-NEXT: ret void
 //
 //
@@ -181,6 +294,98 @@ int qux() {
 // NVPTX-NEXT: ret void
 //
 //
+// NVPTX-LABEL: define internal void @_Z3barv_omp_outlined1(
+// NVPTX-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// NVPTX-NEXT: [[ENTRY:.*:]]
+// NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// NVPTX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// NVPTX-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// NVPTX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// NVPTX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// NVPTX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// NVPTX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// NVPTX-NEXT: [[I:%.*]] = alloca i32, align 4
+// NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NVPTX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// NVPTX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// NVPTX-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4
+// NVPTX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// NVPTX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// NVPTX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// NVPTX-NEXT: br label %[[OMP_DISPATCH_COND:.*]]
+// NVPTX: [[OMP_DISPATCH_COND]]:
+// NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// NVPTX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0
+// NVPTX-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// NVPTX: [[COND_TRUE]]:
+// NVPTX-NEXT: br label %[[COND_END:.*]]
+// NVPTX: [[COND_FALSE]]:
+// NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// NVPTX-NEXT: br label %[[COND_END]]
+// NVPTX: [[COND_END]]:
+// NVPTX-NEXT: [[COND:%.*]] = phi i32 [ 0, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ]
+// NVPTX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// NVPTX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// NVPTX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// NVPTX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// NVPTX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// NVPTX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
+// NVPTX-NEXT: br i1 [[CMP1]], label %[[OMP_DISPATCH_BODY:.*]], label %[[OMP_DISPATCH_END:.*]]
+// NVPTX: [[OMP_DISPATCH_BODY]]:
+// NVPTX-NEXT: br label %[[OMP_INNER_FOR_COND:.*]]
+// NVPTX: [[OMP_INNER_FOR_COND]]:
+// NVPTX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// NVPTX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// NVPTX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// NVPTX-NEXT: br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]]
+// NVPTX: [[OMP_INNER_FOR_BODY]]:
+// NVPTX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// NVPTX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// NVPTX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// NVPTX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// NVPTX-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
+// NVPTX: [[OMP_BODY_CONTINUE]]:
+// NVPTX-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
+// NVPTX: [[OMP_INNER_FOR_INC]]:
+// NVPTX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// NVPTX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
+// NVPTX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// NVPTX-NEXT: br label %[[OMP_INNER_FOR_COND]]
+// NVPTX: [[OMP_INNER_FOR_END]]:
+// NVPTX-NEXT: br label %[[OMP_DISPATCH_INC:.*]]
+// NVPTX: [[OMP_DISPATCH_INC]]:
+// NVPTX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// NVPTX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// NVPTX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// NVPTX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
+// NVPTX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// NVPTX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// NVPTX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// NVPTX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// NVPTX-NEXT: br label %[[OMP_DISPATCH_COND]]
+// NVPTX: [[OMP_DISPATCH_END]]:
+// NVPTX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
+// NVPTX-NEXT: ret void
+//
+//
+// NVPTX-LABEL: define internal void @_Z3barv_omp_outlined1_wrapper(
+// NVPTX-SAME: i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
+// NVPTX-NEXT: [[ENTRY:.*:]]
+// NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+// NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+// NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+// NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+// NVPTX-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
+// NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
+// NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+// NVPTX-NEXT: call void @_Z3barv_omp_outlined1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+// NVPTX-NEXT: ret void
+//
+//
 // NVPTX-LABEL: define dso_local void @_Z3bazPi(
 // NVPTX-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
 // NVPTX-NEXT: [[ENTRY:.*:]]