@@ -85,8 +85,10 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
8585; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
8686; CHECK: region.guarded:
8787; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
88- ; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
89- ; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
88+ ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
89+ ; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
90+ ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
91+ ; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
9092; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
9193; CHECK: region.guarded.end:
9294; CHECK-NEXT: br label [[REGION_BARRIER]]
@@ -107,16 +109,17 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
107109; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
108110; CHECK-NEXT: br label [[REGION_CHECK_TID5:%.*]]
109111; CHECK: region.check.tid5:
110- ; CHECK-NEXT: [[TMP4 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
111- ; CHECK-NEXT: [[TMP5 :%.*]] = icmp eq i32 [[TMP4 ]], 0
112- ; CHECK-NEXT: br i1 [[TMP5 ]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
112+ ; CHECK-NEXT: [[TMP6 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
113+ ; CHECK-NEXT: [[TMP7 :%.*]] = icmp eq i32 [[TMP6 ]], 0
114+ ; CHECK-NEXT: br i1 [[TMP7 ]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
113115; CHECK: region.guarded4:
114- ; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
116+ ; CHECK-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
117+ ; CHECK-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP8]], align 4, !noalias [[META7]]
115118; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]]
116119; CHECK: region.guarded.end1:
117120; CHECK-NEXT: br label [[REGION_BARRIER2]]
118121; CHECK: region.barrier2:
119- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP4 ]])
122+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6 ]])
120123; CHECK-NEXT: br label [[REGION_EXIT3]]
121124; CHECK: region.exit3:
122125; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
@@ -128,50 +131,53 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
128131; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
129132; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]]
130133; CHECK: region.check.tid10:
131- ; CHECK-NEXT: [[TMP6 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
132- ; CHECK-NEXT: [[TMP7 :%.*]] = icmp eq i32 [[TMP6 ]], 0
133- ; CHECK-NEXT: br i1 [[TMP7 ]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
134+ ; CHECK-NEXT: [[TMP9 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
135+ ; CHECK-NEXT: [[TMP10 :%.*]] = icmp eq i32 [[TMP9 ]], 0
136+ ; CHECK-NEXT: br i1 [[TMP10 ]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
134137; CHECK: region.guarded9:
135- ; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
138+ ; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
139+ ; CHECK-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP11]], align 4, !noalias [[META7]]
136140; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]]
137141; CHECK: region.guarded.end6:
138142; CHECK-NEXT: br label [[REGION_BARRIER7]]
139143; CHECK: region.barrier7:
140- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6 ]])
144+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP9 ]])
141145; CHECK-NEXT: br label [[REGION_EXIT8:%.*]]
142146; CHECK: region.exit8:
143147; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
144148; CHECK-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
145149; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
146150; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]]
147151; CHECK: region.check.tid15:
148- ; CHECK-NEXT: [[TMP8 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
149- ; CHECK-NEXT: [[TMP9 :%.*]] = icmp eq i32 [[TMP8 ]], 0
150- ; CHECK-NEXT: br i1 [[TMP9 ]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
152+ ; CHECK-NEXT: [[TMP12 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
153+ ; CHECK-NEXT: [[TMP13 :%.*]] = icmp eq i32 [[TMP12 ]], 0
154+ ; CHECK-NEXT: br i1 [[TMP13 ]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
151155; CHECK: region.guarded14:
152- ; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
156+ ; CHECK-NEXT: [[TMP14:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
157+ ; CHECK-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP14]], align 4, !noalias [[META7]]
153158; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]]
154159; CHECK: region.guarded.end11:
155160; CHECK-NEXT: br label [[REGION_BARRIER12]]
156161; CHECK: region.barrier12:
157- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8 ]])
162+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP12 ]])
158163; CHECK-NEXT: br label [[REGION_EXIT13:%.*]]
159164; CHECK: region.exit13:
160165; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
161166; CHECK-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
162167; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
163168; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]]
164169; CHECK: region.check.tid20:
165- ; CHECK-NEXT: [[TMP10 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
166- ; CHECK-NEXT: [[TMP11 :%.*]] = icmp eq i32 [[TMP10 ]], 0
167- ; CHECK-NEXT: br i1 [[TMP11 ]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
170+ ; CHECK-NEXT: [[TMP15 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
171+ ; CHECK-NEXT: [[TMP16 :%.*]] = icmp eq i32 [[TMP15 ]], 0
172+ ; CHECK-NEXT: br i1 [[TMP16 ]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
168173; CHECK: region.guarded19:
169- ; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
174+ ; CHECK-NEXT: [[TMP17:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
175+ ; CHECK-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP17]], align 4, !noalias [[META7]]
170176; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]]
171177; CHECK: region.guarded.end16:
172178; CHECK-NEXT: br label [[REGION_BARRIER17]]
173179; CHECK: region.barrier17:
174- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10 ]])
180+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP15 ]])
175181; CHECK-NEXT: br label [[REGION_EXIT18:%.*]]
176182; CHECK: region.exit18:
177183; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -232,11 +238,13 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
232238; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
233239; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
234240; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
235- ; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
241+ ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
242+ ; CHECK-DISABLED-NEXT: store i32 1, ptr addrspace(1) [[TMP2]], align 4, !noalias [[META7]]
236243; CHECK-DISABLED-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
237244; CHECK-DISABLED-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
238245; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
239- ; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
246+ ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
247+ ; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP3]], align 4, !noalias [[META7]]
240248; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
241249; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]]
242250; CHECK-DISABLED: for.cond.i:
@@ -248,23 +256,27 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
248256; CHECK-DISABLED-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
249257; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
250258; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
251- ; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
259+ ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
260+ ; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
252261; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
253262; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
254263; CHECK-DISABLED: __omp_outlined__.exit:
255264; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr null, i64 0)
256265; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
257266; CHECK-DISABLED-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
258267; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
259- ; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
268+ ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
269+ ; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
260270; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
261271; CHECK-DISABLED-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
262272; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
263- ; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
273+ ; CHECK-DISABLED-NEXT: [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
274+ ; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
264275; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
265276; CHECK-DISABLED-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
266277; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
267- ; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
278+ ; CHECK-DISABLED-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
279+ ; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP7]], align 4, !noalias [[META7]]
268280; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
269281; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
270282; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
0 commit comments