@@ -85,10 +85,8 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
8585; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
8686; CHECK: region.guarded:
8787; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
88- ; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
89- ; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
90- ; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
91- ; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
88+ ; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
89+ ; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
9290; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
9391; CHECK: region.guarded.end:
9492; CHECK-NEXT: br label [[REGION_BARRIER]]
@@ -109,17 +107,16 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
109107; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
110108; CHECK-NEXT: br label [[REGION_CHECK_TID5:%.*]]
111109; CHECK: region.check.tid5:
112- ; CHECK-NEXT: [[TMP6 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
113- ; CHECK-NEXT: [[TMP7 :%.*]] = icmp eq i32 [[TMP6 ]], 0
114- ; CHECK-NEXT: br i1 [[TMP7 ]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
110+ ; CHECK-NEXT: [[TMP4 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
111+ ; CHECK-NEXT: [[TMP5 :%.*]] = icmp eq i32 [[TMP4 ]], 0
112+ ; CHECK-NEXT: br i1 [[TMP5 ]], label [[REGION_GUARDED4:%.*]], label [[REGION_BARRIER2:%.*]]
115113; CHECK: region.guarded4:
116- ; CHECK-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
117- ; CHECK-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP8]], align 4, !noalias [[META7]]
114+ ; CHECK-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
118115; CHECK-NEXT: br label [[REGION_GUARDED_END1:%.*]]
119116; CHECK: region.guarded.end1:
120117; CHECK-NEXT: br label [[REGION_BARRIER2]]
121118; CHECK: region.barrier2:
122- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6 ]])
119+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP4 ]])
123120; CHECK-NEXT: br label [[REGION_EXIT3]]
124121; CHECK: region.exit3:
125122; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
@@ -131,53 +128,50 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
131128; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
132129; CHECK-NEXT: br label [[REGION_CHECK_TID10:%.*]]
133130; CHECK: region.check.tid10:
134- ; CHECK-NEXT: [[TMP9 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
135- ; CHECK-NEXT: [[TMP10 :%.*]] = icmp eq i32 [[TMP9 ]], 0
136- ; CHECK-NEXT: br i1 [[TMP10 ]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
131+ ; CHECK-NEXT: [[TMP6 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
132+ ; CHECK-NEXT: [[TMP7 :%.*]] = icmp eq i32 [[TMP6 ]], 0
133+ ; CHECK-NEXT: br i1 [[TMP7 ]], label [[REGION_GUARDED9:%.*]], label [[REGION_BARRIER7:%.*]]
137134; CHECK: region.guarded9:
138- ; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
139- ; CHECK-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP11]], align 4, !noalias [[META7]]
135+ ; CHECK-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
140136; CHECK-NEXT: br label [[REGION_GUARDED_END6:%.*]]
141137; CHECK: region.guarded.end6:
142138; CHECK-NEXT: br label [[REGION_BARRIER7]]
143139; CHECK: region.barrier7:
144- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP9 ]])
140+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP6 ]])
145141; CHECK-NEXT: br label [[REGION_EXIT8:%.*]]
146142; CHECK: region.exit8:
147143; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
148144; CHECK-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
149145; CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
150146; CHECK-NEXT: br label [[REGION_CHECK_TID15:%.*]]
151147; CHECK: region.check.tid15:
152- ; CHECK-NEXT: [[TMP12 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
153- ; CHECK-NEXT: [[TMP13 :%.*]] = icmp eq i32 [[TMP12 ]], 0
154- ; CHECK-NEXT: br i1 [[TMP13 ]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
148+ ; CHECK-NEXT: [[TMP8 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
149+ ; CHECK-NEXT: [[TMP9 :%.*]] = icmp eq i32 [[TMP8 ]], 0
150+ ; CHECK-NEXT: br i1 [[TMP9 ]], label [[REGION_GUARDED14:%.*]], label [[REGION_BARRIER12:%.*]]
155151; CHECK: region.guarded14:
156- ; CHECK-NEXT: [[TMP14:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
157- ; CHECK-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP14]], align 4, !noalias [[META7]]
152+ ; CHECK-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
158153; CHECK-NEXT: br label [[REGION_GUARDED_END11:%.*]]
159154; CHECK: region.guarded.end11:
160155; CHECK-NEXT: br label [[REGION_BARRIER12]]
161156; CHECK: region.barrier12:
162- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP12 ]])
157+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP8 ]])
163158; CHECK-NEXT: br label [[REGION_EXIT13:%.*]]
164159; CHECK: region.exit13:
165160; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
166161; CHECK-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
167162; CHECK-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
168163; CHECK-NEXT: br label [[REGION_CHECK_TID20:%.*]]
169164; CHECK: region.check.tid20:
170- ; CHECK-NEXT: [[TMP15 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
171- ; CHECK-NEXT: [[TMP16 :%.*]] = icmp eq i32 [[TMP15 ]], 0
172- ; CHECK-NEXT: br i1 [[TMP16 ]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
165+ ; CHECK-NEXT: [[TMP10 :%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
166+ ; CHECK-NEXT: [[TMP11 :%.*]] = icmp eq i32 [[TMP10 ]], 0
167+ ; CHECK-NEXT: br i1 [[TMP11 ]], label [[REGION_GUARDED19:%.*]], label [[REGION_BARRIER17:%.*]]
173168; CHECK: region.guarded19:
174- ; CHECK-NEXT: [[TMP17:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
175- ; CHECK-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP17]], align 4, !noalias [[META7]]
169+ ; CHECK-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
176170; CHECK-NEXT: br label [[REGION_GUARDED_END16:%.*]]
177171; CHECK: region.guarded.end16:
178172; CHECK-NEXT: br label [[REGION_BARRIER17]]
179173; CHECK: region.barrier17:
180- ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP15 ]])
174+ ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP10 ]])
181175; CHECK-NEXT: br label [[REGION_EXIT18:%.*]]
182176; CHECK: region.exit18:
183177; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
@@ -238,13 +232,11 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
238232; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
239233; CHECK-DISABLED-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
240234; CHECK-DISABLED-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
241- ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
242- ; CHECK-DISABLED-NEXT: store i32 1, ptr addrspace(1) [[TMP2]], align 4, !noalias [[META7]]
235+ ; CHECK-DISABLED-NEXT: store i32 1, ptr [[ARRAYIDX1_I]], align 4, !noalias [[META7]]
243236; CHECK-DISABLED-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
244237; CHECK-DISABLED-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
245238; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
246- ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
247- ; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP3]], align 4, !noalias [[META7]]
239+ ; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARRAYIDX2_I]], align 4, !noalias [[META7]]
248240; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
249241; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]]
250242; CHECK-DISABLED: for.cond.i:
@@ -256,27 +248,23 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
256248; CHECK-DISABLED-NEXT: [[SUB3_I:%.*]] = add nsw i32 [[I_0_I]], -1
257249; CHECK-DISABLED-NEXT: [[IDXPROM4_I:%.*]] = zext i32 [[I_0_I]] to i64
258250; CHECK-DISABLED-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM4_I]]
259- ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX5_I]] to ptr addrspace(1)
260- ; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
251+ ; CHECK-DISABLED-NEXT: store i32 [[SUB3_I]], ptr [[ARRAYIDX5_I]], align 4, !noalias [[META7]]
261252; CHECK-DISABLED-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
262253; CHECK-DISABLED-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP10:![0-9]+]]
263254; CHECK-DISABLED: __omp_outlined__.exit:
264255; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr null, i64 0)
265256; CHECK-DISABLED-NEXT: [[CALL_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10:[0-9]+]], !noalias [[META7]]
266257; CHECK-DISABLED-NEXT: [[IDXPROM6_I:%.*]] = sext i32 [[CALL_I]] to i64
267258; CHECK-DISABLED-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM6_I]]
268- ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX7_I]] to ptr addrspace(1)
269- ; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
259+ ; CHECK-DISABLED-NEXT: store i32 [[CALL_I]], ptr [[ARRAYIDX7_I]], align 4, !noalias [[META7]]
270260; CHECK-DISABLED-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
271261; CHECK-DISABLED-NEXT: [[IDXPROM9_I:%.*]] = sext i32 [[CALL8_I]] to i64
272262; CHECK-DISABLED-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM9_I]]
273- ; CHECK-DISABLED-NEXT: [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX10_I]] to ptr addrspace(1)
274- ; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
263+ ; CHECK-DISABLED-NEXT: store i32 [[CALL8_I]], ptr [[ARRAYIDX10_I]], align 4, !noalias [[META7]]
275264; CHECK-DISABLED-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
276265; CHECK-DISABLED-NEXT: [[IDXPROM12_I:%.*]] = sext i32 [[CALL11_I]] to i64
277266; CHECK-DISABLED-NEXT: [[ARRAYIDX13_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM12_I]]
278- ; CHECK-DISABLED-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[ARRAYIDX13_I]] to ptr addrspace(1)
279- ; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr addrspace(1) [[TMP7]], align 4, !noalias [[META7]]
267+ ; CHECK-DISABLED-NEXT: store i32 [[CALL11_I]], ptr [[ARRAYIDX13_I]], align 4, !noalias [[META7]]
280268; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
281269; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
282270; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias [[META7]]
0 commit comments