@@ -292,6 +292,205 @@ bb.2.end:
292292 %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2 , i32 22 syncscope("singlethread" ) monotonic , align 4
293293 ret void
294294}
295+
; NOTE(review): this chunk is a scraped GitHub diff of an LLVM IR FileCheck test;
; the leading "NNN+" tokens are diff line numbers, not part of the .ll source.
; Test: a flat pointer %ptr2 selected (via two selects) between a global-derived
; GEP, a private (addrspace 5) alloca, and the unknown kernel argument %c feeds
; cmpxchg at every ordering, plus weak/volatile/syncscope variants. The CHECK
; lines expect the instructions unannotated -- presumably because %c can point
; anywhere, so no !noalias.addrspace range can be inferred (META0/META1 at file
; end suggest the pass emits such metadata elsewhere). TODO confirm against the
; RUN line, which is not visible in this chunk.
296+ define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg_flat (ptr %c , i1 %cond1 , i1 %cond2 , i32 %val , i32 %offset ) #0 {
297+ ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_cmpxchg_flat(
298+ ; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
299+ ; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
300+ ; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
301+ ; CHECK-NEXT: [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
302+ ; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
303+ ; CHECK-NEXT: [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
304+ ; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4
305+ ; CHECK-NEXT: [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4
306+ ; CHECK-NEXT: [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4
307+ ; CHECK-NEXT: [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4
308+ ; CHECK-NEXT: [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4
309+ ; CHECK-NEXT: [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4
310+ ; CHECK-NEXT: [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4
311+ ; CHECK-NEXT: [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
312+ ; CHECK-NEXT: ret void
313+ ;
; Input IR below. The unusual " , " spacing appears to be an artifact of the
; page scrape; do not "fix" it here without regenerating CHECK lines.
314+ %lptr = alloca i32 , align 4 , addrspace (5 )
315+ %a = addrspacecast ptr addrspace (1 ) @gptr to ptr
316+ %b = addrspacecast ptr addrspace (5 ) %lptr to ptr
317+ %add_a = getelementptr inbounds i8 , ptr %a , i32 %offset
318+ %ptr = select i1 %cond1 , ptr %add_a , ptr %b
319+ %ptr2 = select i1 %cond2 , ptr %ptr , ptr %c
320+ %cmpxchg.0 = cmpxchg ptr %ptr2 , i32 0 , i32 4 monotonic monotonic , align 4
321+ %cmpxchg.1 = cmpxchg ptr %ptr2 , i32 0 , i32 5 acq_rel monotonic , align 4
322+ %cmpxchg.2 = cmpxchg ptr %ptr2 , i32 0 , i32 6 acquire monotonic , align 4
323+ %cmpxchg.3 = cmpxchg ptr %ptr2 , i32 0 , i32 7 release monotonic , align 4
324+ %cmpxchg.4 = cmpxchg ptr %ptr2 , i32 0 , i32 8 seq_cst monotonic , align 4
325+ %cmpxchg.5 = cmpxchg weak ptr %ptr2 , i32 0 , i32 9 seq_cst monotonic , align 4
326+ %cmpxchg.6 = cmpxchg volatile ptr %ptr2 , i32 0 , i32 10 seq_cst monotonic , align 4
327+ %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2 , i32 0 , i32 11 syncscope("singlethread" ) seq_cst monotonic , align 4
328+ ret void
329+ }
330+
; Branch/phi variant of the select-based cmpxchg test above: the same three
; possible pointer origins (global-derived %a, private alloca %b, unknown arg
; %c) are merged through phis across a diamond CFG instead of selects. CHECK
; lines again expect every cmpxchg left unannotated -- the unknown %c incoming
; value presumably blocks any address-space inference. (Diff-number prefixes
; "NNN+" are scrape artifacts, not source.)
331+ define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg_flat (ptr %c , i1 %cond1 , i1 %cond2 , i32 %val , i32 %offset ) #0 {
332+ ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_cmpxchg_flat(
333+ ; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
334+ ; CHECK-NEXT: br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
335+ ; CHECK: [[BB_1_TRUE]]:
336+ ; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
337+ ; CHECK-NEXT: br label %[[BB_1_END:.*]]
338+ ; CHECK: [[BB_1_FALSE]]:
339+ ; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
340+ ; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
341+ ; CHECK-NEXT: br label %[[BB_1_END]]
342+ ; CHECK: [[BB_1_END]]:
343+ ; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
344+ ; CHECK-NEXT: br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
345+ ; CHECK: [[BB_2_TRUE]]:
346+ ; CHECK-NEXT: br label %[[BB_2_END]]
347+ ; CHECK: [[BB_2_END]]:
348+ ; CHECK-NEXT: [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
349+ ; CHECK-NEXT: [[CMPXCHG_0:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 4 monotonic monotonic, align 4
350+ ; CHECK-NEXT: [[CMPXCHG_1:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 5 acq_rel monotonic, align 4
351+ ; CHECK-NEXT: [[CMPXCHG_2:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 6 acquire monotonic, align 4
352+ ; CHECK-NEXT: [[CMPXCHG_3:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 7 release monotonic, align 4
353+ ; CHECK-NEXT: [[CMPXCHG_4:%.*]] = cmpxchg ptr [[PTR2]], i32 0, i32 8 seq_cst monotonic, align 4
354+ ; CHECK-NEXT: [[CMPXCHG_5:%.*]] = cmpxchg weak ptr [[PTR2]], i32 0, i32 9 seq_cst monotonic, align 4
355+ ; CHECK-NEXT: [[CMPXCHG_6:%.*]] = cmpxchg volatile ptr [[PTR2]], i32 0, i32 10 seq_cst monotonic, align 4
356+ ; CHECK-NEXT: [[CMPXCHG_7:%.*]] = cmpxchg weak volatile ptr [[PTR2]], i32 0, i32 11 syncscope("singlethread") seq_cst monotonic, align 4
357+ ; CHECK-NEXT: ret void
358+ ;
; Input IR: diamond CFG merging %a/%b into %ptr1, then %ptr1/%c into %ptr2.
; Note the "bb.1 .true" spacing is scrape damage to labels named bb.1.true etc.
359+ br i1 %cond1 , label %bb.1.true , label %bb.1.false
360+ bb.1 .true:
361+ %a = addrspacecast ptr addrspace (1 ) @gptr to ptr
362+ br label %bb.1.end
363+
364+ bb.1 .false:
365+ %lptr = alloca i32 , align 4 , addrspace (5 )
366+ %b = addrspacecast ptr addrspace (5 ) %lptr to ptr
367+ br label %bb.1.end
368+
369+ bb.1 .end:
370+ %ptr1 = phi ptr [ %a , %bb.1.true ], [ %b , %bb.1.false ]
371+ br i1 %cond2 , label %bb.2.true , label %bb.2.end
372+
373+ bb.2 .true:
374+ br label %bb.2.end
375+
376+ bb.2 .end:
377+ %ptr2 = phi ptr [ %ptr1 , %bb.1.end ], [ %c , %bb.2.true ]
378+ %cmpxchg.0 = cmpxchg ptr %ptr2 , i32 0 , i32 4 monotonic monotonic , align 4
379+ %cmpxchg.1 = cmpxchg ptr %ptr2 , i32 0 , i32 5 acq_rel monotonic , align 4
380+ %cmpxchg.2 = cmpxchg ptr %ptr2 , i32 0 , i32 6 acquire monotonic , align 4
381+ %cmpxchg.3 = cmpxchg ptr %ptr2 , i32 0 , i32 7 release monotonic , align 4
382+ %cmpxchg.4 = cmpxchg ptr %ptr2 , i32 0 , i32 8 seq_cst monotonic , align 4
383+ %cmpxchg.5 = cmpxchg weak ptr %ptr2 , i32 0 , i32 9 seq_cst monotonic , align 4
384+ %cmpxchg.6 = cmpxchg volatile ptr %ptr2 , i32 0 , i32 10 seq_cst monotonic , align 4
385+ %cmpxchg.7 = cmpxchg weak volatile ptr %ptr2 , i32 0 , i32 11 syncscope("singlethread" ) seq_cst monotonic , align 4
386+ ret void
387+ }
388+
; atomicrmw counterpart of the select-based cmpxchg test: the same
; select-merged flat pointer (global GEP / private alloca / unknown %c) feeds
; one atomicrmw per integer operation (xchg, add, sub, and, nand, or, xor,
; max, min, umax, umin), including volatile and syncscope("singlethread")
; forms. CHECK lines expect the instructions unchanged -- presumably no
; !noalias.addrspace can be attached because %c is unconstrained; TODO confirm
; against the RUN line (outside this chunk). "NNN+" prefixes are diff scrape
; artifacts.
389+ define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw_flat (ptr %c , i1 %cond1 , i1 %cond2 , i32 %val , i32 %offset ) #0 {
390+ ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_select_atomicrmw_flat(
391+ ; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
392+ ; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
393+ ; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
394+ ; CHECK-NEXT: [[ADD_A:%.*]] = getelementptr inbounds i8, ptr addrspacecast (ptr addrspace(1) @gptr to ptr), i32 [[OFFSET]]
395+ ; CHECK-NEXT: [[PTR:%.*]] = select i1 [[COND1]], ptr [[ADD_A]], ptr [[B]]
396+ ; CHECK-NEXT: [[PTR2:%.*]] = select i1 [[COND2]], ptr [[PTR]], ptr [[C]]
397+ ; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4
398+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4
399+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4
400+ ; CHECK-NEXT: [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4
401+ ; CHECK-NEXT: [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4
402+ ; CHECK-NEXT: [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4
403+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4
404+ ; CHECK-NEXT: [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4
405+ ; CHECK-NEXT: [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4
406+ ; CHECK-NEXT: [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4
407+ ; CHECK-NEXT: [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4
408+ ; CHECK-NEXT: ret void
409+ ;
410+ %lptr = alloca i32 , align 4 , addrspace (5 )
411+ %a = addrspacecast ptr addrspace (1 ) @gptr to ptr
412+ %b = addrspacecast ptr addrspace (5 ) %lptr to ptr
413+ %add_a = getelementptr inbounds i8 , ptr %a , i32 %offset
414+ %ptr = select i1 %cond1 , ptr %add_a , ptr %b
415+ %ptr2 = select i1 %cond2 , ptr %ptr , ptr %c
416+ %atomicrmw.xchg = atomicrmw xchg ptr %ptr2 , i32 12 monotonic , align 4
417+ %atomicrmw.add = atomicrmw add ptr %ptr2 , i32 13 monotonic , align 4
418+ %atomicrmw.sub = atomicrmw sub ptr %ptr2 , i32 14 monotonic , align 4
419+ %atomicrmw.and = atomicrmw and ptr %ptr2 , i32 15 monotonic , align 4
420+ %atomicrmw.nand = atomicrmw nand ptr %ptr2 , i32 16 monotonic , align 4
421+ %atomicrmw.or = atomicrmw or ptr %ptr2 , i32 17 monotonic , align 4
422+ %atomicrmw.xor = atomicrmw xor ptr %ptr2 , i32 18 monotonic , align 4
423+ %atomicrmw.max = atomicrmw max ptr %ptr2 , i32 19 monotonic , align 4
424+ %atomicrmw.min = atomicrmw volatile min ptr %ptr2 , i32 20 monotonic , align 4
425+ %atomicrmw.umax = atomicrmw umax ptr %ptr2 , i32 21 syncscope("singlethread" ) monotonic , align 4
426+ %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2 , i32 22 syncscope("singlethread" ) monotonic , align 4
427+ ret void
428+ }
429+
; Branch/phi counterpart of the atomicrmw test: identical diamond CFG to
; @no_alias_addr_space_branch_cmpxchg_flat, but the phi-merged pointer feeds
; the full set of integer atomicrmw operations instead of cmpxchg. CHECK lines
; expect the atomics unannotated, mirroring the select-based variant. ("NNN+"
; prefixes and the "bb.1 .true" label spacing are diff-scrape artifacts.)
430+ define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw_flat (ptr %c , i1 %cond1 , i1 %cond2 , i32 %val , i32 %offset ) #0 {
431+ ; CHECK-LABEL: define amdgpu_kernel void @no_alias_addr_space_branch_atomicrmw_flat(
432+ ; CHECK-SAME: ptr [[C:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0]] {
433+ ; CHECK-NEXT: br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
434+ ; CHECK: [[BB_1_TRUE]]:
435+ ; CHECK-NEXT: [[A:%.*]] = addrspacecast ptr addrspace(1) @gptr to ptr
436+ ; CHECK-NEXT: br label %[[BB_1_END:.*]]
437+ ; CHECK: [[BB_1_FALSE]]:
438+ ; CHECK-NEXT: [[LPTR:%.*]] = alloca i32, align 4, addrspace(5)
439+ ; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(5) [[LPTR]] to ptr
440+ ; CHECK-NEXT: br label %[[BB_1_END]]
441+ ; CHECK: [[BB_1_END]]:
442+ ; CHECK-NEXT: [[PTR1:%.*]] = phi ptr [ [[A]], %[[BB_1_TRUE]] ], [ [[B]], %[[BB_1_FALSE]] ]
443+ ; CHECK-NEXT: br i1 [[COND2]], label %[[BB_2_TRUE:.*]], label %[[BB_2_END:.*]]
444+ ; CHECK: [[BB_2_TRUE]]:
445+ ; CHECK-NEXT: br label %[[BB_2_END]]
446+ ; CHECK: [[BB_2_END]]:
447+ ; CHECK-NEXT: [[PTR2:%.*]] = phi ptr [ [[PTR1]], %[[BB_1_END]] ], [ [[C]], %[[BB_2_TRUE]] ]
448+ ; CHECK-NEXT: [[ATOMICRMW_XCHG:%.*]] = atomicrmw xchg ptr [[PTR2]], i32 12 monotonic, align 4
449+ ; CHECK-NEXT: [[ATOMICRMW_ADD:%.*]] = atomicrmw add ptr [[PTR2]], i32 13 monotonic, align 4
450+ ; CHECK-NEXT: [[ATOMICRMW_SUB:%.*]] = atomicrmw sub ptr [[PTR2]], i32 14 monotonic, align 4
451+ ; CHECK-NEXT: [[ATOMICRMW_AND:%.*]] = atomicrmw and ptr [[PTR2]], i32 15 monotonic, align 4
452+ ; CHECK-NEXT: [[ATOMICRMW_NAND:%.*]] = atomicrmw nand ptr [[PTR2]], i32 16 monotonic, align 4
453+ ; CHECK-NEXT: [[ATOMICRMW_OR:%.*]] = atomicrmw or ptr [[PTR2]], i32 17 monotonic, align 4
454+ ; CHECK-NEXT: [[ATOMICRMW_XOR:%.*]] = atomicrmw xor ptr [[PTR2]], i32 18 monotonic, align 4
455+ ; CHECK-NEXT: [[ATOMICRMW_MAX:%.*]] = atomicrmw max ptr [[PTR2]], i32 19 monotonic, align 4
456+ ; CHECK-NEXT: [[ATOMICRMW_MIN:%.*]] = atomicrmw volatile min ptr [[PTR2]], i32 20 monotonic, align 4
457+ ; CHECK-NEXT: [[ATOMICRMW_UMAX:%.*]] = atomicrmw umax ptr [[PTR2]], i32 21 syncscope("singlethread") monotonic, align 4
458+ ; CHECK-NEXT: [[ATOMICRMW_UMIN:%.*]] = atomicrmw volatile umin ptr [[PTR2]], i32 22 syncscope("singlethread") monotonic, align 4
459+ ; CHECK-NEXT: ret void
460+ ;
461+ br i1 %cond1 , label %bb.1.true , label %bb.1.false
462+ bb.1 .true:
463+ %a = addrspacecast ptr addrspace (1 ) @gptr to ptr
464+ br label %bb.1.end
465+
466+ bb.1 .false:
467+ %lptr = alloca i32 , align 4 , addrspace (5 )
468+ %b = addrspacecast ptr addrspace (5 ) %lptr to ptr
469+ br label %bb.1.end
470+
471+ bb.1 .end:
472+ %ptr1 = phi ptr [ %a , %bb.1.true ], [ %b , %bb.1.false ]
473+ br i1 %cond2 , label %bb.2.true , label %bb.2.end
474+
475+ bb.2 .true:
476+ br label %bb.2.end
477+
478+ bb.2 .end:
479+ %ptr2 = phi ptr [ %ptr1 , %bb.1.end ], [ %c , %bb.2.true ]
480+ %atomicrmw.xchg = atomicrmw xchg ptr %ptr2 , i32 12 monotonic , align 4
481+ %atomicrmw.add = atomicrmw add ptr %ptr2 , i32 13 monotonic , align 4
482+ %atomicrmw.sub = atomicrmw sub ptr %ptr2 , i32 14 monotonic , align 4
483+ %atomicrmw.and = atomicrmw and ptr %ptr2 , i32 15 monotonic , align 4
484+ %atomicrmw.nand = atomicrmw nand ptr %ptr2 , i32 16 monotonic , align 4
485+ %atomicrmw.or = atomicrmw or ptr %ptr2 , i32 17 monotonic , align 4
486+ %atomicrmw.xor = atomicrmw xor ptr %ptr2 , i32 18 monotonic , align 4
487+ %atomicrmw.max = atomicrmw max ptr %ptr2 , i32 19 monotonic , align 4
488+ %atomicrmw.min = atomicrmw volatile min ptr %ptr2 , i32 20 monotonic , align 4
489+ %atomicrmw.umax = atomicrmw umax ptr %ptr2 , i32 21 syncscope("singlethread" ) monotonic , align 4
490+ %atomicrmw.umin = atomicrmw volatile umin ptr %ptr2 , i32 22 syncscope("singlethread" ) monotonic , align 4
491+ ret void
492+ }
493+
295494;.
296495; CHECK: [[META0]] = !{i32 2, i32 3, i32 4, i32 5, i32 6, i32 10}
297496; CHECK: [[META1]] = !{i32 1, i32 5, i32 6, i32 7, i32 8, i32 10}
0 commit comments