@@ -4539,6 +4539,111 @@ _Bool fi7e(_Bool *c) {
45394539 return __scoped_atomic_exchange_n (c , 1 , __ATOMIC_RELAXED ,
45404540 __MEMORY_SCOPE_SINGLE );
45414541}

// AMDGCN_CL_DEF-LABEL: define hidden void @fi8a(
// AMDGCN_CL_DEF-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
// AMDGCN_CL_DEF-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
// AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
// AMDGCN_CL_DEF-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: store i32 -1, ptr [[DOTATOMICTMP_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: store i32 -1, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP5]], i32 [[TMP6]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4
// AMDGCN_CL_DEF-NEXT: ret void
//
// AMDGCN_CL_20-LABEL: define hidden void @fi8a(
// AMDGCN_CL_20-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
// AMDGCN_CL_20-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
// AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: store i32 -1, ptr [[DOTATOMICTMP_ASCAST]], align 4
// AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
// AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
// AMDGCN_CL_20-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4
// AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: store i32 -1, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP5]], i32 [[TMP6]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
// AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4
// AMDGCN_CL_20-NEXT: ret void
//
// SPIRV-LABEL: define hidden spir_func void @fi8a(
// SPIRV-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] {
// SPIRV-NEXT: [[ENTRY:.*:]]
// SPIRV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// SPIRV-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
// SPIRV-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// SPIRV-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// SPIRV-NEXT: store i32 -1, ptr [[DOTATOMICTMP]], align 4
// SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4
// SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP1]] syncscope("device") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
// SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// SPIRV-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4
// SPIRV-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// SPIRV-NEXT: store i32 -1, ptr [[DOTATOMICTMP1]], align 4
// SPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4
// SPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP5]], i32 [[TMP6]] syncscope("device") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2]], align 4
// SPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
// SPIRV-NEXT: [[TMP9:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// SPIRV-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4
// SPIRV-NEXT: ret void
//
4642+ void fi8a (unsigned int * a , unsigned int * b ) {
4643+ * b = __scoped_atomic_uinc_wrap (b , ~0U , __ATOMIC_RELAXED , __MEMORY_SCOPE_DEVICE );
4644+ * a = __scoped_atomic_udec_wrap (a , ~0U , __ATOMIC_RELAXED , __MEMORY_SCOPE_DEVICE );
4645+ }
4646+
45424647//.
45434648// AMDGCN_CL_DEF: [[META3]] = !{}
45444649//.
0 commit comments