@@ -139,3 +139,87 @@ define amdgpu_kernel void @select_ptr_crash_i64_local_offsets(i32 %tmp, ptr addr
139139 store i64 %tmp5 , ptr addrspace (1 ) %ptr2 , align 8
140140 ret void
141141}
142+
143+ ; The resultant load cannot be treated as uniform
144+ define amdgpu_kernel void @sample_test (ptr addrspace (1 ) %dest , ptr addrspace (1 ) %sourceA , ptr addrspace (1 ) %sourceB , i1 %tobool.not.i ) #0 {
145+ ; GCN-LABEL: sample_test:
146+ ; GCN: ; %bb.0: ; %entry
147+ ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
148+ ; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
149+ ; GCN-NEXT: v_lshlrev_b32_e32 v2, 3, v0
150+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
151+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
152+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v2
153+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
154+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
155+ ; GCN-NEXT: s_load_dword s2, s[4:5], 0x18
156+ ; GCN-NEXT: v_mov_b32_e32 v3, s1
157+ ; GCN-NEXT: v_add_u32_e32 v2, vcc, s0, v2
158+ ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
159+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
160+ ; GCN-NEXT: s_bitcmp1_b32 s2, 0
161+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
162+ ; GCN-NEXT: s_cselect_b64 vcc, -1, 0
163+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
164+ ; GCN-NEXT: v_mov_b32_e32 v4, s3
165+ ; GCN-NEXT: v_mov_b32_e32 v5, s2
166+ ; GCN-NEXT: s_waitcnt vmcnt(0)
167+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
168+ ; GCN-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
169+ ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
170+ ; GCN-NEXT: s_endpgm
171+ entry:
172+ %0 = tail call i32 @llvm.amdgcn.workitem.id.x ()
173+ %conv2.i.i.i1 = zext i32 %0 to i64
174+ %arrayidx.i = getelementptr i64 , ptr addrspace (1 ) %sourceA , i64 %conv2.i.i.i1
175+ %dest.gep = getelementptr i64 , ptr addrspace (1 ) %dest , i64 %conv2.i.i.i1
176+ %ld0 = load i64 , ptr addrspace (1 ) %arrayidx.i , align 8 , !amdgpu.noclobber !0
177+ %ld1 = load i64 , ptr addrspace (1 ) %sourceB , align 8
178+ %cond.i = select i1 %tobool.not.i , i64 %ld0 , i64 %ld1
179+ store i64 %cond.i , ptr addrspace (1 ) %dest.gep , align 8
180+ ret void
181+ }
182+
183+ ; The resultant load cannot be treated as uniform
184+ define amdgpu_kernel void @constant_is_not_uniform (ptr addrspace (1 ) %dest , ptr addrspace (4 ) %sourceA , ptr addrspace (4 ) %sourceB , i1 %tobool.not.i ) #0 {
185+ ; GCN-LABEL: constant_is_not_uniform:
186+ ; GCN: ; %bb.0: ; %entry
187+ ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
188+ ; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
189+ ; GCN-NEXT: v_lshlrev_b32_e32 v2, 3, v0
190+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
191+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
192+ ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v2
193+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
194+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
195+ ; GCN-NEXT: s_load_dword s2, s[4:5], 0x18
196+ ; GCN-NEXT: v_mov_b32_e32 v3, s1
197+ ; GCN-NEXT: v_add_u32_e32 v2, vcc, s0, v2
198+ ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
199+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
200+ ; GCN-NEXT: s_bitcmp1_b32 s2, 0
201+ ; GCN-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
202+ ; GCN-NEXT: s_cselect_b64 vcc, -1, 0
203+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
204+ ; GCN-NEXT: v_mov_b32_e32 v4, s3
205+ ; GCN-NEXT: v_mov_b32_e32 v5, s2
206+ ; GCN-NEXT: s_waitcnt vmcnt(0)
207+ ; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
208+ ; GCN-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
209+ ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
210+ ; GCN-NEXT: s_endpgm
211+ entry:
212+ %0 = tail call i32 @llvm.amdgcn.workitem.id.x ()
213+ %conv2.i.i.i1 = zext i32 %0 to i64
214+ %arrayidx.i = getelementptr i64 , ptr addrspace (4 ) %sourceA , i64 %conv2.i.i.i1
215+ %dest.gep = getelementptr i64 , ptr addrspace (1 ) %dest , i64 %conv2.i.i.i1
216+ %ld0 = load i64 , ptr addrspace (4 ) %arrayidx.i , align 8
217+ %ld1 = load i64 , ptr addrspace (4 ) %sourceB , align 8
218+ %cond.i = select i1 %tobool.not.i , i64 %ld0 , i64 %ld1
219+ store i64 %cond.i , ptr addrspace (1 ) %dest.gep , align 8
220+ ret void
221+ }
222+
223+ attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
224+
225+ !0 = !{}
0 commit comments