1- ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1+ ; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2+
3+ ; TODO: Test with flat scratch
24
35; GCN-LABEL: {{^}}store_fi_lifetime:
46; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
@@ -143,7 +145,7 @@ define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1)
143145 ret void
144146}
145147
146- ; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset :
148+ ; GCN-LABEL: {{^}}kernel_stored_fi_to_global_huge_frame_offset :
147149; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
148150; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
149151
@@ -158,7 +160,32 @@ define amdgpu_kernel void @stored_fi_to_global_2_small_objects(ptr addrspace(1)
158160; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
159161
160162; GCN: buffer_store_dword [[BASE_1_OFF_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
161- define amdgpu_kernel void @stored_fi_to_global_huge_frame_offset (ptr addrspace (1 ) %ptr ) #0 {
; Kernel (amdgpu_kernel) test body: allocates two [4096 x i32] arrays in
; addrspace(5), performs two volatile stores into the first one (element 0 and
; element 4095), then stores the address of element 14 of the first array to
; the addrspace(1) pointer argument.
163+ define amdgpu_kernel void @kernel_stored_fi_to_global_huge_frame_offset (ptr addrspace (1 ) %ptr ) #0 {
164+ %tmp0 = alloca [4096 x i32 ], addrspace (5 )
; %tmp1 is allocated but not referenced by any later instruction in this body.
165+ %tmp1 = alloca [4096 x i32 ], addrspace (5 )
166+ store volatile i32 0 , ptr addrspace (5 ) %tmp0
167+ %gep1.tmp0 = getelementptr [4096 x i32 ], ptr addrspace (5 ) %tmp0 , i32 0 , i32 4095
168+ store volatile i32 999 , ptr addrspace (5 ) %gep1.tmp0
; NOTE(review): despite its name, %gep0.tmp1 indexes %tmp0, not %tmp1. The
; expected offsets in the checks presumably depend on this - confirm before
; changing it.
169+ %gep0.tmp1 = getelementptr [4096 x i32 ], ptr addrspace (5 ) %tmp0 , i32 0 , i32 14
; The frame address itself (not a loaded value) is what gets written to %ptr.
170+ store ptr addrspace (5 ) %gep0.tmp1 , ptr addrspace (1 ) %ptr
171+ ret void
172+ }
173+
174+ ; GCN-LABEL: {{^}}func_stored_fi_to_global_huge_frame_offset:
175+ ; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
176+ ; GCN: buffer_store_dword [[BASE_0]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4{{$}}
177+
178+ ; GCN: v_lshr_b32_e64 [[FI_TMP:v[0-9]+]], s32, 6
179+ ; GCN: v_add_i32_e32 [[BASE_0_1:v[0-9]+]], vcc, 4, [[FI_TMP]]{{$}}
180+
181+ ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
182+ ; GCN-DAG: v_add_i32_e32 [[BASE_1_OFF_1:v[0-9]+]], vcc, 0x3ffc, [[BASE_0_1]]
183+
184+ ; GCN: v_add_i32_e32 [[BASE_1_OFF_2:v[0-9]+]], vcc, 56, [[BASE_0_1]]
185+ ; GCN: buffer_store_dword [[K]], [[BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
186+
187+ ; GCN: buffer_store_dword [[BASE_1_OFF_2]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
188+ define void @func_stored_fi_to_global_huge_frame_offset (ptr addrspace (1 ) %ptr ) #0 {
162189 %tmp0 = alloca [4096 x i32 ], addrspace (5 )
163190 %tmp1 = alloca [4096 x i32 ], addrspace (5 )
164191 store volatile i32 0 , ptr addrspace (5 ) %tmp0
@@ -190,6 +217,124 @@ entry:
190217 ret void
191218}
192219
; Non-kernel function that passes the address of a single i32 addrspace(5)
; alloca to inline asm through the "s" constraint. The checks below record the
; current output: the address is produced by v_lshr_b32_e64 from s32 shifted
; right by 6 into a VGPR, and that VGPR appears in the asm text.
220+ ; FIXME: This is broken, and the sgpr input just gets replaced with a VGPR
221+ ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_sgpr:
222+ ; GCN: v_lshr_b32_e64 [[FI:v[0-9]+]], s32, 6
223+ ; GCN: ; use [[FI]]
224+ define void @func_alloca_offset0__use_asm_sgpr () {
225+ %alloca = alloca i32 , addrspace (5 )
; "$0" in the asm template is replaced by the register chosen for the operand.
226+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca )
227+ ret void
228+ }
229+
; Non-kernel function that passes the address of a single i32 addrspace(5)
; alloca to inline asm through the "v" constraint. The checks require the
; v_lshr_b32_e64 of s32 by 6 to be immediately followed by the asm block, which
; must print the same VGPR.
230+ ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_vgpr:
231+ ; GCN: v_lshr_b32_e64 [[FI:v[0-9]+]], s32, 6
232+ ; GCN-NEXT: ;;#ASMSTART
233+ ; GCN-NEXT: ; use [[FI]]
234+ define void @func_alloca_offset0__use_asm_vgpr () {
235+ %alloca = alloca i32 , addrspace (5 )
236+ call void asm sideeffect "; use $0" , "v" (ptr addrspace (5 ) %alloca )
237+ ret void
238+ }
239+
; Non-kernel function that passes the address of a single i32 addrspace(5)
; alloca to inline asm pinned to the physical register s8 via the "{s8}"
; constraint. The checks expect a scalar shift (s_lshr_b32) of s32 by 6 written
; directly into s8, immediately followed by the asm block printing s8.
240+ ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_phys_sgpr:
241+ ; GCN: s_lshr_b32 s8, s32, 6
242+ ; GCN-NEXT: ;;#ASMSTART
243+ ; GCN-NEXT: ; use s8
244+ define void @func_alloca_offset0__use_asm_phys_sgpr () {
245+ %alloca = alloca i32 , addrspace (5 )
246+ call void asm sideeffect "; use $0" , "{s8}" (ptr addrspace (5 ) %alloca )
247+ ret void
248+ }
249+
; Non-kernel function that passes the address of a single i32 addrspace(5)
; alloca to inline asm pinned to the physical register v8 via the "{v8}"
; constraint. The checks expect v_lshr_b32_e64 of s32 by 6 written directly
; into v8, immediately followed by the asm block printing v8.
250+ ; GCN-LABEL: {{^}}func_alloca_offset0__use_asm_phys_vgpr:
251+ ; GCN: v_lshr_b32_e64 v8, s32, 6
252+ ; GCN-NEXT: ;;#ASMSTART
253+ ; GCN-NEXT: ; use v8
254+ define void @func_alloca_offset0__use_asm_phys_vgpr () {
255+ %alloca = alloca i32 , addrspace (5 )
256+ call void asm sideeffect "; use $0" , "{v8}" (ptr addrspace (5 ) %alloca )
257+ ret void
258+ }
259+
; Non-kernel function with two addrspace(5) allocas (a 16-byte-aligned
; [4096 x i32] array and an i32), each passed to its own inline asm through the
; "s" constraint. The checks expect the first address as shift + add of 16 and
; the second as shift + add of 0x4010, with 0x4010 first moved into vcc_lo.
; NOTE(review): both addresses are bound to VGPR patterns here even though the
; constraints are "s"; this looks like the same behaviour the FIXME on
; func_alloca_offset0__use_asm_sgpr calls broken - confirm.
; NOTE(review): [[FI0]] is captured but no later check line uses it, so the
; operand printed by the first asm block is not verified.
260+ ; GCN-LABEL: {{^}}func_alloca_offset_use_asm_sgpr:
261+ ; GCN: v_lshr_b32_e64 [[FI0_TMP0:v[0-9]+]], s32, 6
262+ ; GCN-NEXT: v_add_i32_e32 [[FI0:v[0-9]+]], vcc, 16, [[FI0_TMP0]]
263+
264+ ; GCN: v_lshr_b32_e64 [[TMP:v[0-9]+]], s32, 6
265+ ; GCN-NEXT: s_movk_i32 vcc_lo, 0x4010
266+ ; GCN-NEXT: v_add_i32_e32 [[TMP]], vcc, vcc_lo, [[TMP]]
267+ ; GCN-NEXT: ;;#ASMSTART
268+ ; GCN: ; use [[TMP]]
269+ define void @func_alloca_offset_use_asm_sgpr () {
270+ %alloca0 = alloca [4096 x i32 ], align 16 , addrspace (5 )
271+ %alloca1 = alloca i32 , addrspace (5 )
272+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca0 )
273+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca1 )
274+ ret void
275+ }
276+
; Non-kernel function with two addrspace(5) allocas (a 16-byte-aligned
; [4096 x i32] array and an i32), each passed to its own inline asm through the
; "v" constraint. The checks expect each address to be formed in a VGPR
; (shift of s32 by 6, then an add of 16 or of 0x4010) immediately before the
; asm block that prints it.
; Both operands use "v" so that this test exercises only VGPR inputs, matching
; its name and kernel_alloca_offset_use_asm_vgpr; "s" inputs are covered by
; func_alloca_offset_use_asm_sgpr.
277+ ; FIXME: Shouldn't need to materialize constant
278+ ; GCN-LABEL: {{^}}func_alloca_offset_use_asm_vgpr:
279+ ; GCN: v_lshr_b32_e64 [[FI0_TMP:v[0-9]+]], s32, 6
280+ ; GCN-NEXT: v_add_i32_e32 [[FI0:v[0-9]+]], vcc, 16, [[FI0_TMP]]
281+ ; GCN-NEXT: ;;#ASMSTART
282+ ; GCN-NEXT: ; use [[FI0]]
283+ ; GCN-NEXT: ;;#ASMEND
284+
285+ ; GCN: v_lshr_b32_e64 [[FI1_TMP:v[0-9]+]], s32, 6
286+ ; GCN-NEXT: s_movk_i32 vcc_lo, 0x4010
287+ ; GCN-NEXT: v_add_i32_e32 [[FI1:v[0-9]+]], vcc, vcc_lo, [[FI1_TMP]]
288+ ; GCN-NEXT: ;;#ASMSTART
289+ ; GCN-NEXT: ; use [[FI1]]
290+ ; GCN-NEXT: ;;#ASMEND
291+ define void @func_alloca_offset_use_asm_vgpr () {
292+ %alloca0 = alloca [4096 x i32 ], align 16 , addrspace (5 )
293+ %alloca1 = alloca i32 , addrspace (5 )
294+ call void asm sideeffect "; use $0" , "v" (ptr addrspace (5 ) %alloca0 )
295+ call void asm sideeffect "; use $0" , "v" (ptr addrspace (5 ) %alloca1 )
296+ ret void
297+ }
298+
; Kernel (amdgpu_kernel) counterpart of func_alloca_offset_use_asm_sgpr: two
; addrspace(5) allocas, each passed to inline asm through the "s" constraint.
; The checks record the current output, where each address is a constant moved
; into v0 (16, then 0x4010) and v0 is what the asm prints. The first sequence
; only forbids other mentions of v0 before the asm block; the second requires
; the move to be immediately followed by it.
299+ ; FIXME: Using VGPR for SGPR input
300+ ; GCN-LABEL: {{^}}kernel_alloca_offset_use_asm_sgpr:
301+ ; GCN: v_mov_b32_e32 v0, 16
302+ ; GCN-NOT: v0
303+ ; GCN: ;;#ASMSTART
304+ ; GCN-NEXT: ; use v0
305+ ; GCN-NEXT: ;;#ASMEND
306+
307+ ; GCN: v_mov_b32_e32 v0, 0x4010
308+ ; GCN-NEXT: ;;#ASMSTART
309+ ; GCN-NEXT: ; use v0
310+ ; GCN-NEXT: ;;#ASMEND
311+ define amdgpu_kernel void @kernel_alloca_offset_use_asm_sgpr () {
312+ %alloca0 = alloca [4096 x i32 ], align 16 , addrspace (5 )
313+ %alloca1 = alloca i32 , addrspace (5 )
314+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca0 )
315+ call void asm sideeffect "; use $0" , "s" (ptr addrspace (5 ) %alloca1 )
316+ ret void
317+ }
318+
; Kernel (amdgpu_kernel) test with two addrspace(5) allocas (a 16-byte-aligned
; [4096 x i32] array and an i32), each passed to inline asm through the "v"
; constraint. The checks expect each address as a constant moved into v0
; (16, then 0x4010) with v0 printed by the following asm block. The first
; sequence only forbids other mentions of v0 before the asm block; the second
; requires the move to be immediately followed by it.
319+ ; GCN-LABEL: {{^}}kernel_alloca_offset_use_asm_vgpr:
320+ ; GCN: v_mov_b32_e32 v0, 16
321+ ; GCN-NOT: v0
322+ ; GCN: ;;#ASMSTART
323+ ; GCN-NEXT: ; use v0
324+ ; GCN-NEXT: ;;#ASMEND
325+
326+ ; GCN: v_mov_b32_e32 v0, 0x4010
327+ ; GCN-NEXT: ;;#ASMSTART
328+ ; GCN-NEXT: ; use v0
329+ ; GCN-NEXT: ;;#ASMEND
330+ define amdgpu_kernel void @kernel_alloca_offset_use_asm_vgpr () {
331+ %alloca0 = alloca [4096 x i32 ], align 16 , addrspace (5 )
332+ %alloca1 = alloca i32 , addrspace (5 )
333+ call void asm sideeffect "; use $0" , "v" (ptr addrspace (5 ) %alloca0 )
334+ call void asm sideeffect "; use $0" , "v" (ptr addrspace (5 ) %alloca1 )
335+ ret void
336+ }
337+
; Declarations of the lifetime start/end intrinsics for addrspace(5) pointers
; (the .p5 suffix). Each takes an i64 and a nocapture pointer; attribute group
; #1 is defined outside this excerpt.
193338declare void @llvm.lifetime.start.p5 (i64 , ptr addrspace (5 ) nocapture ) #1
194339declare void @llvm.lifetime.end.p5 (i64 , ptr addrspace (5 ) nocapture ) #1
195340
0 commit comments