1- ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2- ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
3- ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+ ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIJI %s
3+ ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,HAWAII %s
4+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
45
56; Test calls when called by other callable functions rather than
67; kernels.
78
89declare void @external_void_func_i32 (i32 ) #0
910
10- ; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm:
11- ; GCN: s_waitcnt
12-
1311; Spill CSR VGPR used for SGPR spilling
14- ; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
15- ; GCN-NEXT: s_mov_b32 s33, s32
16- ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
17- ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
18- ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
19- ; GCN-DAG: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 2
20- ; GCN-DAG: v_writelane_b32 v40, s30, 0
21- ; GCN-DAG: v_writelane_b32 v40, s31, 1
22-
23- ; GCN: s_swappc_b64
24-
25- ; GCN: v_readlane_b32 s30, v40, 0
26- ; GCN: v_readlane_b32 s31, v40, 1
27- ; GCN: s_mov_b32 s32, s33
28-
29- ; GCN-NEXT: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 2
30- ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
31- ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
32- ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
33- ; GCN-NEXT: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
34- ; GCN-NEXT: s_waitcnt vmcnt(0)
35- ; GCN-NEXT: s_setpc_b64 s[30:31]
; Calls @external_void_func_i32 with the immediate argument 42 from a plain
; `define void` function. The autogenerated GCN checks below pin the whole
; expected instruction sequence around the s_swappc_b64 call: the store and
; reload of v40, and the v_writelane_b32 / v_readlane_b32 pairs that keep
; s30, s31 and the copy of the incoming s33 in lanes 0, 1 and 2 of v40.
3612define void @test_func_call_external_void_func_i32_imm () #0 {
13+ ; GCN-LABEL: test_func_call_external_void_func_i32_imm:
14+ ; GCN: ; %bb.0:
15+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16+ ; GCN-NEXT: s_mov_b32 s16, s33
17+ ; GCN-NEXT: s_mov_b32 s33, s32
18+ ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1
19+ ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
20+ ; GCN-NEXT: s_mov_b64 exec, s[18:19]
21+ ; GCN-NEXT: v_writelane_b32 v40, s16, 2
22+ ; GCN-NEXT: v_writelane_b32 v40, s30, 0
23+ ; GCN-NEXT: s_addk_i32 s32, 0x400
24+ ; GCN-NEXT: v_writelane_b32 v40, s31, 1
25+ ; GCN-NEXT: s_getpc_b64 s[16:17]
26+ ; GCN-NEXT: s_add_u32 s16, s16, external_void_func_i32@gotpcrel32@lo+4
27+ ; GCN-NEXT: s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12
28+ ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
29+ ; GCN-NEXT: v_mov_b32_e32 v0, 42
30+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
31+ ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
32+ ; GCN-NEXT: v_readlane_b32 s30, v40, 0
33+ ; GCN-NEXT: v_readlane_b32 s31, v40, 1
34+ ; GCN-NEXT: s_mov_b32 s32, s33
35+ ; GCN-NEXT: v_readlane_b32 s4, v40, 2
36+ ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
37+ ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
38+ ; GCN-NEXT: s_mov_b64 exec, s[6:7]
39+ ; GCN-NEXT: s_mov_b32 s33, s4
40+ ; GCN-NEXT: s_waitcnt vmcnt(0)
41+ ; GCN-NEXT: s_setpc_b64 s[30:31]
3742 call void @external_void_func_i32 (i32 42 )
3843 ret void
3944}
4045
41- ; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use:
42- ; GCN: s_waitcnt
43- ; GCN: s_mov_b32 s33, s32
44- ; GCN-DAG: s_addk_i32 s32, 0x1400{{$}}
45- ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:
46- ; GCN: s_swappc_b64
47- ; GCN: s_setpc_b64
4846define void @test_func_call_external_void_func_i32_imm_stack_use () #0 {
47+ ; GCN-LABEL: test_func_call_external_void_func_i32_imm_stack_use:
48+ ; GCN: ; %bb.0:
49+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50+ ; GCN-NEXT: s_mov_b32 s16, s33
51+ ; GCN-NEXT: s_mov_b32 s33, s32
52+ ; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1
53+ ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
54+ ; GCN-NEXT: s_mov_b64 exec, s[18:19]
55+ ; GCN-NEXT: v_writelane_b32 v40, s16, 2
56+ ; GCN-NEXT: v_writelane_b32 v40, s30, 0
57+ ; GCN-NEXT: s_addk_i32 s32, 0x1400
58+ ; GCN-NEXT: v_writelane_b32 v40, s31, 1
59+ ; GCN-NEXT: s_getpc_b64 s[16:17]
60+ ; GCN-NEXT: s_add_u32 s16, s16, external_void_func_i32@gotpcrel32@lo+4
61+ ; GCN-NEXT: s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12
62+ ; GCN-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
63+ ; GCN-NEXT: v_mov_b32_e32 v0, 0
64+ ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33
65+ ; GCN-NEXT: s_waitcnt vmcnt(0)
66+ ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:64
67+ ; GCN-NEXT: s_waitcnt vmcnt(0)
68+ ; GCN-NEXT: v_mov_b32_e32 v0, 42
69+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
70+ ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
71+ ; GCN-NEXT: v_readlane_b32 s30, v40, 0
72+ ; GCN-NEXT: v_readlane_b32 s31, v40, 1
73+ ; GCN-NEXT: s_mov_b32 s32, s33
74+ ; GCN-NEXT: v_readlane_b32 s4, v40, 2
75+ ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
76+ ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
77+ ; GCN-NEXT: s_mov_b64 exec, s[6:7]
78+ ; GCN-NEXT: s_mov_b32 s33, s4
79+ ; GCN-NEXT: s_waitcnt vmcnt(0)
80+ ; GCN-NEXT: s_setpc_b64 s[30:31]
4981 %alloca = alloca [16 x i32 ], align 4 , addrspace (5 )
5082 %gep15 = getelementptr inbounds [16 x i32 ], ptr addrspace (5 ) %alloca , i32 0 , i32 16
5183 store volatile i32 0 , ptr addrspace (5 ) %alloca
@@ -57,3 +89,7 @@ define void @test_func_call_external_void_func_i32_imm_stack_use() #0 {
5789attributes #0 = { nounwind }
5890attributes #1 = { nounwind readnone }
5991attributes #2 = { nounwind noinline }
92+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
93+ ; FIJI: {{.*}}
94+ ; GFX9: {{.*}}
95+ ; HAWAII: {{.*}}
0 commit comments