Skip to content

Commit f6f2755

Browse files
committed
[NFC][AMDGPU][GISel] Precommit GlobalISel specific tests for call instruction
1 parent 22079e3 commit f6f2755

File tree

1 file changed

+398
-0
lines changed
  • llvm/test/CodeGen/AMDGPU/GlobalISel

1 file changed

+398
-0
lines changed
Lines changed: 398 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,398 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
3+
4+
; External callees for the call tests in this file. Every one is declared
; hidden and uses attribute group #0. The first five use the default calling
; convention; the remaining six are declared with the amdgpu_gfx calling
; convention.
declare hidden void @external_void_func_void() #0
5+
declare hidden void @external_void_func_i32(i32) #0
6+
declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
7+
declare hidden i32 @external_i32_func_void() #0
8+
9+
declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
10+
11+
declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
12+
declare hidden amdgpu_gfx void @external_gfx_void_func_i32(i32) #0
13+
declare hidden amdgpu_gfx i32 @external_gfx_i32_func_void() #0
14+
declare hidden amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg) #0
15+
declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 }) #0
16+
declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg) #0
17+
18+
; Kernel whose body is a single call to @external_void_func_void (no
; arguments, no return value) followed by ret void. The autogenerated check
; lines pin the expected llc output, which ends with s_endpgm right after the
; s_swappc_b64 to the callee address.
define amdgpu_kernel void @test_call_external_void_func_void() #0 {
19+
; GFX9-LABEL: test_call_external_void_func_void:
20+
; GFX9: ; %bb.0:
21+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
22+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
23+
; GFX9-NEXT: s_add_u32 s0, s0, s17
24+
; GFX9-NEXT: s_addc_u32 s1, s1, 0
25+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
26+
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
27+
; GFX9-NEXT: s_mov_b32 s13, s15
28+
; GFX9-NEXT: s_mov_b32 s12, s14
29+
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
30+
; GFX9-NEXT: s_getpc_b64 s[18:19]
31+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_void@rel32@lo+4
32+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_void@rel32@hi+12
33+
; GFX9-NEXT: s_mov_b32 s14, s16
34+
; GFX9-NEXT: s_mov_b32 s32, 0
35+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
36+
; GFX9-NEXT: s_endpgm
37+
call void @external_void_func_void()
38+
ret void
39+
}
40+
41+
; Kernel that calls @external_void_func_i32 with the constant 42. The kernel's
; own unnamed i32 parameter is not used by the body. The expected output
; places 42 in v0 (v_mov_b32_e32 v0, 42) before the s_swappc_b64 and ends with
; s_endpgm.
define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
42+
; GFX9-LABEL: test_call_external_void_func_i32_imm:
43+
; GFX9: ; %bb.0:
44+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
45+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
46+
; GFX9-NEXT: s_add_u32 s0, s0, s17
47+
; GFX9-NEXT: s_addc_u32 s1, s1, 0
48+
; GFX9-NEXT: s_add_u32 s8, s8, 8
49+
; GFX9-NEXT: s_addc_u32 s9, s9, 0
50+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
51+
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
52+
; GFX9-NEXT: s_mov_b32 s13, s15
53+
; GFX9-NEXT: s_mov_b32 s12, s14
54+
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
55+
; GFX9-NEXT: s_getpc_b64 s[18:19]
56+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32@rel32@lo+4
57+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32@rel32@hi+12
58+
; GFX9-NEXT: v_mov_b32_e32 v0, 42
59+
; GFX9-NEXT: s_mov_b32 s14, s16
60+
; GFX9-NEXT: s_mov_b32 s32, 0
61+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
62+
; GFX9-NEXT: s_endpgm
63+
call void @external_void_func_i32(i32 42)
64+
ret void
65+
}
66+
67+
; Kernel that calls @external_i32_func_void and writes the returned i32 with a
; volatile store to a poison addrspace(1) pointer. In the expected output the
; store (global_store_dword ... v0) follows the s_swappc_b64 directly, and the
; kernel ends with s_endpgm.
define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
68+
; GFX9-LABEL: test_call_external_i32_func_void:
69+
; GFX9: ; %bb.0:
70+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
71+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
72+
; GFX9-NEXT: s_add_u32 s0, s0, s17
73+
; GFX9-NEXT: s_addc_u32 s1, s1, 0
74+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
75+
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
76+
; GFX9-NEXT: s_mov_b32 s13, s15
77+
; GFX9-NEXT: s_mov_b32 s12, s14
78+
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
79+
; GFX9-NEXT: s_getpc_b64 s[18:19]
80+
; GFX9-NEXT: s_add_u32 s18, s18, external_i32_func_void@rel32@lo+4
81+
; GFX9-NEXT: s_addc_u32 s19, s19, external_i32_func_void@rel32@hi+12
82+
; GFX9-NEXT: s_mov_b32 s14, s16
83+
; GFX9-NEXT: s_mov_b32 s32, 0
84+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
85+
; GFX9-NEXT: global_store_dword v[0:1], v0, off
86+
; GFX9-NEXT: s_waitcnt vmcnt(0)
87+
; GFX9-NEXT: s_endpgm
88+
%val = call i32 @external_i32_func_void()
89+
store volatile i32 %val, ptr addrspace(1) poison
90+
ret void
91+
}
92+
93+
; Non-kernel function (default calling convention) that forwards its inreg
; i32 argument %arg to @external_void_func_i32_inreg, again marked inreg at
; the call site.
;
; This must not be an amdgpu_kernel: the assertions below describe a callable
; function body (entry s_waitcnt, s33 and v40 saved and restored around the
; call, return through s_setpc_b64 s[30:31]) and contain no s_endpgm, which is
; what every kernel test in this file ends with.
define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
94+
; GFX9-LABEL: test_call_external_void_func_i32_inreg:
95+
; GFX9: ; %bb.0:
96+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97+
; GFX9-NEXT: s_mov_b32 s17, s33
98+
; GFX9-NEXT: s_mov_b32 s33, s32
99+
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
100+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
101+
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
102+
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
103+
; GFX9-NEXT: s_addk_i32 s32, 0x400
104+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
105+
; GFX9-NEXT: s_getpc_b64 s[18:19]
106+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4
107+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12
108+
; GFX9-NEXT: s_mov_b32 s0, s16
109+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
110+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
111+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
112+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
113+
; GFX9-NEXT: s_mov_b32 s32, s33
114+
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
115+
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
116+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
117+
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
118+
; GFX9-NEXT: s_mov_b32 s33, s4
119+
; GFX9-NEXT: s_waitcnt vmcnt(0)
120+
; GFX9-NEXT: s_setpc_b64 s[30:31]
121+
call void @external_void_func_i32_inreg(i32 inreg %arg)
122+
ret void
123+
}
124+
125+
; Kernel that allocates two { i8, i32 } objects in addrspace(5), stores 3 and
; 8 into the fields of %in.val, and passes %out.val and %in.val to a callee
; whose declaration marks the first parameter sret and the second byval.
; After the call it loads both fields of %out.val and writes them with
; volatile stores to a poison addrspace(1) pointer. The kernel's own unnamed
; i32 parameter is not used by the body.
; NOTE(review): the call site carries no sret/byval attributes (they appear
; only on the declaration), unlike the inreg call sites elsewhere in this
; file - confirm this is intentional.
define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
126+
; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
127+
; GFX9: ; %bb.0:
128+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
129+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
130+
; GFX9-NEXT: s_add_u32 s0, s0, s17
131+
; GFX9-NEXT: s_addc_u32 s1, s1, 0
132+
; GFX9-NEXT: v_mov_b32_e32 v3, 3
133+
; GFX9-NEXT: buffer_store_byte v3, off, s[0:3], 0
134+
; GFX9-NEXT: v_mov_b32_e32 v3, 8
135+
; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4
136+
; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], 0
137+
; GFX9-NEXT: s_nop 0
138+
; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:4
139+
; GFX9-NEXT: s_add_u32 s8, s8, 8
140+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
141+
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
142+
; GFX9-NEXT: s_addc_u32 s9, s9, 0
143+
; GFX9-NEXT: s_movk_i32 s32, 0x800
144+
; GFX9-NEXT: s_mov_b32 s13, s15
145+
; GFX9-NEXT: s_mov_b32 s12, s14
146+
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
147+
; GFX9-NEXT: s_getpc_b64 s[18:19]
148+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
149+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
150+
; GFX9-NEXT: v_mov_b32_e32 v0, 8
151+
; GFX9-NEXT: s_mov_b32 s14, s16
152+
; GFX9-NEXT: s_waitcnt vmcnt(1)
153+
; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32
154+
; GFX9-NEXT: s_waitcnt vmcnt(1)
155+
; GFX9-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:4
156+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
157+
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:8
158+
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:12
159+
; GFX9-NEXT: s_waitcnt vmcnt(0)
160+
; GFX9-NEXT: global_store_byte v[0:1], v0, off
161+
; GFX9-NEXT: s_waitcnt vmcnt(0)
162+
; GFX9-NEXT: global_store_dword v[0:1], v1, off
163+
; GFX9-NEXT: s_waitcnt vmcnt(0)
164+
; GFX9-NEXT: s_endpgm
165+
%in.val = alloca { i8, i32 }, align 4, addrspace(5)
166+
%out.val = alloca { i8, i32 }, align 4, addrspace(5)
167+
%in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
168+
%in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
169+
store i8 3, ptr addrspace(5) %in.gep0
170+
store i32 8, ptr addrspace(5) %in.gep1
171+
call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) %in.val)
172+
%out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
173+
%out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
174+
%out.val0 = load i8, ptr addrspace(5) %out.gep0
175+
%out.val1 = load i32, ptr addrspace(5) %out.gep1
176+
store volatile i8 %out.val0, ptr addrspace(1) poison
177+
store volatile i32 %out.val1, ptr addrspace(1) poison
178+
ret void
179+
}
180+
181+
; amdgpu_gfx function whose body is a single amdgpu_gfx call to
; @external_gfx_void_func_void followed by ret void. The expected output
; saves s33, s30 and s31 into lanes of v40 around the call and returns with
; s_setpc_b64 s[30:31].
define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
182+
; GFX9-LABEL: test_gfx_call_external_void_func_void:
183+
; GFX9: ; %bb.0:
184+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185+
; GFX9-NEXT: s_mov_b32 s34, s33
186+
; GFX9-NEXT: s_mov_b32 s33, s32
187+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
188+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
189+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
190+
; GFX9-NEXT: v_writelane_b32 v40, s34, 2
191+
; GFX9-NEXT: s_addk_i32 s32, 0x400
192+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
193+
; GFX9-NEXT: s_getpc_b64 s[34:35]
194+
; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_void@rel32@lo+4
195+
; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_void@rel32@hi+12
196+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
197+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
198+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
199+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
200+
; GFX9-NEXT: s_mov_b32 s32, s33
201+
; GFX9-NEXT: v_readlane_b32 s34, v40, 2
202+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
203+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
204+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
205+
; GFX9-NEXT: s_mov_b32 s33, s34
206+
; GFX9-NEXT: s_waitcnt vmcnt(0)
207+
; GFX9-NEXT: s_setpc_b64 s[30:31]
208+
call amdgpu_gfx void @external_gfx_void_func_void()
209+
ret void
210+
}
211+
212+
; amdgpu_gfx function that calls @external_gfx_void_func_i32 with the constant
; 42. The function's own unnamed i32 parameter is not used by the body. The
; expected output places 42 in v0 (v_mov_b32_e32 v0, 42) before the
; s_swappc_b64 and returns with s_setpc_b64 s[30:31].
define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
213+
; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm:
214+
; GFX9: ; %bb.0:
215+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216+
; GFX9-NEXT: s_mov_b32 s34, s33
217+
; GFX9-NEXT: s_mov_b32 s33, s32
218+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
219+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
220+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
221+
; GFX9-NEXT: v_writelane_b32 v40, s34, 2
222+
; GFX9-NEXT: s_addk_i32 s32, 0x400
223+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
224+
; GFX9-NEXT: s_getpc_b64 s[34:35]
225+
; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_i32@rel32@lo+4
226+
; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_i32@rel32@hi+12
227+
; GFX9-NEXT: v_mov_b32_e32 v0, 42
228+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
229+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
230+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
231+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
232+
; GFX9-NEXT: s_mov_b32 s32, s33
233+
; GFX9-NEXT: v_readlane_b32 s34, v40, 2
234+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
235+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
236+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
237+
; GFX9-NEXT: s_mov_b32 s33, s34
238+
; GFX9-NEXT: s_waitcnt vmcnt(0)
239+
; GFX9-NEXT: s_setpc_b64 s[30:31]
240+
call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
241+
ret void
242+
}
243+
244+
; amdgpu_gfx function that calls @external_gfx_i32_func_void and writes the
; returned i32 with a volatile store to a poison addrspace(1) pointer. In the
; expected output the store (global_store_dword ... v0) follows the
; s_swappc_b64 directly, ahead of the s30/s31/s33 restores and the
; s_setpc_b64 return.
define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
245+
; GFX9-LABEL: test_gfx_call_external_i32_func_void:
246+
; GFX9: ; %bb.0:
247+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248+
; GFX9-NEXT: s_mov_b32 s34, s33
249+
; GFX9-NEXT: s_mov_b32 s33, s32
250+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
251+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
252+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
253+
; GFX9-NEXT: v_writelane_b32 v40, s34, 2
254+
; GFX9-NEXT: s_addk_i32 s32, 0x400
255+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
256+
; GFX9-NEXT: s_getpc_b64 s[34:35]
257+
; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_i32_func_void@rel32@lo+4
258+
; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_i32_func_void@rel32@hi+12
259+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
260+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
261+
; GFX9-NEXT: global_store_dword v[0:1], v0, off
262+
; GFX9-NEXT: s_waitcnt vmcnt(0)
263+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
264+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
265+
; GFX9-NEXT: s_mov_b32 s32, s33
266+
; GFX9-NEXT: v_readlane_b32 s34, v40, 2
267+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
268+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
269+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
270+
; GFX9-NEXT: s_mov_b32 s33, s34
271+
; GFX9-NEXT: s_waitcnt vmcnt(0)
272+
; GFX9-NEXT: s_setpc_b64 s[30:31]
273+
%val = call amdgpu_gfx i32 @external_gfx_i32_func_void()
274+
store volatile i32 %val, ptr addrspace(1) poison
275+
ret void
276+
}
277+
278+
; amdgpu_gfx function that calls @external_gfx_void_func_i32_inreg with the
; constant 42 passed as an inreg argument. The function's own unnamed inreg
; i32 parameter is not used by the body. The expected output writes s4 into
; lane 0 of v40, sets s4 to 42 (s_mov_b32 s4, 42) before the s_swappc_b64,
; and reads s4 back from lane 0 afterwards.
define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 {
279+
; GFX9-LABEL: test_gfx_call_external_void_func_i32_imm_inreg:
280+
; GFX9: ; %bb.0:
281+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282+
; GFX9-NEXT: s_mov_b32 s34, s33
283+
; GFX9-NEXT: s_mov_b32 s33, s32
284+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
285+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
286+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
287+
; GFX9-NEXT: v_writelane_b32 v40, s34, 3
288+
; GFX9-NEXT: s_addk_i32 s32, 0x400
289+
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
290+
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
291+
; GFX9-NEXT: s_getpc_b64 s[34:35]
292+
; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_i32_inreg@rel32@lo+4
293+
; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_i32_inreg@rel32@hi+12
294+
; GFX9-NEXT: s_mov_b32 s4, 42
295+
; GFX9-NEXT: v_writelane_b32 v40, s31, 2
296+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
297+
; GFX9-NEXT: v_readlane_b32 s31, v40, 2
298+
; GFX9-NEXT: v_readlane_b32 s30, v40, 1
299+
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
300+
; GFX9-NEXT: s_mov_b32 s32, s33
301+
; GFX9-NEXT: v_readlane_b32 s34, v40, 3
302+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
303+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
304+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
305+
; GFX9-NEXT: s_mov_b32 s33, s34
306+
; GFX9-NEXT: s_waitcnt vmcnt(0)
307+
; GFX9-NEXT: s_setpc_b64 s[30:31]
308+
call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
309+
ret void
310+
}
311+
312+
; amdgpu_gfx function that loads a ptr addrspace(1) from a poison
; addrspace(4) pointer, loads a { i8, i32 } value through it, and passes that
; struct by value to @external_gfx_void_func_struct_i8_i32. The expected
; output loads the two fields into v0 (global_load_ubyte) and v1
; (global_load_dword at offset:4) before the s_swappc_b64.
define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
313+
; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32:
314+
; GFX9: ; %bb.0:
315+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316+
; GFX9-NEXT: s_mov_b32 s34, s33
317+
; GFX9-NEXT: s_mov_b32 s33, s32
318+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
319+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
320+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
321+
; GFX9-NEXT: v_writelane_b32 v40, s34, 2
322+
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
323+
; GFX9-NEXT: v_mov_b32_e32 v2, 0
324+
; GFX9-NEXT: s_addk_i32 s32, 0x400
325+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
326+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
327+
; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35]
328+
; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4
329+
; GFX9-NEXT: s_getpc_b64 s[34:35]
330+
; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32@rel32@lo+4
331+
; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32@rel32@hi+12
332+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
333+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
334+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
335+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
336+
; GFX9-NEXT: s_mov_b32 s32, s33
337+
; GFX9-NEXT: v_readlane_b32 s34, v40, 2
338+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
339+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
340+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
341+
; GFX9-NEXT: s_mov_b32 s33, s34
342+
; GFX9-NEXT: s_waitcnt vmcnt(0)
343+
; GFX9-NEXT: s_setpc_b64 s[30:31]
344+
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
345+
%val = load { i8, i32 }, ptr addrspace(1) %ptr0
346+
call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val)
347+
ret void
348+
}
349+
350+
; amdgpu_gfx function that loads a ptr addrspace(1) from a poison
; addrspace(4) pointer, loads a { i8, i32 } value through it, and passes that
; struct as an inreg argument to
; @external_gfx_void_func_struct_i8_i32_inreg. The expected output writes s4
; and s5 into lanes 0 and 1 of v40, moves the two loaded fields into s4 and s5
; with v_readfirstlane_b32 before the s_swappc_b64, and reads s4/s5 back from
; those lanes afterwards.
define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 {
351+
; GFX9-LABEL: test_gfx_call_external_void_func_struct_i8_i32_inreg:
352+
; GFX9: ; %bb.0:
353+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354+
; GFX9-NEXT: s_mov_b32 s34, s33
355+
; GFX9-NEXT: s_mov_b32 s33, s32
356+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
357+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
358+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
359+
; GFX9-NEXT: v_writelane_b32 v40, s34, 4
360+
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0
361+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
362+
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
363+
; GFX9-NEXT: s_addk_i32 s32, 0x400
364+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
365+
; GFX9-NEXT: global_load_ubyte v1, v0, s[34:35]
366+
; GFX9-NEXT: global_load_dword v2, v0, s[34:35] offset:4
367+
; GFX9-NEXT: v_writelane_b32 v40, s5, 1
368+
; GFX9-NEXT: v_writelane_b32 v40, s30, 2
369+
; GFX9-NEXT: s_getpc_b64 s[34:35]
370+
; GFX9-NEXT: s_add_u32 s34, s34, external_gfx_void_func_struct_i8_i32_inreg@rel32@lo+4
371+
; GFX9-NEXT: s_addc_u32 s35, s35, external_gfx_void_func_struct_i8_i32_inreg@rel32@hi+12
372+
; GFX9-NEXT: v_writelane_b32 v40, s31, 3
373+
; GFX9-NEXT: s_waitcnt vmcnt(1)
374+
; GFX9-NEXT: v_readfirstlane_b32 s4, v1
375+
; GFX9-NEXT: s_waitcnt vmcnt(0)
376+
; GFX9-NEXT: v_readfirstlane_b32 s5, v2
377+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
378+
; GFX9-NEXT: v_readlane_b32 s31, v40, 3
379+
; GFX9-NEXT: v_readlane_b32 s30, v40, 2
380+
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
381+
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
382+
; GFX9-NEXT: s_mov_b32 s32, s33
383+
; GFX9-NEXT: v_readlane_b32 s34, v40, 4
384+
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
385+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
386+
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
387+
; GFX9-NEXT: s_mov_b32 s33, s34
388+
; GFX9-NEXT: s_waitcnt vmcnt(0)
389+
; GFX9-NEXT: s_setpc_b64 s[30:31]
390+
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
391+
%val = load { i8, i32 }, ptr addrspace(1) %ptr0
392+
call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val)
393+
ret void
394+
}
395+
396+
; Attribute groups. Every declaration and definition visible in this file
; references #0 only; nothing visible here references #1 or #2.
attributes #0 = { nounwind }
397+
attributes #1 = { nounwind readnone }
398+
attributes #2 = { nounwind noinline }

0 commit comments

Comments
 (0)