Skip to content

Commit 4637bf0

Browse files
authored
[NFC][AMDGPU][GISel] Precommit GlobalISel specific tests for call instruction (#165898)
1 parent 9e341b3 commit 4637bf0

12 files changed

+3248
-1979
lines changed
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
4+
5+
; This file is kept separate from call-args-inreg.ll because GlobalISel does not yet support the bfloat type.
6+
; Ideally, we should merge the two files once that support lands.
7+
8+
; External callees used by the tests below; only their declarations appear
; here. Each takes a single `inreg` argument (bfloat and <2 x bfloat>
; respectively) and returns void.
declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
9+
declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
10+
11+
; Forwards its `inreg` bfloat argument unchanged to
; @external_void_func_bf16_inreg and returns.
;
; The autogenerated assertions below pin the complete llc output for the
; gfx900 and gfx1100 runs. The gfx900 expectations contain an
; `s_mov_b32 s0, s16` ahead of the call, whereas the gfx1100 expectations
; have no write to s0 before the call.
; NOTE(review): presumably that copy moves the incoming argument into the
; register the callee expects - confirm against the AMDGPU calling convention.
;
; Do not hand-edit the assertions; regenerate them with
; utils/update_llc_test_checks.py.
define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
12+
; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
13+
; GFX9: ; %bb.0:
14+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15+
; GFX9-NEXT: s_mov_b32 s17, s33
16+
; GFX9-NEXT: s_mov_b32 s33, s32
17+
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
18+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
19+
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
20+
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
21+
; GFX9-NEXT: s_addk_i32 s32, 0x400
22+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
23+
; GFX9-NEXT: s_getpc_b64 s[18:19]
24+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
25+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
26+
; GFX9-NEXT: s_mov_b32 s0, s16
27+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
28+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
29+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
30+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
31+
; GFX9-NEXT: s_mov_b32 s32, s33
32+
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
33+
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
34+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
35+
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
36+
; GFX9-NEXT: s_mov_b32 s33, s4
37+
; GFX9-NEXT: s_waitcnt vmcnt(0)
38+
; GFX9-NEXT: s_setpc_b64 s[30:31]
39+
;
40+
; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
41+
; GFX11: ; %bb.0:
42+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43+
; GFX11-NEXT: s_mov_b32 s1, s33
44+
; GFX11-NEXT: s_mov_b32 s33, s32
45+
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
46+
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
47+
; GFX11-NEXT: s_mov_b32 exec_lo, s2
48+
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
49+
; GFX11-NEXT: s_add_i32 s32, s32, 16
50+
; GFX11-NEXT: s_getpc_b64 s[2:3]
51+
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
52+
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
53+
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
54+
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
55+
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
56+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
57+
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
58+
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
59+
; GFX11-NEXT: s_mov_b32 s32, s33
60+
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
61+
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
62+
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
63+
; GFX11-NEXT: s_mov_b32 exec_lo, s1
64+
; GFX11-NEXT: s_mov_b32 s33, s0
65+
; GFX11-NEXT: s_waitcnt vmcnt(0)
66+
; GFX11-NEXT: s_setpc_b64 s[30:31]
67+
call void @external_void_func_bf16_inreg(bfloat inreg %arg)
68+
ret void
69+
}
70+
71+
; Forwards its `inreg` <2 x bfloat> argument unchanged to
; @external_void_func_v2bf16_inreg and returns.
;
; The autogenerated assertions below pin the complete llc output for the
; gfx900 and gfx1100 runs. The gfx900 expectations contain an
; `s_mov_b32 s0, s16` ahead of the call, whereas the gfx1100 expectations
; have no write to s0 before the call.
; NOTE(review): presumably that copy moves the incoming argument into the
; register the callee expects - confirm against the AMDGPU calling convention.
;
; Do not hand-edit the assertions; regenerate them with
; utils/update_llc_test_checks.py.
define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
72+
; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
73+
; GFX9: ; %bb.0:
74+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75+
; GFX9-NEXT: s_mov_b32 s17, s33
76+
; GFX9-NEXT: s_mov_b32 s33, s32
77+
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
78+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
79+
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
80+
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
81+
; GFX9-NEXT: s_addk_i32 s32, 0x400
82+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
83+
; GFX9-NEXT: s_getpc_b64 s[18:19]
84+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
85+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
86+
; GFX9-NEXT: s_mov_b32 s0, s16
87+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
88+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
89+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
90+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
91+
; GFX9-NEXT: s_mov_b32 s32, s33
92+
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
93+
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
94+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
95+
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
96+
; GFX9-NEXT: s_mov_b32 s33, s4
97+
; GFX9-NEXT: s_waitcnt vmcnt(0)
98+
; GFX9-NEXT: s_setpc_b64 s[30:31]
99+
;
100+
; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
101+
; GFX11: ; %bb.0:
102+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103+
; GFX11-NEXT: s_mov_b32 s1, s33
104+
; GFX11-NEXT: s_mov_b32 s33, s32
105+
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
106+
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
107+
; GFX11-NEXT: s_mov_b32 exec_lo, s2
108+
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
109+
; GFX11-NEXT: s_add_i32 s32, s32, 16
110+
; GFX11-NEXT: s_getpc_b64 s[2:3]
111+
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
112+
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
113+
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
114+
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
115+
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
116+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
117+
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
118+
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
119+
; GFX11-NEXT: s_mov_b32 s32, s33
120+
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
121+
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
122+
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
123+
; GFX11-NEXT: s_mov_b32 exec_lo, s1
124+
; GFX11-NEXT: s_mov_b32 s33, s0
125+
; GFX11-NEXT: s_waitcnt vmcnt(0)
126+
; GFX11-NEXT: s_setpc_b64 s[30:31]
127+
call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
128+
ret void
129+
}
130+

0 commit comments

Comments
 (0)