Skip to content

Commit 13fe072

Browse files
committed
[NFC][AMDGPU][GISel] Add RUN lines to call tests
1 parent f6f2755 commit 13fe072

13 files changed

+3243
-1905
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/call.ll

Lines changed: 0 additions & 398 deletions
This file was deleted.
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
4+
5+
declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
6+
declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
7+
8+
define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
9+
; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
10+
; GFX9: ; %bb.0:
11+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12+
; GFX9-NEXT: s_mov_b32 s17, s33
13+
; GFX9-NEXT: s_mov_b32 s33, s32
14+
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
15+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
16+
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
17+
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
18+
; GFX9-NEXT: s_addk_i32 s32, 0x400
19+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
20+
; GFX9-NEXT: s_getpc_b64 s[18:19]
21+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
22+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
23+
; GFX9-NEXT: s_mov_b32 s0, s16
24+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
25+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
26+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
27+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
28+
; GFX9-NEXT: s_mov_b32 s32, s33
29+
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
30+
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
31+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
32+
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
33+
; GFX9-NEXT: s_mov_b32 s33, s4
34+
; GFX9-NEXT: s_waitcnt vmcnt(0)
35+
; GFX9-NEXT: s_setpc_b64 s[30:31]
36+
;
37+
; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
38+
; GFX11: ; %bb.0:
39+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40+
; GFX11-NEXT: s_mov_b32 s1, s33
41+
; GFX11-NEXT: s_mov_b32 s33, s32
42+
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
43+
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
44+
; GFX11-NEXT: s_mov_b32 exec_lo, s2
45+
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
46+
; GFX11-NEXT: s_add_i32 s32, s32, 16
47+
; GFX11-NEXT: s_getpc_b64 s[2:3]
48+
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
49+
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
50+
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
51+
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
52+
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
53+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
54+
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
55+
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
56+
; GFX11-NEXT: s_mov_b32 s32, s33
57+
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
58+
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
59+
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
60+
; GFX11-NEXT: s_mov_b32 exec_lo, s1
61+
; GFX11-NEXT: s_mov_b32 s33, s0
62+
; GFX11-NEXT: s_waitcnt vmcnt(0)
63+
; GFX11-NEXT: s_setpc_b64 s[30:31]
64+
call void @external_void_func_bf16_inreg(bfloat inreg %arg)
65+
ret void
66+
}
67+
68+
define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
69+
; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
70+
; GFX9: ; %bb.0:
71+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72+
; GFX9-NEXT: s_mov_b32 s17, s33
73+
; GFX9-NEXT: s_mov_b32 s33, s32
74+
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
75+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
76+
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
77+
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
78+
; GFX9-NEXT: s_addk_i32 s32, 0x400
79+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
80+
; GFX9-NEXT: s_getpc_b64 s[18:19]
81+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
82+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
83+
; GFX9-NEXT: s_mov_b32 s0, s16
84+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
85+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
86+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
87+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
88+
; GFX9-NEXT: s_mov_b32 s32, s33
89+
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
90+
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
91+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
92+
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
93+
; GFX9-NEXT: s_mov_b32 s33, s4
94+
; GFX9-NEXT: s_waitcnt vmcnt(0)
95+
; GFX9-NEXT: s_setpc_b64 s[30:31]
96+
;
97+
; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
98+
; GFX11: ; %bb.0:
99+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100+
; GFX11-NEXT: s_mov_b32 s1, s33
101+
; GFX11-NEXT: s_mov_b32 s33, s32
102+
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
103+
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
104+
; GFX11-NEXT: s_mov_b32 exec_lo, s2
105+
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
106+
; GFX11-NEXT: s_add_i32 s32, s32, 16
107+
; GFX11-NEXT: s_getpc_b64 s[2:3]
108+
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
109+
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
110+
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
111+
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
112+
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
113+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
114+
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
115+
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
116+
; GFX11-NEXT: s_mov_b32 s32, s33
117+
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
118+
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
119+
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
120+
; GFX11-NEXT: s_mov_b32 exec_lo, s1
121+
; GFX11-NEXT: s_mov_b32 s33, s0
122+
; GFX11-NEXT: s_waitcnt vmcnt(0)
123+
; GFX11-NEXT: s_setpc_b64 s[30:31]
124+
call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
125+
ret void
126+
}
127+

llvm/test/CodeGen/AMDGPU/call-args-inreg.ll

Lines changed: 7 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
3-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,SDAG %s
3+
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,SDAG %s
4+
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GISEL %s
5+
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GISEL %s
46

57
declare hidden void @external_void_func_i8_inreg(i8 inreg) #0
68
declare hidden void @external_void_func_i16_inreg(i32 inreg) #0
@@ -12,11 +14,9 @@ declare hidden void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
1214
declare hidden void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
1315
declare hidden void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
1416
declare hidden void @external_void_func_f16_inreg(half inreg) #0
15-
declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
1617
declare hidden void @external_void_func_f32_inreg(float inreg) #0
1718
declare hidden void @external_void_func_f64_inreg(double inreg) #0
1819
declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
19-
declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
2020
declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
2121
declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) #0
2222

@@ -585,66 +585,6 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
585585
ret void
586586
}
587587

588-
define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
589-
; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
590-
; GFX9: ; %bb.0:
591-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592-
; GFX9-NEXT: s_mov_b32 s17, s33
593-
; GFX9-NEXT: s_mov_b32 s33, s32
594-
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
595-
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
596-
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
597-
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
598-
; GFX9-NEXT: s_addk_i32 s32, 0x400
599-
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
600-
; GFX9-NEXT: s_getpc_b64 s[18:19]
601-
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
602-
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
603-
; GFX9-NEXT: s_mov_b32 s0, s16
604-
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
605-
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
606-
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
607-
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
608-
; GFX9-NEXT: s_mov_b32 s32, s33
609-
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
610-
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
611-
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
612-
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
613-
; GFX9-NEXT: s_mov_b32 s33, s4
614-
; GFX9-NEXT: s_waitcnt vmcnt(0)
615-
; GFX9-NEXT: s_setpc_b64 s[30:31]
616-
;
617-
; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
618-
; GFX11: ; %bb.0:
619-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620-
; GFX11-NEXT: s_mov_b32 s1, s33
621-
; GFX11-NEXT: s_mov_b32 s33, s32
622-
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
623-
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
624-
; GFX11-NEXT: s_mov_b32 exec_lo, s2
625-
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
626-
; GFX11-NEXT: s_add_i32 s32, s32, 16
627-
; GFX11-NEXT: s_getpc_b64 s[2:3]
628-
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
629-
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
630-
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
631-
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
632-
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
633-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
634-
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
635-
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
636-
; GFX11-NEXT: s_mov_b32 s32, s33
637-
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
638-
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
639-
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
640-
; GFX11-NEXT: s_mov_b32 exec_lo, s1
641-
; GFX11-NEXT: s_mov_b32 s33, s0
642-
; GFX11-NEXT: s_waitcnt vmcnt(0)
643-
; GFX11-NEXT: s_setpc_b64 s[30:31]
644-
call void @external_void_func_bf16_inreg(bfloat inreg %arg)
645-
ret void
646-
}
647-
648588
define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
649589
; GFX9-LABEL: test_call_external_void_func_f32_inreg:
650590
; GFX9: ; %bb.0:
@@ -826,67 +766,6 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
826766
ret void
827767
}
828768

829-
830-
define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
831-
; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
832-
; GFX9: ; %bb.0:
833-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
834-
; GFX9-NEXT: s_mov_b32 s17, s33
835-
; GFX9-NEXT: s_mov_b32 s33, s32
836-
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
837-
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
838-
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
839-
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
840-
; GFX9-NEXT: s_addk_i32 s32, 0x400
841-
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
842-
; GFX9-NEXT: s_getpc_b64 s[18:19]
843-
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
844-
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
845-
; GFX9-NEXT: s_mov_b32 s0, s16
846-
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
847-
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
848-
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
849-
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
850-
; GFX9-NEXT: s_mov_b32 s32, s33
851-
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
852-
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
853-
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
854-
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
855-
; GFX9-NEXT: s_mov_b32 s33, s4
856-
; GFX9-NEXT: s_waitcnt vmcnt(0)
857-
; GFX9-NEXT: s_setpc_b64 s[30:31]
858-
;
859-
; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
860-
; GFX11: ; %bb.0:
861-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
862-
; GFX11-NEXT: s_mov_b32 s1, s33
863-
; GFX11-NEXT: s_mov_b32 s33, s32
864-
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
865-
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
866-
; GFX11-NEXT: s_mov_b32 exec_lo, s2
867-
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
868-
; GFX11-NEXT: s_add_i32 s32, s32, 16
869-
; GFX11-NEXT: s_getpc_b64 s[2:3]
870-
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
871-
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
872-
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
873-
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
874-
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
875-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
876-
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
877-
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
878-
; GFX11-NEXT: s_mov_b32 s32, s33
879-
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
880-
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
881-
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
882-
; GFX11-NEXT: s_mov_b32 exec_lo, s1
883-
; GFX11-NEXT: s_mov_b32 s33, s0
884-
; GFX11-NEXT: s_waitcnt vmcnt(0)
885-
; GFX11-NEXT: s_setpc_b64 s[30:31]
886-
call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
887-
ret void
888-
}
889-
890769
define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 {
891770
; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
892771
; GFX9: ; %bb.0:
@@ -1529,3 +1408,6 @@ define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inre
15291408
attributes #0 = { nounwind }
15301409
attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
15311410

1411+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1412+
; GISEL: {{.*}}
1413+
; SDAG: {{.*}}

0 commit comments

Comments
 (0)