@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GFX950 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GFX950 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250 %s
 
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
 ; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
@@ -17,6 +19,18 @@ define { i32, i32 } @v_permlane16_swap_b32_vv(i32 %vdst_old, i32 %src0_old) {
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_vv:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false)
   ret { i32, i32 } %v
 }
@@ -29,6 +43,22 @@ define { i32, i32 } @v_permlane16_swap_b32_vi(i32 %vdst_old) {
 ; GCN-NEXT: s_nop 1
 ; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_vi:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v1, 1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_vi:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, 1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 1, i1 false, i1 false)
   ret { i32, i32 } %v
 }
@@ -41,6 +71,22 @@ define { i32, i32 } @v_permlane16_swap_b32_vl(i32 %vdst_old) {
 ; GCN-NEXT: s_nop 1
 ; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_vl:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v1, 0xc1d1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_vl:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0xc1d1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 49617, i1 false, i1 false)
   ret { i32, i32 } %v
 }
@@ -54,6 +100,23 @@ define { i32, i32 } @v_permlane16_swap_b32_iv(i32 %src0_old) {
 ; GCN-NEXT: s_nop 1
 ; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_iv:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v1, v0
+; GFX950-NEXT: v_mov_b32_e32 v0, 1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_iv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 1, i32 %src0_old, i1 false, i1 false)
   ret { i32, i32 } %v
 }
@@ -67,6 +130,23 @@ define { i32, i32 } @v_permlane16_swap_b32_ss(i32 inreg %vdst_old, i32 inreg %sr
 ; GCN-NEXT: s_nop 1
 ; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_ss:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: v_mov_b32_e32 v1, s1
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_ss:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false)
   ret { i32, i32 } %v
 }
@@ -80,6 +160,23 @@ define { i32, i32 } @v_permlane16_swap_b32_sv(i32 inreg %vdst_old, i32 %src0_old
 ; GCN-NEXT: s_nop 1
 ; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_sv:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v1, v0
+; GFX950-NEXT: v_mov_b32_e32 v0, s0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_sv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false)
   ret { i32, i32 } %v
 }
@@ -92,6 +189,22 @@ define { i32, i32 } @v_permlane16_swap_b32_vs(i32 %vdst_old, i32 inreg %src0_old
 ; GCN-NEXT: s_nop 1
 ; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_vs:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_mov_b32_e32 v1, s0
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_vs:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 false)
   ret { i32, i32 } %v
 }
@@ -102,6 +215,18 @@ define { i32, i32 } @v_permlane16_swap_b32_vv_fi(i32 %vdst_old, i32 %src0_old) {
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 fi:1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_vv_fi:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_permlane16_swap_b32_e64 v0, v1 fi:1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_vv_fi:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_permlane16_swap_b32_e64 v0, v1 fi:1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 false)
   ret { i32, i32 } %v
 }
@@ -112,6 +237,18 @@ define { i32, i32 } @v_permlane16_swap_b32_vv_bc(i32 %vdst_old, i32 %src0_old) {
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_vv_bc:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_vv_bc:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 false, i1 true)
   ret { i32, i32 } %v
 }
@@ -122,6 +259,18 @@ define { i32, i32 } @v_permlane16_swap_b32_vv_fi_bc(i32 %vdst_old, i32 %src0_old
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1
 ; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX950-LABEL: v_permlane16_swap_b32_vv_fi_bc:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: v_permlane16_swap_b32_vv_fi_bc:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
   %v = call { i32, i32 } @llvm.amdgcn.permlane16.swap(i32 %vdst_old, i32 %src0_old, i1 true, i1 true)
   ret { i32, i32 } %v
 }
0 commit comments