1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s
3
- ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GCN %s
2
+ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GFX950 %s
3
+ ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefix=GFX950 %s
4
+ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250 %s
5
+ ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250 %s
4
6
5
7
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
6
8
; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s
@@ -17,6 +19,18 @@ define { i32, i32 } @v_permlane16_swap_b32_vv(i32 %vdst_old, i32 %src0_old) {
17
19
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18
20
; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
19
21
; GCN-NEXT: s_setpc_b64 s[30:31]
22
+ ; GFX950-LABEL: v_permlane16_swap_b32_vv:
23
+ ; GFX950: ; %bb.0:
24
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25
+ ; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
26
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
27
+ ;
28
+ ; GFX1250-LABEL: v_permlane16_swap_b32_vv:
29
+ ; GFX1250: ; %bb.0:
30
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
31
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
32
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
33
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
20
34
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 %vdst_old , i32 %src0_old , i1 false , i1 false )
21
35
ret { i32 , i32 } %v
22
36
}
@@ -29,6 +43,22 @@ define { i32, i32 } @v_permlane16_swap_b32_vi(i32 %vdst_old) {
29
43
; GCN-NEXT: s_nop 1
30
44
; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
31
45
; GCN-NEXT: s_setpc_b64 s[30:31]
46
+ ; GFX950-LABEL: v_permlane16_swap_b32_vi:
47
+ ; GFX950: ; %bb.0:
48
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49
+ ; GFX950-NEXT: v_mov_b32_e32 v1, 1
50
+ ; GFX950-NEXT: s_nop 1
51
+ ; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
52
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
53
+ ;
54
+ ; GFX1250-LABEL: v_permlane16_swap_b32_vi:
55
+ ; GFX1250: ; %bb.0:
56
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
57
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
58
+ ; GFX1250-NEXT: v_mov_b32_e32 v1, 1
59
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
60
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
61
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
32
62
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 %vdst_old , i32 1 , i1 false , i1 false )
33
63
ret { i32 , i32 } %v
34
64
}
@@ -41,6 +71,22 @@ define { i32, i32 } @v_permlane16_swap_b32_vl(i32 %vdst_old) {
41
71
; GCN-NEXT: s_nop 1
42
72
; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
43
73
; GCN-NEXT: s_setpc_b64 s[30:31]
74
+ ; GFX950-LABEL: v_permlane16_swap_b32_vl:
75
+ ; GFX950: ; %bb.0:
76
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77
+ ; GFX950-NEXT: v_mov_b32_e32 v1, 0xc1d1
78
+ ; GFX950-NEXT: s_nop 1
79
+ ; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
80
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
81
+ ;
82
+ ; GFX1250-LABEL: v_permlane16_swap_b32_vl:
83
+ ; GFX1250: ; %bb.0:
84
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
85
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
86
+ ; GFX1250-NEXT: v_mov_b32_e32 v1, 0xc1d1
87
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
88
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
89
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
44
90
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 %vdst_old , i32 49617 , i1 false , i1 false )
45
91
ret { i32 , i32 } %v
46
92
}
@@ -54,6 +100,23 @@ define { i32, i32 } @v_permlane16_swap_b32_iv(i32 %src0_old) {
54
100
; GCN-NEXT: s_nop 1
55
101
; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
56
102
; GCN-NEXT: s_setpc_b64 s[30:31]
103
+ ; GFX950-LABEL: v_permlane16_swap_b32_iv:
104
+ ; GFX950: ; %bb.0:
105
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106
+ ; GFX950-NEXT: v_mov_b32_e32 v1, v0
107
+ ; GFX950-NEXT: v_mov_b32_e32 v0, 1
108
+ ; GFX950-NEXT: s_nop 1
109
+ ; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
110
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
111
+ ;
112
+ ; GFX1250-LABEL: v_permlane16_swap_b32_iv:
113
+ ; GFX1250: ; %bb.0:
114
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
115
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
116
+ ; GFX1250-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 1
117
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
118
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
119
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
57
120
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 1 , i32 %src0_old , i1 false , i1 false )
58
121
ret { i32 , i32 } %v
59
122
}
@@ -67,6 +130,23 @@ define { i32, i32 } @v_permlane16_swap_b32_ss(i32 inreg %vdst_old, i32 inreg %sr
67
130
; GCN-NEXT: s_nop 1
68
131
; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
69
132
; GCN-NEXT: s_setpc_b64 s[30:31]
133
+ ; GFX950-LABEL: v_permlane16_swap_b32_ss:
134
+ ; GFX950: ; %bb.0:
135
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136
+ ; GFX950-NEXT: v_mov_b32_e32 v0, s0
137
+ ; GFX950-NEXT: v_mov_b32_e32 v1, s1
138
+ ; GFX950-NEXT: s_nop 1
139
+ ; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
140
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
141
+ ;
142
+ ; GFX1250-LABEL: v_permlane16_swap_b32_ss:
143
+ ; GFX1250: ; %bb.0:
144
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
145
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
146
+ ; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
147
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
148
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
149
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
70
150
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 %vdst_old , i32 %src0_old , i1 false , i1 false )
71
151
ret { i32 , i32 } %v
72
152
}
@@ -80,6 +160,23 @@ define { i32, i32 } @v_permlane16_swap_b32_sv(i32 inreg %vdst_old, i32 %src0_old
80
160
; GCN-NEXT: s_nop 1
81
161
; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
82
162
; GCN-NEXT: s_setpc_b64 s[30:31]
163
+ ; GFX950-LABEL: v_permlane16_swap_b32_sv:
164
+ ; GFX950: ; %bb.0:
165
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166
+ ; GFX950-NEXT: v_mov_b32_e32 v1, v0
167
+ ; GFX950-NEXT: v_mov_b32_e32 v0, s0
168
+ ; GFX950-NEXT: s_nop 1
169
+ ; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
170
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
171
+ ;
172
+ ; GFX1250-LABEL: v_permlane16_swap_b32_sv:
173
+ ; GFX1250: ; %bb.0:
174
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
175
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
176
+ ; GFX1250-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s0
177
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
178
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
179
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
83
180
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 %vdst_old , i32 %src0_old , i1 false , i1 false )
84
181
ret { i32 , i32 } %v
85
182
}
@@ -92,6 +189,22 @@ define { i32, i32 } @v_permlane16_swap_b32_vs(i32 %vdst_old, i32 inreg %src0_old
92
189
; GCN-NEXT: s_nop 1
93
190
; GCN-NEXT: v_permlane16_swap_b32_e32 v0, v1
94
191
; GCN-NEXT: s_setpc_b64 s[30:31]
192
+ ; GFX950-LABEL: v_permlane16_swap_b32_vs:
193
+ ; GFX950: ; %bb.0:
194
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195
+ ; GFX950-NEXT: v_mov_b32_e32 v1, s0
196
+ ; GFX950-NEXT: s_nop 1
197
+ ; GFX950-NEXT: v_permlane16_swap_b32_e32 v0, v1
198
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
199
+ ;
200
+ ; GFX1250-LABEL: v_permlane16_swap_b32_vs:
201
+ ; GFX1250: ; %bb.0:
202
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
203
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
204
+ ; GFX1250-NEXT: v_mov_b32_e32 v1, s0
205
+ ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
206
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e32 v0, v1
207
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
95
208
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 %vdst_old , i32 %src0_old , i1 false , i1 false )
96
209
ret { i32 , i32 } %v
97
210
}
@@ -102,6 +215,18 @@ define { i32, i32 } @v_permlane16_swap_b32_vv_fi(i32 %vdst_old, i32 %src0_old) {
102
215
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103
216
; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 fi:1
104
217
; GCN-NEXT: s_setpc_b64 s[30:31]
218
+ ; GFX950-LABEL: v_permlane16_swap_b32_vv_fi:
219
+ ; GFX950: ; %bb.0:
220
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221
+ ; GFX950-NEXT: v_permlane16_swap_b32_e64 v0, v1 fi:1
222
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
223
+ ;
224
+ ; GFX1250-LABEL: v_permlane16_swap_b32_vv_fi:
225
+ ; GFX1250: ; %bb.0:
226
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
227
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
228
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e64 v0, v1 fi:1
229
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
105
230
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 %vdst_old , i32 %src0_old , i1 true , i1 false )
106
231
ret { i32 , i32 } %v
107
232
}
@@ -112,6 +237,18 @@ define { i32, i32 } @v_permlane16_swap_b32_vv_bc(i32 %vdst_old, i32 %src0_old) {
112
237
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113
238
; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1
114
239
; GCN-NEXT: s_setpc_b64 s[30:31]
240
+ ; GFX950-LABEL: v_permlane16_swap_b32_vv_bc:
241
+ ; GFX950: ; %bb.0:
242
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243
+ ; GFX950-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1
244
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
245
+ ;
246
+ ; GFX1250-LABEL: v_permlane16_swap_b32_vv_bc:
247
+ ; GFX1250: ; %bb.0:
248
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
249
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
250
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1
251
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
115
252
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 %vdst_old , i32 %src0_old , i1 false , i1 true )
116
253
ret { i32 , i32 } %v
117
254
}
@@ -122,6 +259,18 @@ define { i32, i32 } @v_permlane16_swap_b32_vv_fi_bc(i32 %vdst_old, i32 %src0_old
122
259
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123
260
; GCN-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1
124
261
; GCN-NEXT: s_setpc_b64 s[30:31]
262
+ ; GFX950-LABEL: v_permlane16_swap_b32_vv_fi_bc:
263
+ ; GFX950: ; %bb.0:
264
+ ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
265
+ ; GFX950-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1
266
+ ; GFX950-NEXT: s_setpc_b64 s[30:31]
267
+ ;
268
+ ; GFX1250-LABEL: v_permlane16_swap_b32_vv_fi_bc:
269
+ ; GFX1250: ; %bb.0:
270
+ ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
271
+ ; GFX1250-NEXT: s_wait_kmcnt 0x0
272
+ ; GFX1250-NEXT: v_permlane16_swap_b32_e64 v0, v1 bound_ctrl:1 fi:1
273
+ ; GFX1250-NEXT: s_set_pc_i64 s[30:31]
125
274
%v = call { i32 , i32 } @llvm.amdgcn.permlane16.swap (i32 %vdst_old , i32 %src0_old , i1 true , i1 true )
126
275
ret { i32 , i32 } %v
127
276
}
0 commit comments