1- ;RUN: llc < %s -mtriple=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
2- ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=FUNC %s
3- ;RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
4-
5- ; FUNC-LABEL: {{^}}test_select_v2i32:
6-
7- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
8- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
9-
10- ; VI: s_cmp_gt_i32
11- ; VI: s_cselect_b32
12- ; VI: s_cmp_gt_i32
13- ; VI: s_cselect_b32
14-
15- ; SI-DAG: s_cmp_gt_i32
16- ; SI-DAG: s_cselect_b32
17- ; SI-DAG: s_cmp_gt_i32
18- ; SI-DAG: s_cselect_b32
1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+ ;RUN: llc < %s -mtriple=amdgcn -verify-machineinstrs | FileCheck --check-prefixes=SI %s
3+ ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=VI %s
4+ ;RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck --check-prefixes=EG %s
195
206define amdgpu_kernel void @test_select_v2i32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 , <2 x i32 > %val ) {
7+ ; SI-LABEL: test_select_v2i32:
8+ ; SI: ; %bb.0: ; %entry
9+ ; SI-NEXT: s_load_dwordx8 s[0:7], s[2:3], 0x9
10+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
11+ ; SI-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x0
12+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
13+ ; SI-NEXT: s_mov_b32 s3, 0xf000
14+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
15+ ; SI-NEXT: s_cmp_gt_i32 s9, s5
16+ ; SI-NEXT: s_cselect_b32 s5, s7, s9
17+ ; SI-NEXT: s_cmp_gt_i32 s8, s4
18+ ; SI-NEXT: s_cselect_b32 s4, s6, s8
19+ ; SI-NEXT: s_mov_b32 s2, -1
20+ ; SI-NEXT: v_mov_b32_e32 v1, s5
21+ ; SI-NEXT: v_mov_b32_e32 v0, s4
22+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
23+ ; SI-NEXT: s_endpgm
24+ ;
25+ ; VI-LABEL: test_select_v2i32:
26+ ; VI: ; %bb.0: ; %entry
27+ ; VI-NEXT: s_load_dwordx8 s[0:7], s[2:3], 0x24
28+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
29+ ; VI-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x0
30+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
31+ ; VI-NEXT: s_mov_b32 s3, 0xf000
32+ ; VI-NEXT: s_mov_b32 s2, -1
33+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
34+ ; VI-NEXT: s_cmp_gt_i32 s9, s5
35+ ; VI-NEXT: s_cselect_b32 s5, s7, s9
36+ ; VI-NEXT: s_cmp_gt_i32 s8, s4
37+ ; VI-NEXT: s_cselect_b32 s4, s6, s8
38+ ; VI-NEXT: v_mov_b32_e32 v0, s4
39+ ; VI-NEXT: v_mov_b32_e32 v1, s5
40+ ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
41+ ; VI-NEXT: s_endpgm
42+ ;
43+ ; EG-LABEL: test_select_v2i32:
44+ ; EG: ; %bb.0: ; %entry
45+ ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
46+ ; EG-NEXT: TEX 1 @6
47+ ; EG-NEXT: ALU 5, @12, KC0[CB0:0-32], KC1[]
48+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
49+ ; EG-NEXT: CF_END
50+ ; EG-NEXT: PAD
51+ ; EG-NEXT: Fetch clause starting at 6:
52+ ; EG-NEXT: VTX_READ_64 T1.XY, T1.X, 0, #1
53+ ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
54+ ; EG-NEXT: ALU clause starting at 10:
55+ ; EG-NEXT: MOV T0.X, KC0[2].Z,
56+ ; EG-NEXT: MOV * T1.X, KC0[2].W,
57+ ; EG-NEXT: ALU clause starting at 12:
58+ ; EG-NEXT: SETGT_INT * T0.W, T0.Y, T1.Y,
59+ ; EG-NEXT: CNDE_INT T0.Y, PV.W, T0.Y, KC0[3].Z,
60+ ; EG-NEXT: SETGT_INT * T0.W, T0.X, T1.X,
61+ ; EG-NEXT: CNDE_INT T0.X, PV.W, T0.X, KC0[3].Y,
62+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
63+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2164entry:
2265 %load0 = load <2 x i32 >, ptr addrspace (1 ) %in0
2366 %load1 = load <2 x i32 >, ptr addrspace (1 ) %in1
@@ -27,17 +70,72 @@ entry:
2770 ret void
2871}
2972
30- ; FUNC-LABEL: {{^}}test_select_v2f32:
31-
32- ; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33- ; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
34-
35- ; SI: v_cmp_neq_f32_e32 vcc
36- ; SI: v_cndmask_b32_e32
37- ; SI: v_cmp_neq_f32_e32 vcc
38- ; SI: v_cndmask_b32_e32
39-
4073define amdgpu_kernel void @test_select_v2f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 ) {
74+ ; SI-LABEL: test_select_v2f32:
75+ ; SI: ; %bb.0: ; %entry
76+ ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
77+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
78+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
79+ ; SI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
80+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
81+ ; SI-NEXT: s_mov_b32 s7, 0xf000
82+ ; SI-NEXT: s_mov_b32 s6, -1
83+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
84+ ; SI-NEXT: v_mov_b32_e32 v0, s0
85+ ; SI-NEXT: v_mov_b32_e32 v1, s1
86+ ; SI-NEXT: v_mov_b32_e32 v2, s3
87+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v1
88+ ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
89+ ; SI-NEXT: v_mov_b32_e32 v2, s2
90+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v0
91+ ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
92+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
93+ ; SI-NEXT: s_endpgm
94+ ;
95+ ; VI-LABEL: test_select_v2f32:
96+ ; VI: ; %bb.0: ; %entry
97+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x34
98+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
99+ ; VI-NEXT: s_mov_b32 s7, 0xf000
100+ ; VI-NEXT: s_mov_b32 s6, -1
101+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
102+ ; VI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
103+ ; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
104+ ; VI-NEXT: s_mov_b32 s4, s0
105+ ; VI-NEXT: s_mov_b32 s5, s1
106+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
107+ ; VI-NEXT: v_mov_b32_e32 v1, s9
108+ ; VI-NEXT: v_mov_b32_e32 v0, s8
109+ ; VI-NEXT: v_mov_b32_e32 v2, s3
110+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v1
111+ ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
112+ ; VI-NEXT: v_mov_b32_e32 v2, s2
113+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v0
114+ ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
115+ ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
116+ ; VI-NEXT: s_endpgm
117+ ;
118+ ; EG-LABEL: test_select_v2f32:
119+ ; EG: ; %bb.0: ; %entry
120+ ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
121+ ; EG-NEXT: TEX 1 @6
122+ ; EG-NEXT: ALU 5, @12, KC0[CB0:0-32], KC1[]
123+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
124+ ; EG-NEXT: CF_END
125+ ; EG-NEXT: PAD
126+ ; EG-NEXT: Fetch clause starting at 6:
127+ ; EG-NEXT: VTX_READ_64 T1.XY, T1.X, 0, #1
128+ ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
129+ ; EG-NEXT: ALU clause starting at 10:
130+ ; EG-NEXT: MOV T0.X, KC0[2].Z,
131+ ; EG-NEXT: MOV * T1.X, KC0[2].W,
132+ ; EG-NEXT: ALU clause starting at 12:
133+ ; EG-NEXT: SETNE_DX10 * T0.W, T0.Y, T1.Y,
134+ ; EG-NEXT: CNDE_INT T0.Y, PV.W, T1.Y, T0.Y,
135+ ; EG-NEXT: SETNE_DX10 * T0.W, T0.X, T1.X,
136+ ; EG-NEXT: CNDE_INT T0.X, PV.W, T1.X, T0.X,
137+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
138+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
41139entry:
42140 %0 = load <2 x float >, ptr addrspace (1 ) %in0
43141 %1 = load <2 x float >, ptr addrspace (1 ) %in1
@@ -47,24 +145,86 @@ entry:
47145 ret void
48146}
49147
50- ;FUNC-LABEL: {{^}}test_select_v4i32:
51-
52- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[4].X
53- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].W
54- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
55- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
56-
57- ; VI: s_cselect_b32
58- ; VI: s_cselect_b32
59- ; VI: s_cselect_b32
60- ; VI: s_cselect_b32
61-
62- ; SI-DAG: s_cselect_b32
63- ; SI-DAG: s_cselect_b32
64- ; SI-DAG: s_cselect_b32
65- ; SI-DAG: s_cselect_b32
66-
67148define amdgpu_kernel void @test_select_v4i32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 , <4 x i32 > %val ) {
149+ ; SI-LABEL: test_select_v4i32:
150+ ; SI: ; %bb.0: ; %entry
151+ ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
152+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
153+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
154+ ; SI-NEXT: s_load_dwordx4 s[8:11], s[6:7], 0x0
155+ ; SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x0
156+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x11
157+ ; SI-NEXT: s_mov_b32 s7, 0xf000
158+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
159+ ; SI-NEXT: s_cmp_gt_i32 s10, s14
160+ ; SI-NEXT: s_cselect_b32 s2, s2, s10
161+ ; SI-NEXT: s_cmp_gt_i32 s9, s13
162+ ; SI-NEXT: s_cselect_b32 s1, s1, s9
163+ ; SI-NEXT: s_cmp_gt_i32 s11, s15
164+ ; SI-NEXT: s_cselect_b32 s3, s3, s11
165+ ; SI-NEXT: s_cmp_gt_i32 s8, s12
166+ ; SI-NEXT: s_cselect_b32 s0, s0, s8
167+ ; SI-NEXT: s_mov_b32 s6, -1
168+ ; SI-NEXT: v_mov_b32_e32 v2, s2
169+ ; SI-NEXT: v_mov_b32_e32 v1, s1
170+ ; SI-NEXT: v_mov_b32_e32 v3, s3
171+ ; SI-NEXT: v_mov_b32_e32 v0, s0
172+ ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
173+ ; SI-NEXT: s_endpgm
174+ ;
175+ ; VI-LABEL: test_select_v4i32:
176+ ; VI: ; %bb.0: ; %entry
177+ ; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
178+ ; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
179+ ; VI-NEXT: s_mov_b32 s11, 0xf000
180+ ; VI-NEXT: s_mov_b32 s10, -1
181+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
182+ ; VI-NEXT: s_load_dwordx4 s[12:15], s[6:7], 0x0
183+ ; VI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x0
184+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x44
185+ ; VI-NEXT: s_mov_b32 s8, s4
186+ ; VI-NEXT: s_mov_b32 s9, s5
187+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
188+ ; VI-NEXT: s_cmp_gt_i32 s14, s18
189+ ; VI-NEXT: s_cselect_b32 s2, s2, s14
190+ ; VI-NEXT: s_cmp_gt_i32 s13, s17
191+ ; VI-NEXT: s_cselect_b32 s1, s1, s13
192+ ; VI-NEXT: s_cmp_gt_i32 s15, s19
193+ ; VI-NEXT: s_cselect_b32 s3, s3, s15
194+ ; VI-NEXT: s_cmp_gt_i32 s12, s16
195+ ; VI-NEXT: s_cselect_b32 s0, s0, s12
196+ ; VI-NEXT: v_mov_b32_e32 v0, s0
197+ ; VI-NEXT: v_mov_b32_e32 v1, s1
198+ ; VI-NEXT: v_mov_b32_e32 v2, s2
199+ ; VI-NEXT: v_mov_b32_e32 v3, s3
200+ ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
201+ ; VI-NEXT: s_endpgm
202+ ;
203+ ; EG-LABEL: test_select_v4i32:
204+ ; EG: ; %bb.0: ; %entry
205+ ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
206+ ; EG-NEXT: TEX 1 @6
207+ ; EG-NEXT: ALU 9, @12, KC0[CB0:0-32], KC1[]
208+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
209+ ; EG-NEXT: CF_END
210+ ; EG-NEXT: PAD
211+ ; EG-NEXT: Fetch clause starting at 6:
212+ ; EG-NEXT: VTX_READ_128 T1.XYZW, T1.X, 0, #1
213+ ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
214+ ; EG-NEXT: ALU clause starting at 10:
215+ ; EG-NEXT: MOV T0.X, KC0[2].Z,
216+ ; EG-NEXT: MOV * T1.X, KC0[2].W,
217+ ; EG-NEXT: ALU clause starting at 12:
218+ ; EG-NEXT: SETGT_INT T1.W, T0.W, T1.W,
219+ ; EG-NEXT: SETGT_INT * T2.W, T0.Z, T1.Z,
220+ ; EG-NEXT: CNDE_INT * T0.W, PV.W, T0.W, KC0[4].X,
221+ ; EG-NEXT: CNDE_INT T0.Z, T2.W, T0.Z, KC0[3].W,
222+ ; EG-NEXT: SETGT_INT * T1.W, T0.Y, T1.Y,
223+ ; EG-NEXT: CNDE_INT T0.Y, PV.W, T0.Y, KC0[3].Z,
224+ ; EG-NEXT: SETGT_INT * T1.W, T0.X, T1.X,
225+ ; EG-NEXT: CNDE_INT T0.X, PV.W, T0.X, KC0[3].Y,
226+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
227+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
68228entry:
69229 %load0 = load <4 x i32 >, ptr addrspace (1 ) %in0
70230 %load1 = load <4 x i32 >, ptr addrspace (1 ) %in1
@@ -74,17 +234,92 @@ entry:
74234 ret void
75235}
76236
77- ;FUNC-LABEL: {{^}}test_select_v4f32:
78- ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
79- ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
80- ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
81- ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
82-
83- ; SI: v_cndmask_b32_e32
84- ; SI: v_cndmask_b32_e32
85- ; SI: v_cndmask_b32_e32
86- ; SI: v_cndmask_b32_e32
87237define amdgpu_kernel void @test_select_v4f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 ) {
238+ ; SI-LABEL: test_select_v4f32:
239+ ; SI: ; %bb.0: ; %entry
240+ ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
241+ ; SI-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0xd
242+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
243+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
244+ ; SI-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0
245+ ; SI-NEXT: s_mov_b32 s7, 0xf000
246+ ; SI-NEXT: s_mov_b32 s6, -1
247+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
248+ ; SI-NEXT: v_mov_b32_e32 v0, s8
249+ ; SI-NEXT: v_mov_b32_e32 v1, s9
250+ ; SI-NEXT: v_mov_b32_e32 v2, s10
251+ ; SI-NEXT: v_mov_b32_e32 v3, s11
252+ ; SI-NEXT: v_mov_b32_e32 v4, s3
253+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v3
254+ ; SI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
255+ ; SI-NEXT: v_mov_b32_e32 v4, s2
256+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v2
257+ ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
258+ ; SI-NEXT: v_mov_b32_e32 v4, s1
259+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s1, v1
260+ ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
261+ ; SI-NEXT: v_mov_b32_e32 v4, s0
262+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s0, v0
263+ ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
264+ ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
265+ ; SI-NEXT: s_endpgm
266+ ;
267+ ; VI-LABEL: test_select_v4f32:
268+ ; VI: ; %bb.0: ; %entry
269+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x34
270+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
271+ ; VI-NEXT: s_mov_b32 s7, 0xf000
272+ ; VI-NEXT: s_mov_b32 s6, -1
273+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
274+ ; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0
275+ ; VI-NEXT: s_mov_b32 s4, s0
276+ ; VI-NEXT: s_mov_b32 s5, s1
277+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
278+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
279+ ; VI-NEXT: v_mov_b32_e32 v3, s11
280+ ; VI-NEXT: v_mov_b32_e32 v2, s10
281+ ; VI-NEXT: v_mov_b32_e32 v1, s9
282+ ; VI-NEXT: v_mov_b32_e32 v4, s3
283+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v3
284+ ; VI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
285+ ; VI-NEXT: v_mov_b32_e32 v4, s2
286+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v2
287+ ; VI-NEXT: v_mov_b32_e32 v0, s8
288+ ; VI-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
289+ ; VI-NEXT: v_mov_b32_e32 v4, s1
290+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s1, v1
291+ ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
292+ ; VI-NEXT: v_mov_b32_e32 v4, s0
293+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s0, v0
294+ ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
295+ ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
296+ ; VI-NEXT: s_endpgm
297+ ;
298+ ; EG-LABEL: test_select_v4f32:
299+ ; EG: ; %bb.0: ; %entry
300+ ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
301+ ; EG-NEXT: TEX 1 @6
302+ ; EG-NEXT: ALU 9, @12, KC0[CB0:0-32], KC1[]
303+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
304+ ; EG-NEXT: CF_END
305+ ; EG-NEXT: PAD
306+ ; EG-NEXT: Fetch clause starting at 6:
307+ ; EG-NEXT: VTX_READ_128 T1.XYZW, T1.X, 0, #1
308+ ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
309+ ; EG-NEXT: ALU clause starting at 10:
310+ ; EG-NEXT: MOV T0.X, KC0[2].Z,
311+ ; EG-NEXT: MOV * T1.X, KC0[2].W,
312+ ; EG-NEXT: ALU clause starting at 12:
313+ ; EG-NEXT: SETNE_DX10 T2.W, T0.W, T1.W,
314+ ; EG-NEXT: SETNE_DX10 * T3.W, T0.Z, T1.Z,
315+ ; EG-NEXT: CNDE_INT * T0.W, PV.W, T1.W, T0.W,
316+ ; EG-NEXT: CNDE_INT T0.Z, T3.W, T1.Z, T0.Z,
317+ ; EG-NEXT: SETNE_DX10 * T1.W, T0.Y, T1.Y,
318+ ; EG-NEXT: CNDE_INT T0.Y, PV.W, T1.Y, T0.Y,
319+ ; EG-NEXT: SETNE_DX10 * T1.W, T0.X, T1.X,
320+ ; EG-NEXT: CNDE_INT T0.X, PV.W, T1.X, T0.X,
321+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
322+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
88323entry:
89324 %0 = load <4 x float >, ptr addrspace (1 ) %in0
90325 %1 = load <4 x float >, ptr addrspace (1 ) %in1
0 commit comments