@@ -105,31 +105,36 @@ define i32 @s_add_i32(i32 inreg %a, i32 inreg %b) {
105105; GFX7-LABEL: s_add_i32:
106106; GFX7: ; %bb.0:
107107; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108- ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
108+ ; GFX7-NEXT: s_add_i32 s16, s16, s17
109+ ; GFX7-NEXT: v_mov_b32_e32 v0, s16
109110; GFX7-NEXT: s_setpc_b64 s[30:31]
110111;
111112; GFX9-LABEL: s_add_i32:
112113; GFX9: ; %bb.0:
113114; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114- ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
115+ ; GFX9-NEXT: s_add_i32 s16, s16, s17
116+ ; GFX9-NEXT: v_mov_b32_e32 v0, s16
115117; GFX9-NEXT: s_setpc_b64 s[30:31]
116118;
117119; GFX8-LABEL: s_add_i32:
118120; GFX8: ; %bb.0:
119121; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120- ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
122+ ; GFX8-NEXT: s_add_i32 s16, s16, s17
123+ ; GFX8-NEXT: v_mov_b32_e32 v0, s16
121124; GFX8-NEXT: s_setpc_b64 s[30:31]
122125;
123126; GFX10-LABEL: s_add_i32:
124127; GFX10: ; %bb.0:
125128; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126- ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
129+ ; GFX10-NEXT: s_add_i32 s16, s16, s17
130+ ; GFX10-NEXT: v_mov_b32_e32 v0, s16
127131; GFX10-NEXT: s_setpc_b64 s[30:31]
128132;
129133; GFX11-LABEL: s_add_i32:
130134; GFX11: ; %bb.0:
131135; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132- ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
136+ ; GFX11-NEXT: s_add_i32 s0, s0, s1
137+ ; GFX11-NEXT: v_mov_b32_e32 v0, s0
133138; GFX11-NEXT: s_setpc_b64 s[30:31]
134139;
135140; GFX12-LABEL: s_add_i32:
@@ -139,44 +144,46 @@ define i32 @s_add_i32(i32 inreg %a, i32 inreg %b) {
139144; GFX12-NEXT: s_wait_samplecnt 0x0
140145; GFX12-NEXT: s_wait_bvhcnt 0x0
141146; GFX12-NEXT: s_wait_kmcnt 0x0
142- ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v1
147+ ; GFX12-NEXT: s_add_co_i32 s0, s0, s1
148+ ; GFX12-NEXT: s_wait_alu 0xfffe
149+ ; GFX12-NEXT: v_mov_b32_e32 v0, s0
143150; GFX12-NEXT: s_setpc_b64 s[30:31]
144151 %c = add i32 %a , %b
145152 ret i32 %c
146153}
147154
148155define i32 @v_add_i32 (i32 %a , i32 %b ) {
149- ; GFX7-LABEL: s_add_i32 :
156+ ; GFX7-LABEL: v_add_i32 :
150157; GFX7: ; %bb.0:
151158; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152159; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
153160; GFX7-NEXT: s_setpc_b64 s[30:31]
154161;
155- ; GFX9-LABEL: s_add_i32 :
162+ ; GFX9-LABEL: v_add_i32 :
156163; GFX9: ; %bb.0:
157164; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158165; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
159166; GFX9-NEXT: s_setpc_b64 s[30:31]
160167;
161- ; GFX8-LABEL: s_add_i32 :
168+ ; GFX8-LABEL: v_add_i32 :
162169; GFX8: ; %bb.0:
163170; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164171; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
165172; GFX8-NEXT: s_setpc_b64 s[30:31]
166173;
167- ; GFX10-LABEL: s_add_i32 :
174+ ; GFX10-LABEL: v_add_i32 :
168175; GFX10: ; %bb.0:
169176; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170177; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
171178; GFX10-NEXT: s_setpc_b64 s[30:31]
172179;
173- ; GFX11-LABEL: s_add_i32 :
180+ ; GFX11-LABEL: v_add_i32 :
174181; GFX11: ; %bb.0:
175182; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176183; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
177184; GFX11-NEXT: s_setpc_b64 s[30:31]
178185;
179- ; GFX12-LABEL: s_add_i32 :
186+ ; GFX12-LABEL: v_add_i32 :
180187; GFX12: ; %bb.0:
181188; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
182189; GFX12-NEXT: s_wait_expcnt 0x0
@@ -189,88 +196,42 @@ define i32 @v_add_i32(i32 %a, i32 %b) {
189196 ret i32 %c
190197}
191198
192- define <2 x i16 > @s_add_v2i16 (<2 x i16 > inreg %a , <2 x i16 > inreg %b ) {
193- ; GFX7-LABEL: s_add_v2i16:
194- ; GFX7: ; %bb.0:
195- ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196- ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
197- ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
198- ; GFX7-NEXT: s_setpc_b64 s[30:31]
199- ;
200- ; GFX9-LABEL: s_add_v2i16:
201- ; GFX9: ; %bb.0:
202- ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203- ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
204- ; GFX9-NEXT: s_setpc_b64 s[30:31]
205- ;
206- ; GFX8-LABEL: s_add_v2i16:
207- ; GFX8: ; %bb.0:
208- ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209- ; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
210- ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
211- ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
212- ; GFX8-NEXT: s_setpc_b64 s[30:31]
213- ;
214- ; GFX10-LABEL: s_add_v2i16:
215- ; GFX10: ; %bb.0:
216- ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217- ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
218- ; GFX10-NEXT: s_setpc_b64 s[30:31]
219- ;
220- ; GFX11-LABEL: s_add_v2i16:
221- ; GFX11: ; %bb.0:
222- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223- ; GFX11-NEXT: v_pk_add_u16 v0, v0, v1
224- ; GFX11-NEXT: s_setpc_b64 s[30:31]
225- ;
226- ; GFX12-LABEL: s_add_v2i16:
227- ; GFX12: ; %bb.0:
228- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
229- ; GFX12-NEXT: s_wait_expcnt 0x0
230- ; GFX12-NEXT: s_wait_samplecnt 0x0
231- ; GFX12-NEXT: s_wait_bvhcnt 0x0
232- ; GFX12-NEXT: s_wait_kmcnt 0x0
233- ; GFX12-NEXT: v_pk_add_u16 v0, v0, v1
234- ; GFX12-NEXT: s_setpc_b64 s[30:31]
235- %c = add <2 x i16 > %a , %b
236- ret <2 x i16 > %c
237- }
238-
199+ ; TODO: Add test for s_add_v2i16
239200define <2 x i16 > @v_add_v2i16 (<2 x i16 > %a , <2 x i16 > %b ) {
240- ; GFX7-LABEL: s_add_v2i16 :
201+ ; GFX7-LABEL: v_add_v2i16 :
241202; GFX7: ; %bb.0:
242203; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243204; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
244205; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
245206; GFX7-NEXT: s_setpc_b64 s[30:31]
246207;
247- ; GFX9-LABEL: s_add_v2i16 :
208+ ; GFX9-LABEL: v_add_v2i16 :
248209; GFX9: ; %bb.0:
249210; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250211; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
251212; GFX9-NEXT: s_setpc_b64 s[30:31]
252213;
253- ; GFX8-LABEL: s_add_v2i16 :
214+ ; GFX8-LABEL: v_add_v2i16 :
254215; GFX8: ; %bb.0:
255216; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256217; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
257218; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
258219; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
259220; GFX8-NEXT: s_setpc_b64 s[30:31]
260221;
261- ; GFX10-LABEL: s_add_v2i16 :
222+ ; GFX10-LABEL: v_add_v2i16 :
262223; GFX10: ; %bb.0:
263224; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264225; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
265226; GFX10-NEXT: s_setpc_b64 s[30:31]
266227;
267- ; GFX11-LABEL: s_add_v2i16 :
228+ ; GFX11-LABEL: v_add_v2i16 :
268229; GFX11: ; %bb.0:
269230; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
270231; GFX11-NEXT: v_pk_add_u16 v0, v0, v1
271232; GFX11-NEXT: s_setpc_b64 s[30:31]
272233;
273- ; GFX12-LABEL: s_add_v2i16 :
234+ ; GFX12-LABEL: v_add_v2i16 :
274235; GFX12: ; %bb.0:
275236; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
276237; GFX12-NEXT: s_wait_expcnt 0x0
@@ -287,36 +248,45 @@ define i64 @s_add_i64(i64 inreg %a, i64 inreg %b) {
287248; GFX7-LABEL: s_add_i64:
288249; GFX7: ; %bb.0:
289250; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290- ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
291- ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
251+ ; GFX7-NEXT: s_add_u32 s4, s16, s18
252+ ; GFX7-NEXT: s_addc_u32 s5, s17, s19
253+ ; GFX7-NEXT: v_mov_b32_e32 v0, s4
254+ ; GFX7-NEXT: v_mov_b32_e32 v1, s5
292255; GFX7-NEXT: s_setpc_b64 s[30:31]
293256;
294257; GFX9-LABEL: s_add_i64:
295258; GFX9: ; %bb.0:
296259; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297- ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
298- ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
260+ ; GFX9-NEXT: s_add_u32 s4, s16, s18
261+ ; GFX9-NEXT: s_addc_u32 s5, s17, s19
262+ ; GFX9-NEXT: v_mov_b32_e32 v0, s4
263+ ; GFX9-NEXT: v_mov_b32_e32 v1, s5
299264; GFX9-NEXT: s_setpc_b64 s[30:31]
300265;
301266; GFX8-LABEL: s_add_i64:
302267; GFX8: ; %bb.0:
303268; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304- ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
305- ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
269+ ; GFX8-NEXT: s_add_u32 s4, s16, s18
270+ ; GFX8-NEXT: s_addc_u32 s5, s17, s19
271+ ; GFX8-NEXT: v_mov_b32_e32 v0, s4
272+ ; GFX8-NEXT: v_mov_b32_e32 v1, s5
306273; GFX8-NEXT: s_setpc_b64 s[30:31]
307274;
308275; GFX10-LABEL: s_add_i64:
309276; GFX10: ; %bb.0:
310277; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311- ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
312- ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
278+ ; GFX10-NEXT: s_add_u32 s4, s16, s18
279+ ; GFX10-NEXT: s_addc_u32 s5, s17, s19
280+ ; GFX10-NEXT: v_mov_b32_e32 v0, s4
281+ ; GFX10-NEXT: v_mov_b32_e32 v1, s5
313282; GFX10-NEXT: s_setpc_b64 s[30:31]
314283;
315284; GFX11-LABEL: s_add_i64:
316285; GFX11: ; %bb.0:
317286; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318- ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
319- ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
287+ ; GFX11-NEXT: s_add_u32 s0, s0, s2
288+ ; GFX11-NEXT: s_addc_u32 s1, s1, s3
289+ ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
320290; GFX11-NEXT: s_setpc_b64 s[30:31]
321291;
322292; GFX12-LABEL: s_add_i64:
@@ -326,50 +296,50 @@ define i64 @s_add_i64(i64 inreg %a, i64 inreg %b) {
326296; GFX12-NEXT: s_wait_samplecnt 0x0
327297; GFX12-NEXT: s_wait_bvhcnt 0x0
328298; GFX12-NEXT: s_wait_kmcnt 0x0
329- ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v2
330- ; GFX12-NEXT: v_add_nc_u32_e32 v1, v1, v3
299+ ; GFX12-NEXT: v_add_nc_u32_e64 v0, s0, s2
300+ ; GFX12-NEXT: v_add_nc_u32_e64 v1, s1, s3
331301; GFX12-NEXT: s_setpc_b64 s[30:31]
332302 %c = add i64 %a , %b
333303 ret i64 %c
334304}
335305
336306define i64 @v_add_i64 (i64 %a , i64 %b ) {
337- ; GFX7-LABEL: s_add_i64 :
307+ ; GFX7-LABEL: v_add_i64 :
338308; GFX7: ; %bb.0:
339309; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
340310; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
341311; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
342312; GFX7-NEXT: s_setpc_b64 s[30:31]
343313;
344- ; GFX9-LABEL: s_add_i64 :
314+ ; GFX9-LABEL: v_add_i64 :
345315; GFX9: ; %bb.0:
346316; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347317; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
348318; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
349319; GFX9-NEXT: s_setpc_b64 s[30:31]
350320;
351- ; GFX8-LABEL: s_add_i64 :
321+ ; GFX8-LABEL: v_add_i64 :
352322; GFX8: ; %bb.0:
353323; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354324; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
355325; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
356326; GFX8-NEXT: s_setpc_b64 s[30:31]
357327;
358- ; GFX10-LABEL: s_add_i64 :
328+ ; GFX10-LABEL: v_add_i64 :
359329; GFX10: ; %bb.0:
360330; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361331; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
362332; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
363333; GFX10-NEXT: s_setpc_b64 s[30:31]
364334;
365- ; GFX11-LABEL: s_add_i64 :
335+ ; GFX11-LABEL: v_add_i64 :
366336; GFX11: ; %bb.0:
367337; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
368338; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
369339; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
370340; GFX11-NEXT: s_setpc_b64 s[30:31]
371341;
372- ; GFX12-LABEL: s_add_i64 :
342+ ; GFX12-LABEL: v_add_i64 :
373343; GFX12: ; %bb.0:
374344; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
375345; GFX12-NEXT: s_wait_expcnt 0x0
0 commit comments