66; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
77; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s
88
9- define i16 @s_add_i16 (i16 %a , i16 %b ) {
9+ define i16 @s_add_i16 (i16 inreg %a , i16 inreg %b ) {
1010; GFX7-LABEL: s_add_i16:
1111; GFX7: ; %bb.0:
1212; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13- ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
13+ ; GFX7-NEXT: s_add_i32 s16, s16, s17
14+ ; GFX7-NEXT: v_mov_b32_e32 v0, s16
1415; GFX7-NEXT: s_setpc_b64 s[30:31]
1516;
1617; GFX9-LABEL: s_add_i16:
1718; GFX9: ; %bb.0:
1819; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19- ; GFX9-NEXT: v_add_u16_e32 v0, v0, v1
20+ ; GFX9-NEXT: s_add_i32 s16, s16, s17
21+ ; GFX9-NEXT: v_mov_b32_e32 v0, s16
2022; GFX9-NEXT: s_setpc_b64 s[30:31]
2123;
2224; GFX8-LABEL: s_add_i16:
2325; GFX8: ; %bb.0:
2426; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25- ; GFX8-NEXT: v_add_u16_e32 v0, v0, v1
27+ ; GFX8-NEXT: s_add_i32 s16, s16, s17
28+ ; GFX8-NEXT: v_mov_b32_e32 v0, s16
2629; GFX8-NEXT: s_setpc_b64 s[30:31]
2730;
2831; GFX10-LABEL: s_add_i16:
2932; GFX10: ; %bb.0:
3033; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31- ; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
34+ ; GFX10-NEXT: s_add_i32 s16, s16, s17
35+ ; GFX10-NEXT: v_mov_b32_e32 v0, s16
3236; GFX10-NEXT: s_setpc_b64 s[30:31]
3337;
3438; GFX11-LABEL: s_add_i16:
3539; GFX11: ; %bb.0:
3640; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37- ; GFX11-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l
41+ ; GFX11-NEXT: s_add_i32 s0, s0, s1
42+ ; GFX11-NEXT: v_mov_b32_e32 v0, s0
3843; GFX11-NEXT: s_setpc_b64 s[30:31]
3944;
4045; GFX12-LABEL: s_add_i16:
@@ -44,13 +49,103 @@ define i16 @s_add_i16(i16 %a, i16 %b) {
4449; GFX12-NEXT: s_wait_samplecnt 0x0
4550; GFX12-NEXT: s_wait_bvhcnt 0x0
4651; GFX12-NEXT: s_wait_kmcnt 0x0
52+ ; GFX12-NEXT: s_add_co_i32 s0, s0, s1
53+ ; GFX12-NEXT: s_wait_alu 0xfffe
54+ ; GFX12-NEXT: v_mov_b32_e32 v0, s0
55+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
56+ %c = add i16 %a , %b
57+ ret i16 %c
58+ }
59+
60+ define i16 @v_add_i16 (i16 %a , i16 %b ) {
61+ ; GFX7-LABEL: v_add_i16:
62+ ; GFX7: ; %bb.0:
63+ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64+ ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
65+ ; GFX7-NEXT: s_setpc_b64 s[30:31]
66+ ;
67+ ; GFX9-LABEL: v_add_i16:
68+ ; GFX9: ; %bb.0:
69+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70+ ; GFX9-NEXT: v_add_u16_e32 v0, v0, v1
71+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
72+ ;
73+ ; GFX8-LABEL: v_add_i16:
74+ ; GFX8: ; %bb.0:
75+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76+ ; GFX8-NEXT: v_add_u16_e32 v0, v0, v1
77+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
78+ ;
79+ ; GFX10-LABEL: v_add_i16:
80+ ; GFX10: ; %bb.0:
81+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82+ ; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
83+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
84+ ;
85+ ; GFX11-LABEL: v_add_i16:
86+ ; GFX11: ; %bb.0:
87+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88+ ; GFX11-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l
89+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
90+ ;
91+ ; GFX12-LABEL: v_add_i16:
92+ ; GFX12: ; %bb.0:
93+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
94+ ; GFX12-NEXT: s_wait_expcnt 0x0
95+ ; GFX12-NEXT: s_wait_samplecnt 0x0
96+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
97+ ; GFX12-NEXT: s_wait_kmcnt 0x0
4798; GFX12-NEXT: v_add_nc_u16 v0, v0, v1
4899; GFX12-NEXT: s_setpc_b64 s[30:31]
49100 %c = add i16 %a , %b
50101 ret i16 %c
51102}
52103
53- define i32 @s_add_i32 (i32 %a , i32 %b ) {
104+ define i32 @s_add_i32 (i32 inreg %a , i32 inreg %b ) {
105+ ; GFX7-LABEL: s_add_i32:
106+ ; GFX7: ; %bb.0:
107+ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108+ ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
109+ ; GFX7-NEXT: s_setpc_b64 s[30:31]
110+ ;
111+ ; GFX9-LABEL: s_add_i32:
112+ ; GFX9: ; %bb.0:
113+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114+ ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
115+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
116+ ;
117+ ; GFX8-LABEL: s_add_i32:
118+ ; GFX8: ; %bb.0:
119+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120+ ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
121+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
122+ ;
123+ ; GFX10-LABEL: s_add_i32:
124+ ; GFX10: ; %bb.0:
125+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126+ ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
127+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
128+ ;
129+ ; GFX11-LABEL: s_add_i32:
130+ ; GFX11: ; %bb.0:
131+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132+ ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
133+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
134+ ;
135+ ; GFX12-LABEL: s_add_i32:
136+ ; GFX12: ; %bb.0:
137+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
138+ ; GFX12-NEXT: s_wait_expcnt 0x0
139+ ; GFX12-NEXT: s_wait_samplecnt 0x0
140+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
141+ ; GFX12-NEXT: s_wait_kmcnt 0x0
142+ ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v1
143+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
144+ %c = add i32 %a , %b
145+ ret i32 %c
146+ }
147+
148+ define i32 @v_add_i32 (i32 %a , i32 %b ) {
54149; GFX7-LABEL: s_add_i32:
55150; GFX7: ; %bb.0:
56151; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -94,7 +189,7 @@ define i32 @s_add_i32(i32 %a, i32 %b) {
94189 ret i32 %c
95190}
96191
97- define <2 x i16 > @s_add_v2i16 (<2 x i16 > %a , <2 x i16 > %b ) {
192+ define <2 x i16 > @s_add_v2i16 (<2 x i16 > inreg %a , <2 x i16 > inreg %b ) {
98193; GFX7-LABEL: s_add_v2i16:
99194; GFX7: ; %bb.0:
100195; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -141,7 +236,104 @@ define <2 x i16> @s_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
141236 ret <2 x i16 > %c
142237}
143238
144- define i64 @s_add_i64 (i64 %a , i64 %b ) {
239+ define <2 x i16 > @v_add_v2i16 (<2 x i16 > %a , <2 x i16 > %b ) {
240+ ; GFX7-LABEL: s_add_v2i16:
241+ ; GFX7: ; %bb.0:
242+ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243+ ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
244+ ; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
245+ ; GFX7-NEXT: s_setpc_b64 s[30:31]
246+ ;
247+ ; GFX9-LABEL: s_add_v2i16:
248+ ; GFX9: ; %bb.0:
249+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250+ ; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
251+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
252+ ;
253+ ; GFX8-LABEL: s_add_v2i16:
254+ ; GFX8: ; %bb.0:
255+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256+ ; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
257+ ; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
258+ ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
259+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
260+ ;
261+ ; GFX10-LABEL: s_add_v2i16:
262+ ; GFX10: ; %bb.0:
263+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264+ ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
265+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
266+ ;
267+ ; GFX11-LABEL: s_add_v2i16:
268+ ; GFX11: ; %bb.0:
269+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
270+ ; GFX11-NEXT: v_pk_add_u16 v0, v0, v1
271+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
272+ ;
273+ ; GFX12-LABEL: s_add_v2i16:
274+ ; GFX12: ; %bb.0:
275+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
276+ ; GFX12-NEXT: s_wait_expcnt 0x0
277+ ; GFX12-NEXT: s_wait_samplecnt 0x0
278+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
279+ ; GFX12-NEXT: s_wait_kmcnt 0x0
280+ ; GFX12-NEXT: v_pk_add_u16 v0, v0, v1
281+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
282+ %c = add <2 x i16 > %a , %b
283+ ret <2 x i16 > %c
284+ }
285+
286+ define i64 @s_add_i64 (i64 inreg %a , i64 inreg %b ) {
287+ ; GFX7-LABEL: s_add_i64:
288+ ; GFX7: ; %bb.0:
289+ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290+ ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
291+ ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
292+ ; GFX7-NEXT: s_setpc_b64 s[30:31]
293+ ;
294+ ; GFX9-LABEL: s_add_i64:
295+ ; GFX9: ; %bb.0:
296+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297+ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
298+ ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
299+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
300+ ;
301+ ; GFX8-LABEL: s_add_i64:
302+ ; GFX8: ; %bb.0:
303+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304+ ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
305+ ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
306+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
307+ ;
308+ ; GFX10-LABEL: s_add_i64:
309+ ; GFX10: ; %bb.0:
310+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311+ ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
312+ ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
313+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
314+ ;
315+ ; GFX11-LABEL: s_add_i64:
316+ ; GFX11: ; %bb.0:
317+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318+ ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
319+ ; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
320+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
321+ ;
322+ ; GFX12-LABEL: s_add_i64:
323+ ; GFX12: ; %bb.0:
324+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
325+ ; GFX12-NEXT: s_wait_expcnt 0x0
326+ ; GFX12-NEXT: s_wait_samplecnt 0x0
327+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
328+ ; GFX12-NEXT: s_wait_kmcnt 0x0
329+ ; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v2
330+ ; GFX12-NEXT: v_add_nc_u32_e32 v1, v1, v3
331+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
332+ %c = add i64 %a , %b
333+ ret i64 %c
334+ }
335+
336+ define i64 @v_add_i64 (i64 %a , i64 %b ) {
145337; GFX7-LABEL: s_add_i64:
146338; GFX7: ; %bb.0:
147339; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
0 commit comments