Skip to content

Commit f07c87c

Browse files
committed
Fix tests and introduce sub.ll
1 parent 7e2a7b7 commit f07c87c

File tree

3 files changed

+585
-95
lines changed

3 files changed

+585
-95
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll

Lines changed: 201 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,35 +6,40 @@
66
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
77
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s
88

9-
define i16 @s_add_i16(i16 %a, i16 %b) {
9+
define i16 @s_add_i16(i16 inreg %a, i16 inreg %b) {
1010
; GFX7-LABEL: s_add_i16:
1111
; GFX7: ; %bb.0:
1212
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13-
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
13+
; GFX7-NEXT: s_add_i32 s16, s16, s17
14+
; GFX7-NEXT: v_mov_b32_e32 v0, s16
1415
; GFX7-NEXT: s_setpc_b64 s[30:31]
1516
;
1617
; GFX9-LABEL: s_add_i16:
1718
; GFX9: ; %bb.0:
1819
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19-
; GFX9-NEXT: v_add_u16_e32 v0, v0, v1
20+
; GFX9-NEXT: s_add_i32 s16, s16, s17
21+
; GFX9-NEXT: v_mov_b32_e32 v0, s16
2022
; GFX9-NEXT: s_setpc_b64 s[30:31]
2123
;
2224
; GFX8-LABEL: s_add_i16:
2325
; GFX8: ; %bb.0:
2426
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25-
; GFX8-NEXT: v_add_u16_e32 v0, v0, v1
27+
; GFX8-NEXT: s_add_i32 s16, s16, s17
28+
; GFX8-NEXT: v_mov_b32_e32 v0, s16
2629
; GFX8-NEXT: s_setpc_b64 s[30:31]
2730
;
2831
; GFX10-LABEL: s_add_i16:
2932
; GFX10: ; %bb.0:
3033
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31-
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
34+
; GFX10-NEXT: s_add_i32 s16, s16, s17
35+
; GFX10-NEXT: v_mov_b32_e32 v0, s16
3236
; GFX10-NEXT: s_setpc_b64 s[30:31]
3337
;
3438
; GFX11-LABEL: s_add_i16:
3539
; GFX11: ; %bb.0:
3640
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37-
; GFX11-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l
41+
; GFX11-NEXT: s_add_i32 s0, s0, s1
42+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
3843
; GFX11-NEXT: s_setpc_b64 s[30:31]
3944
;
4045
; GFX12-LABEL: s_add_i16:
@@ -44,13 +49,103 @@ define i16 @s_add_i16(i16 %a, i16 %b) {
4449
; GFX12-NEXT: s_wait_samplecnt 0x0
4550
; GFX12-NEXT: s_wait_bvhcnt 0x0
4651
; GFX12-NEXT: s_wait_kmcnt 0x0
52+
; GFX12-NEXT: s_add_co_i32 s0, s0, s1
53+
; GFX12-NEXT: s_wait_alu 0xfffe
54+
; GFX12-NEXT: v_mov_b32_e32 v0, s0
55+
; GFX12-NEXT: s_setpc_b64 s[30:31]
56+
%c = add i16 %a, %b
57+
ret i16 %c
58+
}
59+
60+
define i16 @v_add_i16(i16 %a, i16 %b) {
61+
; GFX7-LABEL: v_add_i16:
62+
; GFX7: ; %bb.0:
63+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
65+
; GFX7-NEXT: s_setpc_b64 s[30:31]
66+
;
67+
; GFX9-LABEL: v_add_i16:
68+
; GFX9: ; %bb.0:
69+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70+
; GFX9-NEXT: v_add_u16_e32 v0, v0, v1
71+
; GFX9-NEXT: s_setpc_b64 s[30:31]
72+
;
73+
; GFX8-LABEL: v_add_i16:
74+
; GFX8: ; %bb.0:
75+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76+
; GFX8-NEXT: v_add_u16_e32 v0, v0, v1
77+
; GFX8-NEXT: s_setpc_b64 s[30:31]
78+
;
79+
; GFX10-LABEL: v_add_i16:
80+
; GFX10: ; %bb.0:
81+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82+
; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
83+
; GFX10-NEXT: s_setpc_b64 s[30:31]
84+
;
85+
; GFX11-LABEL: v_add_i16:
86+
; GFX11: ; %bb.0:
87+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88+
; GFX11-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l
89+
; GFX11-NEXT: s_setpc_b64 s[30:31]
90+
;
91+
; GFX12-LABEL: v_add_i16:
92+
; GFX12: ; %bb.0:
93+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
94+
; GFX12-NEXT: s_wait_expcnt 0x0
95+
; GFX12-NEXT: s_wait_samplecnt 0x0
96+
; GFX12-NEXT: s_wait_bvhcnt 0x0
97+
; GFX12-NEXT: s_wait_kmcnt 0x0
4798
; GFX12-NEXT: v_add_nc_u16 v0, v0, v1
4899
; GFX12-NEXT: s_setpc_b64 s[30:31]
49100
%c = add i16 %a, %b
50101
ret i16 %c
51102
}
52103

53-
define i32 @s_add_i32(i32 %a, i32 %b) {
104+
define i32 @s_add_i32(i32 inreg %a, i32 inreg %b) {
105+
; GFX7-LABEL: s_add_i32:
106+
; GFX7: ; %bb.0:
107+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
109+
; GFX7-NEXT: s_setpc_b64 s[30:31]
110+
;
111+
; GFX9-LABEL: s_add_i32:
112+
; GFX9: ; %bb.0:
113+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114+
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
115+
; GFX9-NEXT: s_setpc_b64 s[30:31]
116+
;
117+
; GFX8-LABEL: s_add_i32:
118+
; GFX8: ; %bb.0:
119+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
121+
; GFX8-NEXT: s_setpc_b64 s[30:31]
122+
;
123+
; GFX10-LABEL: s_add_i32:
124+
; GFX10: ; %bb.0:
125+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126+
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
127+
; GFX10-NEXT: s_setpc_b64 s[30:31]
128+
;
129+
; GFX11-LABEL: s_add_i32:
130+
; GFX11: ; %bb.0:
131+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132+
; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
133+
; GFX11-NEXT: s_setpc_b64 s[30:31]
134+
;
135+
; GFX12-LABEL: s_add_i32:
136+
; GFX12: ; %bb.0:
137+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
138+
; GFX12-NEXT: s_wait_expcnt 0x0
139+
; GFX12-NEXT: s_wait_samplecnt 0x0
140+
; GFX12-NEXT: s_wait_bvhcnt 0x0
141+
; GFX12-NEXT: s_wait_kmcnt 0x0
142+
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v1
143+
; GFX12-NEXT: s_setpc_b64 s[30:31]
144+
%c = add i32 %a, %b
145+
ret i32 %c
146+
}
147+
148+
define i32 @v_add_i32(i32 %a, i32 %b) {
54149
; GFX7-LABEL: v_add_i32:
55150
; GFX7: ; %bb.0:
56151
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -94,7 +189,7 @@ define i32 @s_add_i32(i32 %a, i32 %b) {
94189
ret i32 %c
95190
}
96191

97-
define <2 x i16> @s_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
192+
define <2 x i16> @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
98193
; GFX7-LABEL: s_add_v2i16:
99194
; GFX7: ; %bb.0:
100195
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -141,7 +236,104 @@ define <2 x i16> @s_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
141236
ret <2 x i16> %c
142237
}
143238

144-
define i64 @s_add_i64(i64 %a, i64 %b) {
239+
define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
240+
; GFX7-LABEL: v_add_v2i16:
241+
; GFX7: ; %bb.0:
242+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
244+
; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
245+
; GFX7-NEXT: s_setpc_b64 s[30:31]
246+
;
247+
; GFX9-LABEL: v_add_v2i16:
248+
; GFX9: ; %bb.0:
249+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250+
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
251+
; GFX9-NEXT: s_setpc_b64 s[30:31]
252+
;
253+
; GFX8-LABEL: v_add_v2i16:
254+
; GFX8: ; %bb.0:
255+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256+
; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
257+
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
258+
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
259+
; GFX8-NEXT: s_setpc_b64 s[30:31]
260+
;
261+
; GFX10-LABEL: v_add_v2i16:
262+
; GFX10: ; %bb.0:
263+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264+
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
265+
; GFX10-NEXT: s_setpc_b64 s[30:31]
266+
;
267+
; GFX11-LABEL: v_add_v2i16:
268+
; GFX11: ; %bb.0:
269+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
270+
; GFX11-NEXT: v_pk_add_u16 v0, v0, v1
271+
; GFX11-NEXT: s_setpc_b64 s[30:31]
272+
;
273+
; GFX12-LABEL: v_add_v2i16:
274+
; GFX12: ; %bb.0:
275+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
276+
; GFX12-NEXT: s_wait_expcnt 0x0
277+
; GFX12-NEXT: s_wait_samplecnt 0x0
278+
; GFX12-NEXT: s_wait_bvhcnt 0x0
279+
; GFX12-NEXT: s_wait_kmcnt 0x0
280+
; GFX12-NEXT: v_pk_add_u16 v0, v0, v1
281+
; GFX12-NEXT: s_setpc_b64 s[30:31]
282+
%c = add <2 x i16> %a, %b
283+
ret <2 x i16> %c
284+
}
285+
286+
define i64 @s_add_i64(i64 inreg %a, i64 inreg %b) {
287+
; GFX7-LABEL: s_add_i64:
288+
; GFX7: ; %bb.0:
289+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290+
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
291+
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
292+
; GFX7-NEXT: s_setpc_b64 s[30:31]
293+
;
294+
; GFX9-LABEL: s_add_i64:
295+
; GFX9: ; %bb.0:
296+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
298+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
299+
; GFX9-NEXT: s_setpc_b64 s[30:31]
300+
;
301+
; GFX8-LABEL: s_add_i64:
302+
; GFX8: ; %bb.0:
303+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304+
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
305+
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
306+
; GFX8-NEXT: s_setpc_b64 s[30:31]
307+
;
308+
; GFX10-LABEL: s_add_i64:
309+
; GFX10: ; %bb.0:
310+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311+
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
312+
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
313+
; GFX10-NEXT: s_setpc_b64 s[30:31]
314+
;
315+
; GFX11-LABEL: s_add_i64:
316+
; GFX11: ; %bb.0:
317+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318+
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
319+
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
320+
; GFX11-NEXT: s_setpc_b64 s[30:31]
321+
;
322+
; GFX12-LABEL: s_add_i64:
323+
; GFX12: ; %bb.0:
324+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
325+
; GFX12-NEXT: s_wait_expcnt 0x0
326+
; GFX12-NEXT: s_wait_samplecnt 0x0
327+
; GFX12-NEXT: s_wait_bvhcnt 0x0
328+
; GFX12-NEXT: s_wait_kmcnt 0x0
329+
; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v2
330+
; GFX12-NEXT: v_add_nc_u32_e32 v1, v1, v3
331+
; GFX12-NEXT: s_setpc_b64 s[30:31]
332+
%c = add i64 %a, %b
333+
ret i64 %c
334+
}
335+
336+
define i64 @v_add_i64(i64 %a, i64 %b) {
145337
; GFX7-LABEL: v_add_i64:
146338
; GFX7: ; %bb.0:
147339
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add-sub.ll

Lines changed: 0 additions & 86 deletions
This file was deleted.

0 commit comments

Comments
 (0)