Skip to content

Commit 6376418

Browse files
authored
AMDGPU: Add baseline test for add64 with constant test (#122048)
Add baseline test for 64-bit adds when the low half of an operand is known 0.
1 parent b054289 commit 6376418

File tree

2 files changed

+426
-0
lines changed

2 files changed

+426
-0
lines changed
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3+
4+
; Reduce a 64-bit add by a constant if we know the low 32-bits are all
5+
; zero.
6+
7+
; add i64:x, K if computeTrailingZeros(K) >= 32
8+
; => build_pair x.lo, (add x.hi, K.hi)
9+
10+
; inreg (SGPR) operand, K = 1 << 50: K.lo is 0 and K.hi is 0x40000, which is
; the immediate on the s_addc_u32 below. The baseline checks still contain the
; add of 0 to the low half.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_0(i64 inreg %reg) {
11+
; GFX9-LABEL: s_add_i64_const_low_bits_known0_0:
12+
; GFX9: ; %bb.0:
13+
; GFX9-NEXT: s_add_u32 s0, s0, 0
14+
; GFX9-NEXT: s_addc_u32 s1, s1, 0x40000
15+
; GFX9-NEXT: ; return to shader part epilog
16+
%add = add i64 %reg, 1125899906842624 ; (1 << 50)
17+
ret i64 %add
18+
}
19+
20+
; inreg (SGPR) operand, K = 1 << 32: the smallest constant whose low 32 bits
; are all zero, so K.lo is 0 and K.hi is 1.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_1(i64 inreg %reg) {
21+
; GFX9-LABEL: s_add_i64_const_low_bits_known0_1:
22+
; GFX9: ; %bb.0:
23+
; GFX9-NEXT: s_add_u32 s0, s0, 0
24+
; GFX9-NEXT: s_addc_u32 s1, s1, 1
25+
; GFX9-NEXT: ; return to shader part epilog
26+
%add = add i64 %reg, 4294967296 ; (1 << 32)
27+
ret i64 %add
28+
}
29+
30+
; inreg (SGPR) operand, K = 1 << 33: K.lo is 0 and K.hi is 2.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_2(i64 inreg %reg) {
31+
; GFX9-LABEL: s_add_i64_const_low_bits_known0_2:
32+
; GFX9: ; %bb.0:
33+
; GFX9-NEXT: s_add_u32 s0, s0, 0
34+
; GFX9-NEXT: s_addc_u32 s1, s1, 2
35+
; GFX9-NEXT: ; return to shader part epilog
36+
%add = add i64 %reg, 8589934592 ; (1 << 33)
37+
ret i64 %add
38+
}
39+
40+
; inreg (SGPR) operand, K = 1 << 63 (only the sign bit set): K.lo is 0 and
; K.hi is 0x80000000.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_3(i64 inreg %reg) {
41+
; GFX9-LABEL: s_add_i64_const_low_bits_known0_3:
42+
; GFX9: ; %bb.0:
43+
; GFX9-NEXT: s_add_u32 s0, s0, 0
44+
; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
45+
; GFX9-NEXT: ; return to shader part epilog
46+
%add = add i64 %reg, -9223372036854775808 ; (1 << 63)
47+
ret i64 %add
48+
}
49+
50+
; inreg (SGPR) operand, K = 0xffffffff00000000: every high bit set and every
; low bit clear, so K.lo is 0 and K.hi is -1 as a 32-bit value.
define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_4(i64 inreg %reg) {
51+
; GFX9-LABEL: s_add_i64_const_low_bits_known0_4:
52+
; GFX9: ; %bb.0:
53+
; GFX9-NEXT: s_add_u32 s0, s0, 0
54+
; GFX9-NEXT: s_addc_u32 s1, s1, -1
55+
; GFX9-NEXT: ; return to shader part epilog
56+
%add = add i64 %reg, -4294967296 ; 0xffffffff00000000
57+
ret i64 %add
58+
}
59+
60+
; Non-inreg variant of the K = 1 << 50 case; the operand is in v0/v1.
; The checks expect K.hi (0x40000) to be moved into v2 before the carry add.
define i64 @v_add_i64_const_low_bits_known0_0(i64 %reg) {
61+
; GFX9-LABEL: v_add_i64_const_low_bits_known0_0:
62+
; GFX9: ; %bb.0:
63+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64+
; GFX9-NEXT: v_mov_b32_e32 v2, 0x40000
65+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
66+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
67+
; GFX9-NEXT: s_setpc_b64 s[30:31]
68+
%add = add i64 %reg, 1125899906842624 ; (1 << 50)
69+
ret i64 %add
70+
}
71+
72+
; Non-inreg variant of the K = 1 << 32 case; the operand is in v0/v1.
; K.hi (1) appears directly as an operand of the carry add.
define i64 @v_add_i64_const_low_bits_known0_1(i64 %reg) {
73+
; GFX9-LABEL: v_add_i64_const_low_bits_known0_1:
74+
; GFX9: ; %bb.0:
75+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
77+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
78+
; GFX9-NEXT: s_setpc_b64 s[30:31]
79+
%add = add i64 %reg, 4294967296 ; (1 << 32)
80+
ret i64 %add
81+
}
82+
83+
; Non-inreg variant of the K = 1 << 33 case; the operand is in v0/v1.
; K.hi (2) appears directly as an operand of the carry add.
define i64 @v_add_i64_const_low_bits_known0_2(i64 %reg) {
84+
; GFX9-LABEL: v_add_i64_const_low_bits_known0_2:
85+
; GFX9: ; %bb.0:
86+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
88+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
89+
; GFX9-NEXT: s_setpc_b64 s[30:31]
90+
%add = add i64 %reg, 8589934592 ; (1 << 33)
91+
ret i64 %add
92+
}
93+
94+
; Non-inreg variant of the K = 1 << 63 case; the operand is in v0/v1.
; The checks expect v2 to be set up by v_bfrev_b32 and then used as the
; high-half addend of the carry add.
define i64 @v_add_i64_const_low_bits_known0_3(i64 %reg) {
95+
; GFX9-LABEL: v_add_i64_const_low_bits_known0_3:
96+
; GFX9: ; %bb.0:
97+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98+
; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
99+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
100+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
101+
; GFX9-NEXT: s_setpc_b64 s[30:31]
102+
%add = add i64 %reg, -9223372036854775808 ; (1 << 63)
103+
ret i64 %add
104+
}
105+
106+
; Non-inreg variant of the K = 0xffffffff00000000 case; the operand is in
; v0/v1. K.hi (-1) appears directly as an operand of the carry add.
define i64 @v_add_i64_const_low_bits_known0_4(i64 %reg) {
107+
; GFX9-LABEL: v_add_i64_const_low_bits_known0_4:
108+
; GFX9: ; %bb.0:
109+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
111+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
112+
; GFX9-NEXT: s_setpc_b64 s[30:31]
113+
%add = add i64 %reg, -4294967296 ; 0xffffffff00000000
114+
ret i64 %add
115+
}
116+
117+
; Opposite case to the low_bits_known0 tests: the constant's HIGH 32 bits are
; zero and its low 32 bits are all ones, so the low-half add is not a no-op.
define amdgpu_ps i64 @s_add_i64_const_high_bits_known0_0(i64 inreg %reg) {
118+
; GFX9-LABEL: s_add_i64_const_high_bits_known0_0:
119+
; GFX9: ; %bb.0:
120+
; GFX9-NEXT: s_add_u32 s0, s0, -1
121+
; GFX9-NEXT: s_addc_u32 s1, s1, 0
122+
; GFX9-NEXT: ; return to shader part epilog
123+
%add = add i64 %reg, 4294967295 ; (1 << 32) - 1 = 0xffffffff
124+
ret i64 %add
125+
}
126+
127+
; Non-inreg variant of the high-bits-zero case: the constant's HIGH 32 bits
; are zero and its low 32 bits are all ones, so the low-half add is not a
; no-op.
define i64 @v_add_i64_const_high_bits_known0_0(i64 %reg) {
128+
; GFX9-LABEL: v_add_i64_const_high_bits_known0_0:
129+
; GFX9: ; %bb.0:
130+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, -1, v0
132+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
133+
; GFX9-NEXT: s_setpc_b64 s[30:31]
134+
%add = add i64 %reg, 4294967295 ; (1 << 32) - 1 = 0xffffffff
135+
ret i64 %add
136+
}
137+
138+
; <2 x i64> add where both lanes use the same constant, 1 << 32. The checks
; show one add/addc pair per lane (v0/v1 and v2/v3), each with a high-half
; addend of 1.
define <2 x i64> @v_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
139+
; GFX9-LABEL: v_add_v2i64_splat_const_low_bits_known0_0:
140+
; GFX9: ; %bb.0:
141+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
143+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
144+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2
145+
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 1, v3, vcc
146+
; GFX9-NEXT: s_setpc_b64 s[30:31]
147+
%add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
148+
ret <2 x i64> %add
149+
}
150+
151+
; <2 x i64> add where the lanes use different constants (1 << 32 and 1 << 33),
; both with zero low halves. The checks show one add/addc pair per lane with
; high-half addends of 1 and 2.
define <2 x i64> @v_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
152+
; GFX9-LABEL: v_add_v2i64_nonsplat_const_low_bits_known0_0:
153+
; GFX9: ; %bb.0:
154+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
156+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
157+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0, v2
158+
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 2, v3, vcc
159+
; GFX9-NEXT: s_setpc_b64 s[30:31]
160+
%add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
161+
ret <2 x i64> %add
162+
}
163+
164+
; inreg <2 x i64> add where both lanes use the same constant, 1 << 32. The
; checks show one s_add/s_addc pair per lane (s0/s1 and s2/s3), each with a
; high-half addend of 1.
define amdgpu_ps <2 x i64> @s_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
165+
; GFX9-LABEL: s_add_v2i64_splat_const_low_bits_known0_0:
166+
; GFX9: ; %bb.0:
167+
; GFX9-NEXT: s_add_u32 s0, s0, 0
168+
; GFX9-NEXT: s_addc_u32 s1, s1, 1
169+
; GFX9-NEXT: s_add_u32 s2, s2, 0
170+
; GFX9-NEXT: s_addc_u32 s3, s3, 1
171+
; GFX9-NEXT: ; return to shader part epilog
172+
%add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
173+
ret <2 x i64> %add
174+
}
175+
176+
; inreg <2 x i64> add where the lanes use different constants (1 << 32 and
; 1 << 33), both with zero low halves. The checks show one s_add/s_addc pair
; per lane with high-half addends of 1 and 2.
define amdgpu_ps <2 x i64> @s_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
177+
; GFX9-LABEL: s_add_v2i64_nonsplat_const_low_bits_known0_0:
178+
; GFX9: ; %bb.0:
179+
; GFX9-NEXT: s_add_u32 s0, s0, 0
180+
; GFX9-NEXT: s_addc_u32 s1, s1, 2
181+
; GFX9-NEXT: s_add_u32 s2, s2, 0
182+
; GFX9-NEXT: s_addc_u32 s3, s3, 2
183+
; GFX9-NEXT: ; return to shader part epilog
184+
%add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
185+
ret <2 x i64> %add
186+
}
187+
188+
; Non-constant addend: (zext %offset.hi32) << 32, so only its high 32 bits can
; be nonzero and its low 32 bits are known zero (despite the function name).
; We could reduce this to use a 32-bit add if we use computeKnownBits
189+
define i64 @v_add_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
190+
; GFX9-LABEL: v_add_i64_variable_high_bits_known0_0:
191+
; GFX9: ; %bb.0:
192+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
194+
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
195+
; GFX9-NEXT: s_setpc_b64 s[30:31]
196+
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
197+
%in.high.bits = shl i64 %zext.offset.hi32, 32
198+
%add = add i64 %reg, %in.high.bits
199+
ret i64 %add
200+
}
201+
202+
; inreg variant with a non-constant addend: (zext %offset.hi32) << 32, so only
; its high 32 bits can be nonzero and its low 32 bits are known zero (despite
; the function name).
; We could reduce this to use a 32-bit add if we use computeKnownBits
203+
define amdgpu_ps i64 @s_add_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
204+
; GFX9-LABEL: s_add_i64_variable_high_bits_known0_0:
205+
; GFX9: ; %bb.0:
206+
; GFX9-NEXT: s_add_u32 s0, s0, 0
207+
; GFX9-NEXT: s_addc_u32 s1, s1, s2
208+
; GFX9-NEXT: ; return to shader part epilog
209+
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
210+
%in.high.bits = shl i64 %zext.offset.hi32, 32
211+
%add = add i64 %reg, %in.high.bits
212+
ret i64 %add
213+
}

0 commit comments

Comments
 (0)