@@ -7,7 +7,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
77; GCN: ; %bb.0:
88; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2
99; GCN-NEXT: ; return to shader part epilog
10- %add = add i32 %a , %b
10+ %add = call i32 @llvm.uadd.sat.i32 ( i32 %a , i32 %b )
1111 %max = call i32 @llvm.umax.i32 (i32 %add , i32 %c )
1212 %ret = bitcast i32 %max to float
1313 ret float %ret
@@ -18,39 +18,38 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
1818; GCN: ; %bb.0:
1919; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1
2020; GCN-NEXT: ; return to shader part epilog
21- %add = add i32 %a , %b
21+ %add = call i32 @llvm.uadd.sat.i32 ( i32 %a , i32 %b )
2222 %max = call i32 @llvm.umax.i32 (i32 %add , i32 %c )
2323 %ret = bitcast i32 %max to float
2424 ret float %ret
2525}
2626
; add_max_u32_ssv: both addends are `inreg` arguments (s0/s1 in the checks) and
; the max operand is a regular argument (v0). The added lines switch the add to
; llvm.uadd.sat.i32 and collapse the expectations to one shared GCN prefix with
; a single v_add_max_u32; the removed lines are the earlier per-selector
; (SDAG/GISEL) expectations for the plain `add` form.
2727define amdgpu_ps float @add_max_u32_ssv (i32 inreg %a , i32 inreg %b , i32 %c ) {
28- ; SDAG-LABEL: add_max_u32_ssv:
29- ; SDAG: ; %bb.0:
30- ; SDAG-NEXT: v_add_max_u32 v0, s0, s1, v0
31- ; SDAG-NEXT: ; return to shader part epilog
32- ;
33- ; GISEL-LABEL: add_max_u32_ssv:
34- ; GISEL: ; %bb.0:
35- ; GISEL-NEXT: s_add_co_i32 s0, s0, s1
36- ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
37- ; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
38- ; GISEL-NEXT: ; return to shader part epilog
39- %add = add i32 %a , %b
28+ ; GCN-LABEL: add_max_u32_ssv:
29+ ; GCN: ; %bb.0:
30+ ; GCN-NEXT: v_add_max_u32 v0, s0, s1, v0
31+ ; GCN-NEXT: ; return to shader part epilog
32+ %add = call i32 @llvm.uadd.sat.i32 (i32 %a , i32 %b )
4033 %max = call i32 @llvm.umax.i32 (i32 %add , i32 %c )
4134 %ret = bitcast i32 %max to float
4235 ret float %ret
4336}
4437
4538define amdgpu_ps float @add_max_u32_sss (i32 inreg %a , i32 inreg %b , i32 inreg %c ) {
46- ; GCN-LABEL: add_max_u32_sss:
47- ; GCN: ; %bb.0:
48- ; GCN-NEXT: s_add_co_i32 s0, s0, s1
49- ; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
50- ; GCN-NEXT: s_max_u32 s0, s0, s2
51- ; GCN-NEXT: v_mov_b32_e32 v0, s0
52- ; GCN-NEXT: ; return to shader part epilog
53- %add = add i32 %a , %b
39+ ; SDAG-LABEL: add_max_u32_sss:
40+ ; SDAG: ; %bb.0:
41+ ; SDAG-NEXT: v_add_nc_u32_e64 v0, s0, s1 clamp
42+ ; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
43+ ; SDAG-NEXT: v_max_u32_e32 v0, s2, v0
44+ ; SDAG-NEXT: ; return to shader part epilog
45+ ;
46+ ; GISEL-LABEL: add_max_u32_sss:
47+ ; GISEL: ; %bb.0:
48+ ; GISEL-NEXT: v_mov_b32_e32 v0, s2
49+ ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
50+ ; GISEL-NEXT: v_add_max_u32 v0, s0, s1, v0
51+ ; GISEL-NEXT: ; return to shader part epilog
52+ %add = call i32 @llvm.uadd.sat.i32 (i32 %a , i32 %b )
5453 %max = call i32 @llvm.umax.i32 (i32 %add , i32 %c )
5554 %ret = bitcast i32 %max to float
5655 ret float %ret
@@ -61,7 +60,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
6160; GCN: ; %bb.0:
6261; GCN-NEXT: v_add_max_u32 v0, v0, s0, 4
6362; GCN-NEXT: ; return to shader part epilog
64- %add = add i32 %a , %b
63+ %add = call i32 @llvm.uadd.sat.i32 ( i32 %a , i32 %b )
6564 %max = call i32 @llvm.umax.i32 (i32 %add , i32 4 )
6665 %ret = bitcast i32 %max to float
6766 ret float %ret
@@ -72,26 +71,19 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
7271; GCN: ; %bb.0:
7372; GCN-NEXT: v_add_max_u32 v0, s0, v0, 0x64
7473; GCN-NEXT: ; return to shader part epilog
75- %add = add i32 %a , %b
74+ %add = call i32 @llvm.uadd.sat.i32 ( i32 %a , i32 %b )
7675 %max = call i32 @llvm.umax.i32 (i32 %add , i32 100 )
7776 %ret = bitcast i32 %max to float
7877 ret float %ret
7978}
8079
81- define amdgpu_ps float @add_max_u32_slv (i32 inreg %a , i32 %b ) {
82- ; SDAG-LABEL: add_max_u32_slv:
83- ; SDAG: ; %bb.0:
84- ; SDAG-NEXT: v_add_max_u32 v0, 0x64, s0, v0
85- ; SDAG-NEXT: ; return to shader part epilog
86- ;
87- ; GISEL-LABEL: add_max_u32_slv:
88- ; GISEL: ; %bb.0:
89- ; GISEL-NEXT: s_addk_co_i32 s0, 0x64
90- ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
91- ; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
92- ; GISEL-NEXT: ; return to shader part epilog
93- %add = add i32 %a , 100
94- %max = call i32 @llvm.umax.i32 (i32 %add , i32 %b )
; add_max_u32_slv: scalar (inreg) addend + literal addend (100), max against a
; regular argument. Keeps literal-operand coverage for the saturating-add form;
; without the literal this test would duplicate add_max_u32_svv.
; NOTE(review): the GCN-NEXT operand order below is inferred from the pre-change
; SDAG check and the add_max_v2u16_slv sibling -- regenerate the assertions with
; utils/update_llc_test_checks.py before committing.
80+ define amdgpu_ps float @add_max_u32_slv (i32 inreg %a , i32 %b ) {
81+ ; GCN-LABEL: add_max_u32_slv:
82+ ; GCN: ; %bb.0:
83+ ; GCN-NEXT: v_add_max_u32 v0, 0x64, s0, v0
84+ ; GCN-NEXT: ; return to shader part epilog
85+ %add = call i32 @llvm.uadd.sat.i32 (i32 %a , i32 100 )
86+ %max = call i32 @llvm.umax.i32 (i32 %add , i32 %b )
9587 %ret = bitcast i32 %max to float
9688 ret float %ret
9789}
@@ -101,7 +93,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
10193; GCN: ; %bb.0:
10294; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2
10395; GCN-NEXT: ; return to shader part epilog
104- %add = add i32 %a , %b
96+ %add = call i32 @llvm.sadd.sat.i32 ( i32 %a , i32 %b )
10597 %max = call i32 @llvm.smax.i32 (i32 %add , i32 %c )
10698 %ret = bitcast i32 %max to float
10799 ret float %ret
@@ -112,7 +104,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
112104; GCN: ; %bb.0:
113105; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2
114106; GCN-NEXT: ; return to shader part epilog
115- %add = add i32 %a , %b
107+ %add = call i32 @llvm.uadd.sat.i32 ( i32 %a , i32 %b )
116108 %max = call i32 @llvm.umin.i32 (i32 %add , i32 %c )
117109 %ret = bitcast i32 %max to float
118110 ret float %ret
@@ -123,7 +115,7 @@ define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
123115; GCN: ; %bb.0:
124116; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2
125117; GCN-NEXT: ; return to shader part epilog
126- %add = add i32 %a , %b
118+ %add = call i32 @llvm.sadd.sat.i32 ( i32 %a , i32 %b )
127119 %max = call i32 @llvm.smin.i32 (i32 %add , i32 %c )
128120 %ret = bitcast i32 %max to float
129121 ret float %ret
@@ -134,7 +126,7 @@ define amdgpu_ps float @add_max_v2u16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16>
134126; GCN: ; %bb.0:
135127; GCN-NEXT: v_pk_add_max_u16 v0, v0, v1, v2
136128; GCN-NEXT: ; return to shader part epilog
137- %add = add <2 x i16 > %a , %b
129+ %add = call <2 x i16 > @llvm.uadd.sat.i32 (< 2 x i16 > %a , < 2 x i16 > %b )
138130 %max = call <2 x i16 > @llvm.umax.v216 (<2 x i16 > %add , <2 x i16 > %c )
139131 %ret = bitcast <2 x i16 > %max to float
140132 ret float %ret
@@ -145,29 +137,18 @@ define amdgpu_ps float @add_max_v2u16_svv(<2 x i16> inreg %a, <2 x i16> %b, <2 x
145137; GCN: ; %bb.0:
146138; GCN-NEXT: v_pk_add_max_u16 v0, s0, v0, v1
147139; GCN-NEXT: ; return to shader part epilog
148- %add = add <2 x i16 > %a , %b
140+ %add = call <2 x i16 > @llvm.uadd.sat.i32 (< 2 x i16 > %a , < 2 x i16 > %b )
149141 %max = call <2 x i16 > @llvm.umax.v216 (<2 x i16 > %add , <2 x i16 > %c )
150142 %ret = bitcast <2 x i16 > %max to float
151143 ret float %ret
152144}
153145
154146define amdgpu_ps float @add_max_v2u16_ssv (<2 x i16 > inreg %a , <2 x i16 > inreg %b , <2 x i16 > %c ) {
155- ; SDAG-LABEL: add_max_v2u16_ssv:
156- ; SDAG: ; %bb.0:
157- ; SDAG-NEXT: v_pk_add_max_u16 v0, s0, s1, v0
158- ; SDAG-NEXT: ; return to shader part epilog
159- ;
160- ; GISEL-LABEL: add_max_v2u16_ssv:
161- ; GISEL: ; %bb.0:
162- ; GISEL-NEXT: s_lshr_b32 s2, s0, 16
163- ; GISEL-NEXT: s_lshr_b32 s3, s1, 16
164- ; GISEL-NEXT: s_add_co_i32 s0, s0, s1
165- ; GISEL-NEXT: s_add_co_i32 s2, s2, s3
166- ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
167- ; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s2
168- ; GISEL-NEXT: v_pk_max_u16 v0, s0, v0
169- ; GISEL-NEXT: ; return to shader part epilog
170- %add = add <2 x i16 > %a , %b
147+ ; GCN-LABEL: add_max_v2u16_ssv:
148+ ; GCN: ; %bb.0:
149+ ; GCN-NEXT: v_pk_add_max_u16 v0, s0, s1, v0
150+ ; GCN-NEXT: ; return to shader part epilog
151+ %add = call <2 x i16 > @llvm.uadd.sat.i32 (<2 x i16 > %a , <2 x i16 > %b )
171152 %max = call <2 x i16 > @llvm.umax.v216 (<2 x i16 > %add , <2 x i16 > %c )
172153 %ret = bitcast <2 x i16 > %max to float
173154 ret float %ret
@@ -176,30 +157,18 @@ define amdgpu_ps float @add_max_v2u16_ssv(<2 x i16> inreg %a, <2 x i16> inreg %b
176157define amdgpu_ps float @add_max_v2u16_sss (<2 x i16 > inreg %a , <2 x i16 > inreg %b , <2 x i16 > inreg %c ) {
177158; SDAG-LABEL: add_max_v2u16_sss:
178159; SDAG: ; %bb.0:
179- ; SDAG-NEXT: v_pk_add_u16 v0, s0, s1
160+ ; SDAG-NEXT: v_pk_add_u16 v0, s0, s1 clamp
180161; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
181162; SDAG-NEXT: v_pk_max_u16 v0, v0, s2
182163; SDAG-NEXT: ; return to shader part epilog
183164;
184165; GISEL-LABEL: add_max_v2u16_sss:
185166; GISEL: ; %bb.0:
186- ; GISEL-NEXT: s_lshr_b32 s3, s0, 16
187- ; GISEL-NEXT: s_lshr_b32 s4, s1, 16
188- ; GISEL-NEXT: s_add_co_i32 s0, s0, s1
189- ; GISEL-NEXT: s_add_co_i32 s3, s3, s4
190- ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
191- ; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s3
192- ; GISEL-NEXT: s_and_b32 s3, s2, 0xffff
193- ; GISEL-NEXT: s_lshr_b32 s1, s0, 16
194- ; GISEL-NEXT: s_and_b32 s0, s0, 0xffff
195- ; GISEL-NEXT: s_lshr_b32 s2, s2, 16
196- ; GISEL-NEXT: s_max_u32 s0, s0, s3
197- ; GISEL-NEXT: s_max_u32 s1, s1, s2
198- ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
199- ; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s1
200- ; GISEL-NEXT: v_mov_b32_e32 v0, s0
167+ ; GISEL-NEXT: v_mov_b32_e32 v0, s2
168+ ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
169+ ; GISEL-NEXT: v_pk_add_max_u16 v0, s0, s1, v0
201170; GISEL-NEXT: ; return to shader part epilog
202- %add = add <2 x i16 > %a , %b
171+ %add = call <2 x i16 > @llvm.uadd.sat.i32 (< 2 x i16 > %a , < 2 x i16 > %b )
203172 %max = call <2 x i16 > @llvm.umax.v216 (<2 x i16 > %add , <2 x i16 > %c )
204173 %ret = bitcast <2 x i16 > %max to float
205174 ret float %ret
@@ -210,7 +179,7 @@ define amdgpu_ps float @add_max_v2u16_vsi(<2 x i16> %a, <2 x i16> inreg %b) {
210179; GCN: ; %bb.0:
211180; GCN-NEXT: v_pk_add_max_u16 v0, v0, s0, 4
212181; GCN-NEXT: ; return to shader part epilog
213- %add = add <2 x i16 > %a , %b
182+ %add = call <2 x i16 > @llvm.uadd.sat.i32 (< 2 x i16 > %a , < 2 x i16 > %b )
214183 %max = call <2 x i16 > @llvm.umax.v216 (<2 x i16 > %add , <2 x i16 > <i16 4 , i16 0 >)
215184 %ret = bitcast <2 x i16 > %max to float
216185 ret float %ret
@@ -221,28 +190,18 @@ define amdgpu_ps float @add_max_v2u16_svl(<2 x i16> inreg %a, <2 x i16> %b) {
221190; GCN: ; %bb.0:
222191; GCN-NEXT: v_pk_add_max_u16 v0, s0, v0, 0x650064
223192; GCN-NEXT: ; return to shader part epilog
224- %add = add <2 x i16 > %a , %b
193+ %add = call <2 x i16 > @llvm.uadd.sat.i32 (< 2 x i16 > %a , < 2 x i16 > %b )
225194 %max = call <2 x i16 > @llvm.umax.v216 (<2 x i16 > %add , <2 x i16 > <i16 100 , i16 101 >)
226195 %ret = bitcast <2 x i16 > %max to float
227196 ret float %ret
228197}
229198
230199define amdgpu_ps float @add_max_v2u16_slv (<2 x i16 > inreg %a , <2 x i16 > %b ) {
231- ; SDAG-LABEL: add_max_v2u16_slv:
232- ; SDAG: ; %bb.0:
233- ; SDAG-NEXT: v_pk_add_max_u16 v0, 0x640064, s0, v0
234- ; SDAG-NEXT: ; return to shader part epilog
235- ;
236- ; GISEL-LABEL: add_max_v2u16_slv:
237- ; GISEL: ; %bb.0:
238- ; GISEL-NEXT: s_lshr_b32 s1, s0, 16
239- ; GISEL-NEXT: s_add_co_i32 s0, s0, 0x640064
240- ; GISEL-NEXT: s_addk_co_i32 s1, 0x64
241- ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
242- ; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s1
243- ; GISEL-NEXT: v_pk_max_u16 v0, s0, v0
244- ; GISEL-NEXT: ; return to shader part epilog
245- %add = add <2 x i16 > %a , <i16 100 , i16 100 >
200+ ; GCN-LABEL: add_max_v2u16_slv:
201+ ; GCN: ; %bb.0:
202+ ; GCN-NEXT: v_pk_add_max_u16 v0, 0x640064, s0, v0
203+ ; GCN-NEXT: ; return to shader part epilog
204+ %add = call <2 x i16 > @llvm.uadd.sat.i32 (<2 x i16 > %a , <2 x i16 > <i16 100 , i16 100 >)
246205 %max = call <2 x i16 > @llvm.umax.v216 (<2 x i16 > %add , <2 x i16 > %b )
247206 %ret = bitcast <2 x i16 > %max to float
248207 ret float %ret
@@ -253,7 +212,7 @@ define amdgpu_ps float @add_max_v2s16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16>
253212; GCN: ; %bb.0:
254213; GCN-NEXT: v_pk_add_max_i16 v0, v0, v1, v2
255214; GCN-NEXT: ; return to shader part epilog
256- %add = add <2 x i16 > %a , %b
215+ %add = call <2 x i16 > @llvm.sadd.sat.i32 (< 2 x i16 > %a , < 2 x i16 > %b )
257216 %max = call <2 x i16 > @llvm.smax.v216 (<2 x i16 > %add , <2 x i16 > %c )
258217 %ret = bitcast <2 x i16 > %max to float
259218 ret float %ret
@@ -264,7 +223,7 @@ define amdgpu_ps float @add_min_v2u16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16>
264223; GCN: ; %bb.0:
265224; GCN-NEXT: v_pk_add_min_u16 v0, v0, v1, v2
266225; GCN-NEXT: ; return to shader part epilog
267- %add = add <2 x i16 > %a , %b
226+ %add = call <2 x i16 > @llvm.uadd.sat.i32 (< 2 x i16 > %a , < 2 x i16 > %b )
268227 %max = call <2 x i16 > @llvm.umin.v216 (<2 x i16 > %add , <2 x i16 > %c )
269228 %ret = bitcast <2 x i16 > %max to float
270229 ret float %ret
@@ -275,7 +234,7 @@ define amdgpu_ps float @add_min_v2s16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16>
275234; GCN: ; %bb.0:
276235; GCN-NEXT: v_pk_add_min_i16 v0, v0, v1, v2
277236; GCN-NEXT: ; return to shader part epilog
278- %add = add <2 x i16 > %a , %b
237+ %add = call <2 x i16 > @llvm.sadd.sat.i32 (< 2 x i16 > %a , < 2 x i16 > %b )
279238 %max = call <2 x i16 > @llvm.smin.v216 (<2 x i16 > %add , <2 x i16 > %c )
280239 %ret = bitcast <2 x i16 > %max to float
281240 ret float %ret
0 commit comments