@@ -154,4 +154,226 @@ define <2 x double> @test_pown_reduced_fast_v2f64_known_odd(<2 x double> %x, <2
154154 ret <2 x double > %pow_sign1
155155}
156156
; Baseline case: f32 copysign whose sign operand is (%y.i << 31) reinterpreted
; as a float. Only bit 31 of that operand can be set, so it is +0.0 or -0.0.
; Note that, despite the name %y.even, bit 31 of the shifted value is bit 0 of
; %y.i, i.e. the sign bit is set when %y.i is odd.
; The checks expect the shift followed by a single v_bfi_b32 with the mask
; 0x7fffffff (s_brev_b32 of -2): magnitude bits from v0, sign bit from v1.
157+ define float @copysign_f32_f32_sign_known_p0_or_n0 (float %x , i32 %y.i ) {
158+ ; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0:
159+ ; GFX9: ; %bb.0:
160+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161+ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
162+ ; GFX9-NEXT: s_brev_b32 s4, -2
163+ ; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
164+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
165+ %y.even = shl i32 %y.i , 31
166+ %y.even.as.f32 = bitcast i32 %y.even to float
167+ %copysign = call float @llvm.copysign.f32 (float %x , float %y.even.as.f32 )
168+ ret float %copysign
169+ }
170+
171+ define double @copysign_f64_f32_sign_known_p0_or_n0 (double %x , i32 %y.i ) {
172+ ; GFX9-LABEL: copysign_f64_f32_sign_known_p0_or_n0:
173+ ; GFX9: ; %bb.0:
174+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175+ ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v2
176+ ; GFX9-NEXT: s_brev_b32 s4, -2
177+ ; GFX9-NEXT: v_bfi_b32 v1, s4, v1, v2
178+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
179+ %y.even = shl i32 %y.i , 31
180+ %y.even.as.f32 = bitcast i32 %y.even to float
181+ %y.even.as.f32.fpext = fpext float %y.even.as.f32 to double
182+ %copysign = call double @llvm.copysign.f64 (double %x , double %y.even.as.f32.fpext )
183+ ret double %copysign
184+ }
185+
; f16 magnitude with a sign operand that is a +0.0/-0.0 float (built from
; %y.i << 31) narrowed to half with fptrunc.
; The checks contain no conversion instruction for the fptrunc: the value is
; shifted left by 31, then right by 16 (moving bit 31 down to bit 15, the f16
; sign position), and inserted with v_bfi_b32 using the mask 0x7fff.
186+ define half @copysign_f16_f32_sign_known_p0_or_n0 (half %x , i32 %y.i ) {
187+ ; GFX9-LABEL: copysign_f16_f32_sign_known_p0_or_n0:
188+ ; GFX9: ; %bb.0:
189+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190+ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
191+ ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
192+ ; GFX9-NEXT: s_movk_i32 s4, 0x7fff
193+ ; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
194+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
195+ %y.even = shl i32 %y.i , 31
196+ %y.even.as.f32 = bitcast i32 %y.even to float
197+ %y.even.as.f32.fptrunc = fptrunc float %y.even.as.f32 to half
198+ %copysign = call half @llvm.copysign.f16 (half %x , half %y.even.as.f32.fptrunc )
199+ ret half %copysign
200+ }
201+
; Same +0.0/-0.0 sign operand as the baseline case, but the magnitude operand
; is fabs(%x.arg), so its sign bit is already known to be clear.
; The checks contain no separate instruction for the fabs: the sequence is the
; same shift + v_bfi_b32 (mask 0x7fffffff) as when the magnitude is an
; arbitrary float.
202+ define float @copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_fabs (float %x.arg , i32 %y.i ) {
203+ ; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_fabs:
204+ ; GFX9: ; %bb.0:
205+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206+ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
207+ ; GFX9-NEXT: s_brev_b32 s4, -2
208+ ; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
209+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
210+ %x = call float @llvm.fabs.f32 (float %x.arg )
211+ %y.even = shl i32 %y.i , 31
212+ %y.even.as.f32 = bitcast i32 %y.even to float
213+ %copysign = call float @llvm.copysign.f32 (float %x , float %y.even.as.f32 )
214+ ret float %copysign
215+ }
216+
; Same +0.0/-0.0 sign operand as the baseline case, but the magnitude operand
; is clamped by a select: `fcmp ule` is true when %x.arg <= 0.0 or when the
; compare is unordered (NaN), and in that case the select yields +0.0.
; Otherwise %x.arg, which is then strictly greater than zero, is passed
; through. The magnitude is therefore +0.0 or a positive value.
; The checks expect the clamp as v_cmp_lt_f32 (0 < v0) + v_cndmask_b32,
; followed by the usual shift + v_bfi_b32 with the mask 0x7fffffff.
217+ define float @copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_select (float %x.arg , i32 %y.i ) {
218+ ; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_select:
219+ ; GFX9: ; %bb.0:
220+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221+ ; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
222+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
223+ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
224+ ; GFX9-NEXT: s_brev_b32 s4, -2
225+ ; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
226+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
227+ %x.ule.0 = fcmp ule float %x.arg , 0.0
228+ %x = select i1 %x.ule.0 , float 0.0 , float %x.arg
229+ %y.even = shl i32 %y.i , 31
230+ %y.even.as.f32 = bitcast i32 %y.even to float
231+ %copysign = call float @llvm.copysign.f32 (float %x , float %y.even.as.f32 )
232+ ret float %copysign
233+ }
234+
; Same +0.0/-0.0 sign operand as the baseline case; the magnitude operand is
; the result of llvm.sqrt.f32 called with both the nnan and nsz flags. Per the
; test name, that combination is meant to make the magnitude count as known
; positive (presumably: no NaN result and the sign of a zero result may be
; ignored -- confirm against the LangRef fast-math flag definitions).
; The checks expect the expanded f32 sqrt sequence (input scaling, v_sqrt_f32,
; a +/-1 ulp correction via v_fma_f32, and a v_cmp_class_f32 fix-up) followed
; by v_bfi_b32 with the mask 0x7fffffff to insert the sign bit.
235+ define float @copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_nnan_nsz_sqrt (float %x.arg , i32 %y.i ) {
236+ ; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_nnan_nsz_sqrt:
237+ ; GFX9: ; %bb.0:
238+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239+ ; GFX9-NEXT: s_mov_b32 s4, 0xf800000
240+ ; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
241+ ; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
242+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
243+ ; GFX9-NEXT: v_sqrt_f32_e32 v2, v0
244+ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
245+ ; GFX9-NEXT: v_add_u32_e32 v3, -1, v2
246+ ; GFX9-NEXT: v_fma_f32 v4, -v3, v2, v0
247+ ; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
248+ ; GFX9-NEXT: v_add_u32_e32 v4, 1, v2
249+ ; GFX9-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
250+ ; GFX9-NEXT: v_fma_f32 v2, -v4, v2, v0
251+ ; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
252+ ; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
253+ ; GFX9-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
254+ ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
255+ ; GFX9-NEXT: v_mov_b32_e32 v3, 0x260
256+ ; GFX9-NEXT: v_cmp_class_f32_e32 vcc, v0, v3
257+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
258+ ; GFX9-NEXT: s_brev_b32 s4, -2
259+ ; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
260+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
261+ %x = call nnan nsz float @llvm.sqrt.f32 (float %x.arg )
262+ %y.even = shl i32 %y.i , 31
263+ %y.even.as.f32 = bitcast i32 %y.even to float
264+ %copysign = call float @llvm.copysign.f32 (float %x , float %y.even.as.f32 )
265+ ret float %copysign
266+ }
267+
; Variant of the nnan+nsz sqrt case where the sqrt call carries only nsz.
; Per the test name the magnitude is only "almost" known positive; presumably
; because, without nnan, sqrt of a negative input may produce a NaN whose sign
; bit is not known (confirm against the llvm.sqrt semantics in the LangRef).
; The checks expect the expanded f32 sqrt sequence followed by v_bfi_b32 with
; the mask 0x7fffffff to insert the sign bit.
268+ define float @copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nsz_sqrt (float %x.arg , i32 %y.i ) {
269+ ; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nsz_sqrt:
270+ ; GFX9: ; %bb.0:
271+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272+ ; GFX9-NEXT: s_mov_b32 s4, 0xf800000
273+ ; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
274+ ; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
275+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
276+ ; GFX9-NEXT: v_sqrt_f32_e32 v2, v0
277+ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
278+ ; GFX9-NEXT: v_add_u32_e32 v3, -1, v2
279+ ; GFX9-NEXT: v_fma_f32 v4, -v3, v2, v0
280+ ; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
281+ ; GFX9-NEXT: v_add_u32_e32 v4, 1, v2
282+ ; GFX9-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
283+ ; GFX9-NEXT: v_fma_f32 v2, -v4, v2, v0
284+ ; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
285+ ; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
286+ ; GFX9-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
287+ ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
288+ ; GFX9-NEXT: v_mov_b32_e32 v3, 0x260
289+ ; GFX9-NEXT: v_cmp_class_f32_e32 vcc, v0, v3
290+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
291+ ; GFX9-NEXT: s_brev_b32 s4, -2
292+ ; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
293+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
294+ %x = call nsz float @llvm.sqrt.f32 (float %x.arg )
295+ %y.even = shl i32 %y.i , 31
296+ %y.even.as.f32 = bitcast i32 %y.even to float
297+ %copysign = call float @llvm.copysign.f32 (float %x , float %y.even.as.f32 )
298+ ret float %copysign
299+ }
300+
; Variant of the nnan+nsz sqrt case where the sqrt call carries only nnan.
; Per the test name the magnitude is only "almost" known positive; presumably
; because, without nsz, sqrt(-0.0) yields -0.0 and so the sign bit may still be
; set (confirm against the llvm.sqrt semantics in the LangRef).
; The checks expect the expanded f32 sqrt sequence followed by v_bfi_b32 with
; the mask 0x7fffffff to insert the sign bit.
301+ define float @copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nnan_sqrt (float %x.arg , i32 %y.i ) {
302+ ; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nnan_sqrt:
303+ ; GFX9: ; %bb.0:
304+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305+ ; GFX9-NEXT: s_mov_b32 s4, 0xf800000
306+ ; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
307+ ; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
308+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
309+ ; GFX9-NEXT: v_sqrt_f32_e32 v2, v0
310+ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
311+ ; GFX9-NEXT: v_add_u32_e32 v3, -1, v2
312+ ; GFX9-NEXT: v_fma_f32 v4, -v3, v2, v0
313+ ; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
314+ ; GFX9-NEXT: v_add_u32_e32 v4, 1, v2
315+ ; GFX9-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
316+ ; GFX9-NEXT: v_fma_f32 v2, -v4, v2, v0
317+ ; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
318+ ; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
319+ ; GFX9-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
320+ ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
321+ ; GFX9-NEXT: v_mov_b32_e32 v3, 0x260
322+ ; GFX9-NEXT: v_cmp_class_f32_e32 vcc, v0, v3
323+ ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
324+ ; GFX9-NEXT: s_brev_b32 s4, -2
325+ ; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
326+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
327+ %x = call nnan float @llvm.sqrt.f32 (float %x.arg )
328+ %y.even = shl i32 %y.i , 31
329+ %y.even.as.f32 = bitcast i32 %y.even to float
330+ %copysign = call float @llvm.copysign.f32 (float %x , float %y.even.as.f32 )
331+ ret float %copysign
332+ }
333+
; Hand-expanded pow(x, y) for an integral exponent, in the form
;   copysign(exp2(y * log2(|x|)), sign)
; where the sign operand is ((%y.fptosi << 31) & bits(%x)) reinterpreted as a
; float: only bit 31 can be set, and it is set when the round-tripped exponent
; is odd and %x has its sign bit set.
; %y.fptosi is %y.i converted to float and back (sitofp then fptosi); the
; checks keep both conversions (v_cvt_f32_i32 followed by v_cvt_i32_f32).
; %pownI2F repeats the same sitofp as %y and is the value used in the multiply.
; The checks expect the expanded log2/exp2 sequences (with their input/output
; scaling), a v_and_b32 forming the sign operand, and a final v_bfi_b32 with
; the mask 0x7fffffff.
334+ define float @test_copysign_pow_fast_f32__integral_y (float %x , i32 %y.i ) {
335+ ; GFX9-LABEL: test_copysign_pow_fast_f32__integral_y:
336+ ; GFX9: ; %bb.0:
337+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
338+ ; GFX9-NEXT: s_mov_b32 s4, 0x800000
339+ ; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
340+ ; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000
341+ ; GFX9-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
342+ ; GFX9-NEXT: v_mul_f32_e64 v3, |v0|, v3
343+ ; GFX9-NEXT: v_log_f32_e32 v3, v3
344+ ; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
345+ ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
346+ ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
347+ ; GFX9-NEXT: v_sub_f32_e32 v2, v3, v2
348+ ; GFX9-NEXT: v_mul_f32_e32 v3, v2, v1
349+ ; GFX9-NEXT: s_mov_b32 s4, 0xc2fc0000
350+ ; GFX9-NEXT: v_mov_b32_e32 v4, 0x42800000
351+ ; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
352+ ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
353+ ; GFX9-NEXT: v_fma_f32 v2, v2, v1, v3
354+ ; GFX9-NEXT: v_cvt_i32_f32_e32 v1, v1
355+ ; GFX9-NEXT: v_exp_f32_e32 v2, v2
356+ ; GFX9-NEXT: v_mov_b32_e32 v3, 0x1f800000
357+ ; GFX9-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
358+ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
359+ ; GFX9-NEXT: v_mul_f32_e32 v2, v2, v3
360+ ; GFX9-NEXT: v_and_b32_e32 v0, v1, v0
361+ ; GFX9-NEXT: s_brev_b32 s4, -2
362+ ; GFX9-NEXT: v_bfi_b32 v0, s4, v2, v0
363+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
364+ %y = sitofp i32 %y.i to float
365+ %y.fptosi = fptosi float %y to i32
366+ %fabs = call fast float @llvm.fabs.f32 (float %x )
367+ %log2 = call fast float @llvm.log2.f32 (float %fabs )
368+ %pownI2F = sitofp i32 %y.i to float
369+ %ylogx = fmul fast float %log2 , %pownI2F
370+ %exp2 = call fast float @llvm.exp2.f32 (float %ylogx )
371+ %yeven = shl i32 %y.fptosi , 31
372+ %x.i32 = bitcast float %x to i32
373+ %pow_sign = and i32 %yeven , %x.i32
374+ %pow_sign.f32 = bitcast i32 %pow_sign to float
375+ %pow_sign1 = call fast float @llvm.copysign.f32 (float %exp2 , float %pow_sign.f32 )
376+ ret float %pow_sign1
377+ }
378+
; Attribute group #0. None of the function definitions visible in this section
; reference it; presumably it is attached to the intrinsic declarations
; elsewhere in the file (TODO confirm).
157379attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
0 commit comments