Skip to content

Commit 54053cf

Browse files
committed
AMDGPU: Add baseline tests for copysign with known signmask input (llvm#167265)
1 parent 726c049 commit 54053cf

File tree

2 files changed

+316
-0
lines changed

2 files changed

+316
-0
lines changed

llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,4 +370,112 @@ define float @test_copysign_pow_fast_f32__integral_y(float %x, i32 %y.i) {
370370
ret float %pow_sign1
371371
}
372372

373+
; Baseline for a fast pow(x, y) expansion on double with an integer exponent:
; exp2(sitofp(y) * log2(fabs(x))), with the final sign applied by copysign.
; The copysign sign operand is (zext(y) << 63) & bitcast(x), so only bit 63 can
; be set, and the exp2 call result is nofpclass(nan ninf nzero nsub nnorm).
; NOTE(review): this function uses attribute group #0 (memory(none),
; speculatable) yet calls the memory(read) declarations @_Z4log2d and
; @_Z4exp2d -- confirm that combination is intended.
define double @test_pow_fast_f64integral_y(double %x, i32 %y.i) #0 {
374+
; GFX9-LABEL: test_pow_fast_f64integral_y:
375+
; GFX9: ; %bb.0:
376+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377+
; GFX9-NEXT: s_mov_b32 s16, s33
378+
; GFX9-NEXT: s_mov_b32 s33, s32
379+
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
380+
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
381+
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
382+
; GFX9-NEXT: v_writelane_b32 v43, s16, 15
383+
; GFX9-NEXT: v_writelane_b32 v43, s30, 0
384+
; GFX9-NEXT: v_writelane_b32 v43, s31, 1
385+
; GFX9-NEXT: v_writelane_b32 v43, s34, 2
386+
; GFX9-NEXT: v_writelane_b32 v43, s35, 3
387+
; GFX9-NEXT: v_writelane_b32 v43, s36, 4
388+
; GFX9-NEXT: v_writelane_b32 v43, s37, 5
389+
; GFX9-NEXT: v_writelane_b32 v43, s38, 6
390+
; GFX9-NEXT: v_writelane_b32 v43, s39, 7
391+
; GFX9-NEXT: v_writelane_b32 v43, s48, 8
392+
; GFX9-NEXT: v_writelane_b32 v43, s49, 9
393+
; GFX9-NEXT: v_writelane_b32 v43, s50, 10
394+
; GFX9-NEXT: v_writelane_b32 v43, s51, 11
395+
; GFX9-NEXT: s_addk_i32 s32, 0x800
396+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
397+
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
398+
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
399+
; GFX9-NEXT: v_writelane_b32 v43, s52, 12
400+
; GFX9-NEXT: v_mov_b32_e32 v42, v1
401+
; GFX9-NEXT: v_writelane_b32 v43, s53, 13
402+
; GFX9-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42
403+
; GFX9-NEXT: s_getpc_b64 s[16:17]
404+
; GFX9-NEXT: s_add_u32 s16, s16, _Z4log2d@rel32@lo+4
405+
; GFX9-NEXT: s_addc_u32 s17, s17, _Z4log2d@rel32@hi+12
406+
; GFX9-NEXT: v_writelane_b32 v43, s54, 14
407+
; GFX9-NEXT: v_mov_b32_e32 v40, v31
408+
; GFX9-NEXT: v_mov_b32_e32 v41, v2
409+
; GFX9-NEXT: s_mov_b32 s50, s15
410+
; GFX9-NEXT: s_mov_b32 s51, s14
411+
; GFX9-NEXT: s_mov_b32 s52, s13
412+
; GFX9-NEXT: s_mov_b32 s53, s12
413+
; GFX9-NEXT: s_mov_b64 s[34:35], s[10:11]
414+
; GFX9-NEXT: s_mov_b64 s[36:37], s[8:9]
415+
; GFX9-NEXT: s_mov_b64 s[38:39], s[6:7]
416+
; GFX9-NEXT: s_mov_b64 s[48:49], s[4:5]
417+
; GFX9-NEXT: s_brev_b32 s54, -2
418+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
419+
; GFX9-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
420+
; GFX9-NEXT: s_getpc_b64 s[16:17]
421+
; GFX9-NEXT: s_add_u32 s16, s16, _Z4exp2d@rel32@lo+4
422+
; GFX9-NEXT: s_addc_u32 s17, s17, _Z4exp2d@rel32@hi+12
423+
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
424+
; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
425+
; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
426+
; GFX9-NEXT: s_mov_b64 s[8:9], s[36:37]
427+
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
428+
; GFX9-NEXT: s_mov_b32 s12, s53
429+
; GFX9-NEXT: s_mov_b32 s13, s52
430+
; GFX9-NEXT: s_mov_b32 s14, s51
431+
; GFX9-NEXT: s_mov_b32 s15, s50
432+
; GFX9-NEXT: v_mov_b32_e32 v31, v40
433+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
434+
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v41
435+
; GFX9-NEXT: v_and_b32_e32 v2, v2, v42
436+
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
437+
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
438+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
439+
; GFX9-NEXT: v_bfi_b32 v1, s54, v1, v2
440+
; GFX9-NEXT: v_readlane_b32 s54, v43, 14
441+
; GFX9-NEXT: v_readlane_b32 s53, v43, 13
442+
; GFX9-NEXT: v_readlane_b32 s52, v43, 12
443+
; GFX9-NEXT: v_readlane_b32 s51, v43, 11
444+
; GFX9-NEXT: v_readlane_b32 s50, v43, 10
445+
; GFX9-NEXT: v_readlane_b32 s49, v43, 9
446+
; GFX9-NEXT: v_readlane_b32 s48, v43, 8
447+
; GFX9-NEXT: v_readlane_b32 s39, v43, 7
448+
; GFX9-NEXT: v_readlane_b32 s38, v43, 6
449+
; GFX9-NEXT: v_readlane_b32 s37, v43, 5
450+
; GFX9-NEXT: v_readlane_b32 s36, v43, 4
451+
; GFX9-NEXT: v_readlane_b32 s35, v43, 3
452+
; GFX9-NEXT: v_readlane_b32 s34, v43, 2
453+
; GFX9-NEXT: v_readlane_b32 s31, v43, 1
454+
; GFX9-NEXT: v_readlane_b32 s30, v43, 0
455+
; GFX9-NEXT: s_mov_b32 s32, s33
456+
; GFX9-NEXT: v_readlane_b32 s4, v43, 15
457+
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
458+
; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
459+
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
460+
; GFX9-NEXT: s_mov_b32 s33, s4
461+
; GFX9-NEXT: s_waitcnt vmcnt(0)
462+
; GFX9-NEXT: s_setpc_b64 s[30:31]
463+
%fabs = call fast double @llvm.fabs.f64(double %x)
464+
%log2 = call fast double @_Z4log2d(double %fabs)
465+
%pownI2F = sitofp i32 %y.i to double
466+
%ylogx = fmul fast double %log2, %pownI2F
467+
%exp2 = call fast nofpclass(nan ninf nzero nsub nnorm) double @_Z4exp2d(double %ylogx)
468+
%ytou = zext i32 %y.i to i64
469+
%yeven = shl i64 %ytou, 63
470+
%x.i64 = bitcast double %x to i64
471+
%pow_sign = and i64 %yeven, %x.i64
472+
%pow_sign.f64 = bitcast i64 %pow_sign to double
473+
%pow_sign1 = call fast double @llvm.copysign.f64(double %exp2, double %pow_sign.f64)
474+
ret double %pow_sign1
475+
}
476+
477+
; External math routines called by test_pow_fast_f64integral_y; the names are
; the Itanium-mangled forms of exp2(double) and log2(double). Both use
; attribute group #1.
declare hidden double @_Z4exp2d(double) #1
478+
declare hidden double @_Z4log2d(double) #1
479+
373480
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
481+
attributes #1 = { norecurse nounwind memory(read) }
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3+
4+
; Negative test: nothing is known about the sign bit of the magnitude %x.
; The sign operand is (%sign << 15), so only bit 15 can be set.
5+
define half @copysign_known_signmask_f16(half %x, i16 %sign) {
6+
; GFX9-LABEL: copysign_known_signmask_f16:
7+
; GFX9: ; %bb.0:
8+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; GFX9-NEXT: v_lshlrev_b16_e32 v1, 15, v1
10+
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
11+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
12+
; GFX9-NEXT: s_setpc_b64 s[30:31]
13+
%signmask = shl i16 %sign, 15
14+
%signmask.bitcast = bitcast i16 %signmask to half
15+
%result = call half @llvm.copysign.f16(half %x, half %signmask.bitcast)
16+
ret half %result
17+
}
18+
19+
; Negative test: nothing is known about the sign bit of the magnitude %x.
; The sign operand is (%sign << 31), so only bit 31 can be set.
20+
define float @copysign_known_signmask_f32(float %x, i32 %sign) {
21+
; GFX9-LABEL: copysign_known_signmask_f32:
22+
; GFX9: ; %bb.0:
23+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
25+
; GFX9-NEXT: s_brev_b32 s4, -2
26+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
27+
; GFX9-NEXT: s_setpc_b64 s[30:31]
28+
%signmask = shl i32 %sign, 31
29+
%signmask.bitcast = bitcast i32 %signmask to float
30+
%result = call float @llvm.copysign.f32(float %x, float %signmask.bitcast)
31+
ret float %result
32+
}
33+
34+
; Negative test: nothing is known about the sign bit of the magnitude %x.
; The sign operand is (%sign << 63), so only bit 63 can be set.
35+
define double @copysign_known_signmask_f64(double %x, i64 %sign) {
36+
; GFX9-LABEL: copysign_known_signmask_f64:
37+
; GFX9: ; %bb.0:
38+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39+
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v2
40+
; GFX9-NEXT: s_brev_b32 s4, -2
41+
; GFX9-NEXT: v_bfi_b32 v1, s4, v1, v2
42+
; GFX9-NEXT: s_setpc_b64 s[30:31]
43+
%signmask = shl i64 %sign, 63
44+
%signmask.bitcast = bitcast i64 %signmask to double
45+
%result = call double @llvm.copysign.f64(double %x, double %signmask.bitcast)
46+
ret double %result
47+
}
48+
49+
; Negative test: the magnitude's nofpclass excludes the negative classes but
; not nan, so it may be a NaN and its sign bit is not known to be zero.
50+
define float @copysign_known_signmask_f32_known_not_known_positive_mag_maybe_nan(float nofpclass(ninf nzero nsub nnorm) %sign.bit.known.zero, i32 %sign) {
51+
; GFX9-LABEL: copysign_known_signmask_f32_known_not_known_positive_mag_maybe_nan:
52+
; GFX9: ; %bb.0:
53+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
55+
; GFX9-NEXT: s_brev_b32 s4, -2
56+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
57+
; GFX9-NEXT: s_setpc_b64 s[30:31]
58+
%signmask = shl i32 %sign, 31
59+
%signmask.bitcast = bitcast i32 %signmask to float
60+
%result = call float @llvm.copysign.f32(float %sign.bit.known.zero, float %signmask.bitcast)
61+
ret float %result
62+
}
63+
64+
; Negative test: the magnitude's nofpclass excludes nan, ninf, nsub and nnorm
; but not nzero, so it may be -0.0 and its sign bit is not known to be zero.
65+
define float @copysign_known_signmask_f32_known_not_known_positive_mag_maybe_negzero(float nofpclass(nan ninf nsub nnorm) %sign.bit.known.zero, i32 %sign) {
66+
; GFX9-LABEL: copysign_known_signmask_f32_known_not_known_positive_mag_maybe_negzero:
67+
; GFX9: ; %bb.0:
68+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
70+
; GFX9-NEXT: s_brev_b32 s4, -2
71+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
72+
; GFX9-NEXT: s_setpc_b64 s[30:31]
73+
%signmask = shl i32 %sign, 31
74+
%signmask.bitcast = bitcast i32 %signmask to float
75+
%result = call float @llvm.copysign.f32(float %sign.bit.known.zero, float %signmask.bitcast)
76+
ret float %result
77+
}
78+
79+
; The magnitude is nofpclass(nan ninf nzero nsub nnorm), i.e. neither NaN nor
; any negative class; the sign operand is (%sign << 15), so only bit 15 can be
; set.
define half @copysign_known_signmask_f16_known_positive_mag(half nofpclass(nan ninf nzero nsub nnorm) %sign.bit.known.zero, i16 %sign) {
80+
; GFX9-LABEL: copysign_known_signmask_f16_known_positive_mag:
81+
; GFX9: ; %bb.0:
82+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83+
; GFX9-NEXT: v_lshlrev_b16_e32 v1, 15, v1
84+
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
85+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
86+
; GFX9-NEXT: s_setpc_b64 s[30:31]
87+
%signmask = shl i16 %sign, 15
88+
%signmask.bitcast = bitcast i16 %signmask to half
89+
%result = call half @llvm.copysign.f16(half %sign.bit.known.zero, half %signmask.bitcast)
90+
ret half %result
91+
}
92+
93+
; The magnitude is nofpclass(nan ninf nzero nsub nnorm), i.e. neither NaN nor
; any negative class; the sign operand is (%sign << 31), so only bit 31 can be
; set.
define float @copysign_known_signmask_f32_known_positive_mag(float nofpclass(nan ninf nzero nsub nnorm) %sign.bit.known.zero, i32 %sign) {
94+
; GFX9-LABEL: copysign_known_signmask_f32_known_positive_mag:
95+
; GFX9: ; %bb.0:
96+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
98+
; GFX9-NEXT: s_brev_b32 s4, -2
99+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
100+
; GFX9-NEXT: s_setpc_b64 s[30:31]
101+
%signmask = shl i32 %sign, 31
102+
%signmask.bitcast = bitcast i32 %signmask to float
103+
%result = call float @llvm.copysign.f32(float %sign.bit.known.zero, float %signmask.bitcast)
104+
ret float %result
105+
}
106+
107+
; The magnitude is nofpclass(nan ninf nzero nsub nnorm), i.e. neither NaN nor
; any negative class; the sign operand is (%sign << 63), so only bit 63 can be
; set.
define double @copysign_known_signmask_f64_known_positive_mag(double nofpclass(nan ninf nzero nsub nnorm) %sign.bit.known.zero, i64 %sign) {
108+
; GFX9-LABEL: copysign_known_signmask_f64_known_positive_mag:
109+
; GFX9: ; %bb.0:
110+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111+
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v2
112+
; GFX9-NEXT: s_brev_b32 s4, -2
113+
; GFX9-NEXT: v_bfi_b32 v1, s4, v1, v2
114+
; GFX9-NEXT: s_setpc_b64 s[30:31]
115+
%signmask = shl i64 %sign, 63
116+
%signmask.bitcast = bitcast i64 %signmask to double
117+
%result = call double @llvm.copysign.f64(double %sign.bit.known.zero, double %signmask.bitcast)
118+
ret double %result
119+
}
120+
121+
; exp always returns a positive result, except that the sign bit of a NaN
122+
; result is unknown; the llvm.exp.f32 call below carries nnan.
123+
define float @copysign_known_signmask_f32_known_positive_mag__nnan_exp(float %x, i32 %sign) {
124+
; GFX9-LABEL: copysign_known_signmask_f32_known_positive_mag__nnan_exp:
125+
; GFX9: ; %bb.0:
126+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127+
; GFX9-NEXT: s_mov_b32 s4, 0xc2aeac50
128+
; GFX9-NEXT: v_add_f32_e32 v2, 0x42800000, v0
129+
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
130+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
131+
; GFX9-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0
132+
; GFX9-NEXT: v_exp_f32_e32 v0, v0
133+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
134+
; GFX9-NEXT: s_brev_b32 s4, -2
135+
; GFX9-NEXT: v_mul_f32_e32 v2, 0x114b4ea4, v0
136+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
137+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
138+
; GFX9-NEXT: s_setpc_b64 s[30:31]
139+
%signbit.known.zero = call nnan afn float @llvm.exp.f32(float %x)
140+
%signmask = shl i32 %sign, 31
141+
%signmask.bitcast = bitcast i32 %signmask to float
142+
%result = call float @llvm.copysign.f32(float %signbit.known.zero, float %signmask.bitcast)
143+
ret float %result
144+
}
145+
146+
; The magnitude is the result of an nnan afn llvm.exp2.f32 call; the sign
; operand is (%sign << 31), so only bit 31 can be set.
define float @copysign_known_signmask_f32_known_positive_mag__nnan_exp2(float %x, i32 %sign) {
147+
; GFX9-LABEL: copysign_known_signmask_f32_known_positive_mag__nnan_exp2:
148+
; GFX9: ; %bb.0:
149+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150+
; GFX9-NEXT: s_mov_b32 s4, 0xc2fc0000
151+
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
152+
; GFX9-NEXT: v_mov_b32_e32 v3, 0x42800000
153+
; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
154+
; GFX9-NEXT: v_add_f32_e32 v0, v0, v3
155+
; GFX9-NEXT: v_exp_f32_e32 v0, v0
156+
; GFX9-NEXT: v_not_b32_e32 v2, 63
157+
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
158+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
159+
; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
160+
; GFX9-NEXT: s_brev_b32 s4, -2
161+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
162+
; GFX9-NEXT: s_setpc_b64 s[30:31]
163+
%signbit.known.zero = call nnan afn float @llvm.exp2.f32(float %x)
164+
%signmask = shl i32 %sign, 31
165+
%signmask.bitcast = bitcast i32 %signmask to float
166+
%result = call float @llvm.copysign.f32(float %signbit.known.zero, float %signmask.bitcast)
167+
ret float %result
168+
}
169+
170+
; The magnitude is the result of an nnan afn call; the sign operand is
; (%sign << 31), so only bit 31 can be set.
; NOTE(review): the function name says exp10, but the body calls
; @llvm.exp2.f32 and the expected output matches an exp2 lowering. Presumably
; @llvm.exp10.f32 was intended -- confirm, then regenerate the assertions with
; utils/update_llc_test_checks.py.
define float @copysign_known_signmask_f32_known_positive_mag__nnan_exp10(float %x, i32 %sign) {
171+
; GFX9-LABEL: copysign_known_signmask_f32_known_positive_mag__nnan_exp10:
172+
; GFX9: ; %bb.0:
173+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174+
; GFX9-NEXT: s_mov_b32 s4, 0xc2fc0000
175+
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
176+
; GFX9-NEXT: v_mov_b32_e32 v3, 0x42800000
177+
; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
178+
; GFX9-NEXT: v_add_f32_e32 v0, v0, v3
179+
; GFX9-NEXT: v_exp_f32_e32 v0, v0
180+
; GFX9-NEXT: v_not_b32_e32 v2, 63
181+
; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
182+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
183+
; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
184+
; GFX9-NEXT: s_brev_b32 s4, -2
185+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
186+
; GFX9-NEXT: s_setpc_b64 s[30:31]
187+
%signbit.known.zero = call nnan afn float @llvm.exp2.f32(float %x)
188+
%signmask = shl i32 %sign, 31
189+
%signmask.bitcast = bitcast i32 %signmask to float
190+
%result = call float @llvm.copysign.f32(float %signbit.known.zero, float %signmask.bitcast)
191+
ret float %result
192+
}
193+
194+
; The magnitude is nofpclass(nan ninf nzero nsub nnorm) and is passed through
; llvm.arithmetic.fence before reaching copysign; the sign operand is
; (%sign << 31), so only bit 31 can be set.
define float @copysign_known_signmask_f32_known_positive_mag_through_fence(float nofpclass(nan ninf nzero nsub nnorm) %sign.bit.known.zero, i32 %sign) {
195+
; GFX9-LABEL: copysign_known_signmask_f32_known_positive_mag_through_fence:
196+
; GFX9: ; %bb.0:
197+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198+
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
199+
; GFX9-NEXT: ;ARITH_FENCE
200+
; GFX9-NEXT: s_brev_b32 s4, -2
201+
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
202+
; GFX9-NEXT: s_setpc_b64 s[30:31]
203+
%signmask = shl i32 %sign, 31
204+
%signmask.bitcast = bitcast i32 %signmask to float
205+
%fence = call float @llvm.arithmetic.fence.f32(float %sign.bit.known.zero)
206+
%result = call float @llvm.copysign.f32(float %fence, float %signmask.bitcast)
207+
ret float %result
208+
}

0 commit comments

Comments
 (0)