@@ -11,32 +11,21 @@ target triple = "aarch64-unknown-linux-gnu"
1111define void @test_copysign_f16 (ptr %ap , ptr %bp ) {
1212; SVE-LABEL: test_copysign_f16:
1313; SVE: // %bb.0:
14- ; SVE-NEXT: adrp x8, .LCPI0_0
14+ ; SVE-NEXT: ldr h0, [x1]
1515; SVE-NEXT: ldr h1, [x0]
16- ; SVE-NEXT: ldr h2, [x1]
17- ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
18- ; SVE-NEXT: adrp x8, .LCPI0_1
19- ; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
20- ; SVE-NEXT: mov z3.d, z0.d
21- ; SVE-NEXT: fmov s0, s1
22- ; SVE-NEXT: fmov s3, s2
23- ; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
16+ ; SVE-NEXT: and z0.h, z0.h, #0x8000
17+ ; SVE-NEXT: and z1.h, z1.h, #0x7fff
18+ ; SVE-NEXT: orr z0.d, z1.d, z0.d
2419; SVE-NEXT: str h0, [x0]
2520; SVE-NEXT: ret
2621;
2722; SVE2-LABEL: test_copysign_f16:
2823; SVE2: // %bb.0:
29- ; SVE2-NEXT: adrp x8, .LCPI0_0
30- ; SVE2-NEXT: ldr h1, [x0]
31- ; SVE2-NEXT: ldr h2, [x1]
32- ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
33- ; SVE2-NEXT: adrp x8, .LCPI0_1
34- ; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
35- ; SVE2-NEXT: mov z3.d, z0.d
36- ; SVE2-NEXT: fmov s0, s1
37- ; SVE2-NEXT: fmov s3, s2
38- ; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
39- ; SVE2-NEXT: str h0, [x0]
24+ ; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
25+ ; SVE2-NEXT: ldr h1, [x1]
26+ ; SVE2-NEXT: ldr h2, [x0]
27+ ; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
28+ ; SVE2-NEXT: str h2, [x0]
4029; SVE2-NEXT: ret
4130;
4231; NONEON-NOSVE-LABEL: test_copysign_f16:
@@ -66,32 +55,40 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
6655define void @test_copysign_bf16 (ptr %ap , ptr %bp ) {
6756; SVE-LABEL: test_copysign_bf16:
6857; SVE: // %bb.0:
69- ; SVE-NEXT: adrp x8, .LCPI1_0
70- ; SVE-NEXT: ldr h1, [x0]
71- ; SVE-NEXT: ldr h2, [x1]
72- ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
73- ; SVE-NEXT: adrp x8, .LCPI1_1
74- ; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
75- ; SVE-NEXT: mov z3.d, z0.d
76- ; SVE-NEXT: fmov s0, s1
77- ; SVE-NEXT: fmov s3, s2
78- ; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
58+ ; SVE-NEXT: sub sp, sp, #16
59+ ; SVE-NEXT: .cfi_def_cfa_offset 16
60+ ; SVE-NEXT: ldr h0, [x0]
61+ ; SVE-NEXT: ldr h1, [x1]
62+ ; SVE-NEXT: fmov w8, s0
63+ ; SVE-NEXT: str h1, [sp, #12]
64+ ; SVE-NEXT: ldrb w9, [sp, #13]
65+ ; SVE-NEXT: and w8, w8, #0x7fff
66+ ; SVE-NEXT: tst w9, #0x80
67+ ; SVE-NEXT: fmov s0, w8
68+ ; SVE-NEXT: eor w8, w8, #0x8000
69+ ; SVE-NEXT: fmov s1, w8
70+ ; SVE-NEXT: fcsel h0, h1, h0, ne
7971; SVE-NEXT: str h0, [x0]
72+ ; SVE-NEXT: add sp, sp, #16
8073; SVE-NEXT: ret
8174;
8275; SVE2-LABEL: test_copysign_bf16:
8376; SVE2: // %bb.0:
84- ; SVE2-NEXT: adrp x8, .LCPI1_0
85- ; SVE2-NEXT: ldr h1, [x0]
86- ; SVE2-NEXT: ldr h2, [x1]
87- ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
88- ; SVE2-NEXT: adrp x8, .LCPI1_1
89- ; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
90- ; SVE2-NEXT: mov z3.d, z0.d
91- ; SVE2-NEXT: fmov s0, s1
92- ; SVE2-NEXT: fmov s3, s2
93- ; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
77+ ; SVE2-NEXT: sub sp, sp, #16
78+ ; SVE2-NEXT: .cfi_def_cfa_offset 16
79+ ; SVE2-NEXT: ldr h0, [x0]
80+ ; SVE2-NEXT: ldr h1, [x1]
81+ ; SVE2-NEXT: fmov w8, s0
82+ ; SVE2-NEXT: str h1, [sp, #12]
83+ ; SVE2-NEXT: ldrb w9, [sp, #13]
84+ ; SVE2-NEXT: and w8, w8, #0x7fff
85+ ; SVE2-NEXT: tst w9, #0x80
86+ ; SVE2-NEXT: fmov s0, w8
87+ ; SVE2-NEXT: eor w8, w8, #0x8000
88+ ; SVE2-NEXT: fmov s1, w8
89+ ; SVE2-NEXT: fcsel h0, h1, h0, ne
9490; SVE2-NEXT: str h0, [x0]
91+ ; SVE2-NEXT: add sp, sp, #16
9592; SVE2-NEXT: ret
9693;
9794; NONEON-NOSVE-LABEL: test_copysign_bf16:
@@ -139,32 +136,21 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
139136define void @test_copysign_f32 (ptr %ap , ptr %bp ) {
140137; SVE-LABEL: test_copysign_f32:
141138; SVE: // %bb.0:
142- ; SVE-NEXT: adrp x8, .LCPI2_0
139+ ; SVE-NEXT: ldr s0, [x1]
143140; SVE-NEXT: ldr s1, [x0]
144- ; SVE-NEXT: ldr s2, [x1]
145- ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
146- ; SVE-NEXT: adrp x8, .LCPI2_1
147- ; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
148- ; SVE-NEXT: mov z3.d, z0.d
149- ; SVE-NEXT: fmov s0, s1
150- ; SVE-NEXT: fmov s3, s2
151- ; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
141+ ; SVE-NEXT: and z0.s, z0.s, #0x80000000
142+ ; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
143+ ; SVE-NEXT: orr z0.d, z1.d, z0.d
152144; SVE-NEXT: str s0, [x0]
153145; SVE-NEXT: ret
154146;
155147; SVE2-LABEL: test_copysign_f32:
156148; SVE2: // %bb.0:
157- ; SVE2-NEXT: adrp x8, .LCPI2_0
158- ; SVE2-NEXT: ldr s1, [x0]
159- ; SVE2-NEXT: ldr s2, [x1]
160- ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
161- ; SVE2-NEXT: adrp x8, .LCPI2_1
162- ; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
163- ; SVE2-NEXT: mov z3.d, z0.d
164- ; SVE2-NEXT: fmov s0, s1
165- ; SVE2-NEXT: fmov s3, s2
166- ; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
167- ; SVE2-NEXT: str s0, [x0]
149+ ; SVE2-NEXT: mov z0.s, #0x7fffffff
150+ ; SVE2-NEXT: ldr s1, [x1]
151+ ; SVE2-NEXT: ldr s2, [x0]
152+ ; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
153+ ; SVE2-NEXT: str s2, [x0]
168154; SVE2-NEXT: ret
169155;
170156; NONEON-NOSVE-LABEL: test_copysign_f32:
@@ -187,36 +173,21 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) {
187173define void @test_copysign_f64 (ptr %ap , ptr %bp ) {
188174; SVE-LABEL: test_copysign_f64:
189175; SVE: // %bb.0:
190- ; SVE-NEXT: adrp x8, .LCPI3_1
191- ; SVE-NEXT: ptrue p0.d, vl2
192- ; SVE-NEXT: ldr d2, [x0]
193- ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
194- ; SVE-NEXT: adrp x8, .LCPI3_0
195- ; SVE-NEXT: ldr d3, [x1]
196- ; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
197- ; SVE-NEXT: fneg z0.d, p0/m, z0.d
198- ; SVE-NEXT: mov z4.d, z1.d
199- ; SVE-NEXT: fmov d1, d2
200- ; SVE-NEXT: fmov d4, d3
201- ; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b
176+ ; SVE-NEXT: ldr d0, [x1]
177+ ; SVE-NEXT: ldr d1, [x0]
178+ ; SVE-NEXT: and z0.d, z0.d, #0x8000000000000000
179+ ; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
180+ ; SVE-NEXT: orr z0.d, z1.d, z0.d
202181; SVE-NEXT: str d0, [x0]
203182; SVE-NEXT: ret
204183;
205184; SVE2-LABEL: test_copysign_f64:
206185; SVE2: // %bb.0:
207- ; SVE2-NEXT: adrp x8, .LCPI3_1
208- ; SVE2-NEXT: ptrue p0.d, vl2
186+ ; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff
187+ ; SVE2-NEXT: ldr d1, [x1]
209188; SVE2-NEXT: ldr d2, [x0]
210- ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
211- ; SVE2-NEXT: adrp x8, .LCPI3_0
212- ; SVE2-NEXT: ldr d3, [x1]
213- ; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
214- ; SVE2-NEXT: fneg z0.d, p0/m, z0.d
215- ; SVE2-NEXT: mov z4.d, z1.d
216- ; SVE2-NEXT: fmov d1, d2
217- ; SVE2-NEXT: fmov d4, d3
218- ; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b
219- ; SVE2-NEXT: str d0, [x0]
189+ ; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
190+ ; SVE2-NEXT: str d2, [x0]
220191; SVE2-NEXT: ret
221192;
222193; NONEON-NOSVE-LABEL: test_copysign_f64:
0 commit comments