@@ -8,6 +8,234 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
88
99target triple = "aarch64-unknown-linux-gnu"
1010
; Scalar half-precision copysign test. The IR loads one half through %ap and
; one through %bp, calls @llvm.copysign.f16 on the pair, and stores the result
; back through %ap.
;
; Expected assembly is listed under three check prefixes. The SVE and SVE2
; prefixes expect the same sequence here, ending in a BIF bit-select whose
; operands come from values loaded from .LCPI0_0 and .LCPI0_1. The
; NONEON-NOSVE prefix expects a scalar fcvt/fabs/fneg/fcsel sequence whose
; select condition is bit 0x80 of the byte at [sp, #13], i.e. one byte past
; the start of the half that was stored at [sp, #12].
;
; NOTE(review) - the check lines look autogenerated (update_llc_test_checks.py
; style); presumably they should be regenerated rather than hand-edited.
; Confirm against the file header, which is not visible in this chunk.
11+ define void @test_copysign_f16 (ptr %ap , ptr %bp ) {
12+ ; SVE-LABEL: test_copysign_f16:
13+ ; SVE: // %bb.0:
14+ ; SVE-NEXT: adrp x8, .LCPI0_0
15+ ; SVE-NEXT: ldr h1, [x0]
16+ ; SVE-NEXT: ldr h2, [x1]
17+ ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
18+ ; SVE-NEXT: adrp x8, .LCPI0_1
19+ ; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
20+ ; SVE-NEXT: mov z3.d, z0.d
21+ ; SVE-NEXT: fmov s0, s1
22+ ; SVE-NEXT: fmov s3, s2
23+ ; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
24+ ; SVE-NEXT: str h0, [x0]
25+ ; SVE-NEXT: ret
26+ ;
27+ ; SVE2-LABEL: test_copysign_f16:
28+ ; SVE2: // %bb.0:
29+ ; SVE2-NEXT: adrp x8, .LCPI0_0
30+ ; SVE2-NEXT: ldr h1, [x0]
31+ ; SVE2-NEXT: ldr h2, [x1]
32+ ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
33+ ; SVE2-NEXT: adrp x8, .LCPI0_1
34+ ; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
35+ ; SVE2-NEXT: mov z3.d, z0.d
36+ ; SVE2-NEXT: fmov s0, s1
37+ ; SVE2-NEXT: fmov s3, s2
38+ ; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
39+ ; SVE2-NEXT: str h0, [x0]
40+ ; SVE2-NEXT: ret
41+ ;
42+ ; NONEON-NOSVE-LABEL: test_copysign_f16:
43+ ; NONEON-NOSVE: // %bb.0:
44+ ; NONEON-NOSVE-NEXT: sub sp, sp, #16
45+ ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
46+ ; NONEON-NOSVE-NEXT: ldr h0, [x0]
47+ ; NONEON-NOSVE-NEXT: ldr h1, [x1]
48+ ; NONEON-NOSVE-NEXT: fcvt s0, h0
49+ ; NONEON-NOSVE-NEXT: str h1, [sp, #12]
50+ ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
51+ ; NONEON-NOSVE-NEXT: tst w8, #0x80
52+ ; NONEON-NOSVE-NEXT: fabs s0, s0
53+ ; NONEON-NOSVE-NEXT: fneg s1, s0
54+ ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
55+ ; NONEON-NOSVE-NEXT: fcvt h0, s0
56+ ; NONEON-NOSVE-NEXT: str h0, [x0]
57+ ; NONEON-NOSVE-NEXT: add sp, sp, #16
58+ ; NONEON-NOSVE-NEXT: ret
59+ %a = load half , ptr %ap
60+ %b = load half , ptr %bp
61+ %r = call half @llvm.copysign.f16 (half %a , half %b )
62+ store half %r , ptr %ap
63+ ret void
64+ }
65+
; Scalar bfloat copysign test. The IR loads one bfloat through %ap and one
; through %bp, calls @llvm.copysign.bf16 on the pair, and stores the result
; back through %ap.
;
; The SVE and SVE2 prefixes expect the same BIF-based sequence as each other,
; using values loaded from .LCPI1_0 and .LCPI1_1. The NONEON-NOSVE prefix
; expects an 80-byte stack frame: the magnitude operand is spilled and widened
; via lanes shifted left by 16, then fabs/fneg/fcsel pick the sign using bit
; 0x80 of the byte at [sp, #77] (one byte past the start of the second operand
; stored at [sp, #76]), and the result is shifted right by 16 before the
; 16-bit store.
;
; NOTE(review) - the NONEON-NOSVE expectations include
; "ushll v0.4s, v0.4h, #0", which is an Advanced SIMD instruction. That looks
; surprising for a prefix whose name suggests NEON is unavailable; confirm
; against the RUN lines (not visible in this chunk) that this is intended.
;
; NOTE(review) - the check lines look autogenerated (update_llc_test_checks.py
; style); presumably they should be regenerated rather than hand-edited.
66+ define void @test_copysign_bf16 (ptr %ap , ptr %bp ) {
67+ ; SVE-LABEL: test_copysign_bf16:
68+ ; SVE: // %bb.0:
69+ ; SVE-NEXT: adrp x8, .LCPI1_0
70+ ; SVE-NEXT: ldr h1, [x0]
71+ ; SVE-NEXT: ldr h2, [x1]
72+ ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
73+ ; SVE-NEXT: adrp x8, .LCPI1_1
74+ ; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
75+ ; SVE-NEXT: mov z3.d, z0.d
76+ ; SVE-NEXT: fmov s0, s1
77+ ; SVE-NEXT: fmov s3, s2
78+ ; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
79+ ; SVE-NEXT: str h0, [x0]
80+ ; SVE-NEXT: ret
81+ ;
82+ ; SVE2-LABEL: test_copysign_bf16:
83+ ; SVE2: // %bb.0:
84+ ; SVE2-NEXT: adrp x8, .LCPI1_0
85+ ; SVE2-NEXT: ldr h1, [x0]
86+ ; SVE2-NEXT: ldr h2, [x1]
87+ ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
88+ ; SVE2-NEXT: adrp x8, .LCPI1_1
89+ ; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
90+ ; SVE2-NEXT: mov z3.d, z0.d
91+ ; SVE2-NEXT: fmov s0, s1
92+ ; SVE2-NEXT: fmov s3, s2
93+ ; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
94+ ; SVE2-NEXT: str h0, [x0]
95+ ; SVE2-NEXT: ret
96+ ;
97+ ; NONEON-NOSVE-LABEL: test_copysign_bf16:
98+ ; NONEON-NOSVE: // %bb.0:
99+ ; NONEON-NOSVE-NEXT: sub sp, sp, #80
100+ ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
101+ ; NONEON-NOSVE-NEXT: ldr h0, [x0]
102+ ; NONEON-NOSVE-NEXT: ldr h1, [x1]
103+ ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
104+ ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
105+ ; NONEON-NOSVE-NEXT: str h1, [sp, #76]
106+ ; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
107+ ; NONEON-NOSVE-NEXT: str q0, [sp]
108+ ; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
109+ ; NONEON-NOSVE-NEXT: lsl w9, w8, #16
110+ ; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
111+ ; NONEON-NOSVE-NEXT: lsl w8, w8, #16
112+ ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
113+ ; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
114+ ; NONEON-NOSVE-NEXT: lsl w9, w8, #16
115+ ; NONEON-NOSVE-NEXT: ldr w8, [sp]
116+ ; NONEON-NOSVE-NEXT: lsl w8, w8, #16
117+ ; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
118+ ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #77]
119+ ; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
120+ ; NONEON-NOSVE-NEXT: tst w8, #0x80
121+ ; NONEON-NOSVE-NEXT: str q0, [sp, #48]
122+ ; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
123+ ; NONEON-NOSVE-NEXT: fabs s0, s0
124+ ; NONEON-NOSVE-NEXT: fneg s1, s0
125+ ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
126+ ; NONEON-NOSVE-NEXT: fmov w8, s0
127+ ; NONEON-NOSVE-NEXT: lsr w8, w8, #16
128+ ; NONEON-NOSVE-NEXT: fmov s0, w8
129+ ; NONEON-NOSVE-NEXT: str h0, [x0]
130+ ; NONEON-NOSVE-NEXT: add sp, sp, #80
131+ ; NONEON-NOSVE-NEXT: ret
132+ %a = load bfloat, ptr %ap
133+ %b = load bfloat, ptr %bp
134+ %r = call bfloat @llvm.copysign.bf16 (bfloat %a , bfloat %b )
135+ store bfloat %r , ptr %ap
136+ ret void
137+ }
138+
; Scalar single-precision copysign test. The IR loads one float through %ap
; and one through %bp, calls @llvm.copysign.f32 on the pair, and stores the
; result back through %ap.
;
; The SVE and SVE2 prefixes expect the same BIF-based sequence as each other,
; using values loaded from .LCPI2_0 and .LCPI2_1. The NONEON-NOSVE prefix
; expects no stack frame: the second operand is loaded into w8 as an integer,
; bit 0x80000000 is tested, and fcsel chooses between fabs of the first
; operand and its negation.
;
; NOTE(review) - the check lines look autogenerated (update_llc_test_checks.py
; style); presumably they should be regenerated rather than hand-edited.
139+ define void @test_copysign_f32 (ptr %ap , ptr %bp ) {
140+ ; SVE-LABEL: test_copysign_f32:
141+ ; SVE: // %bb.0:
142+ ; SVE-NEXT: adrp x8, .LCPI2_0
143+ ; SVE-NEXT: ldr s1, [x0]
144+ ; SVE-NEXT: ldr s2, [x1]
145+ ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
146+ ; SVE-NEXT: adrp x8, .LCPI2_1
147+ ; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
148+ ; SVE-NEXT: mov z3.d, z0.d
149+ ; SVE-NEXT: fmov s0, s1
150+ ; SVE-NEXT: fmov s3, s2
151+ ; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
152+ ; SVE-NEXT: str s0, [x0]
153+ ; SVE-NEXT: ret
154+ ;
155+ ; SVE2-LABEL: test_copysign_f32:
156+ ; SVE2: // %bb.0:
157+ ; SVE2-NEXT: adrp x8, .LCPI2_0
158+ ; SVE2-NEXT: ldr s1, [x0]
159+ ; SVE2-NEXT: ldr s2, [x1]
160+ ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
161+ ; SVE2-NEXT: adrp x8, .LCPI2_1
162+ ; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
163+ ; SVE2-NEXT: mov z3.d, z0.d
164+ ; SVE2-NEXT: fmov s0, s1
165+ ; SVE2-NEXT: fmov s3, s2
166+ ; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
167+ ; SVE2-NEXT: str s0, [x0]
168+ ; SVE2-NEXT: ret
169+ ;
170+ ; NONEON-NOSVE-LABEL: test_copysign_f32:
171+ ; NONEON-NOSVE: // %bb.0:
172+ ; NONEON-NOSVE-NEXT: ldr s0, [x0]
173+ ; NONEON-NOSVE-NEXT: ldr w8, [x1]
174+ ; NONEON-NOSVE-NEXT: fabs s0, s0
175+ ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
176+ ; NONEON-NOSVE-NEXT: fneg s1, s0
177+ ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
178+ ; NONEON-NOSVE-NEXT: str s0, [x0]
179+ ; NONEON-NOSVE-NEXT: ret
180+ %a = load float , ptr %ap
181+ %b = load float , ptr %bp
182+ %r = call float @llvm.copysign.f32 (float %a , float %b )
183+ store float %r , ptr %ap
184+ ret void
185+ }
186+
; Scalar double-precision copysign test. The IR loads one double through %ap
; and one through %bp, calls @llvm.copysign.f64 on the pair, and stores the
; result back through %ap.
;
; The SVE and SVE2 prefixes expect the same sequence as each other. Unlike the
; narrower scalar tests in this group, it sets up a two-lane predicate
; (ptrue p0.d, vl2), applies a predicated fneg to the value loaded from
; .LCPI3_1, and finishes with BSL rather than BIF. The NONEON-NOSVE prefix
; expects no stack frame: the second operand is loaded into x8 as an integer,
; bit 0x8000000000000000 is tested, and fcsel chooses between fabs of the
; first operand and its negation.
;
; NOTE(review) - the check lines look autogenerated (update_llc_test_checks.py
; style); presumably they should be regenerated rather than hand-edited.
187+ define void @test_copysign_f64 (ptr %ap , ptr %bp ) {
188+ ; SVE-LABEL: test_copysign_f64:
189+ ; SVE: // %bb.0:
190+ ; SVE-NEXT: adrp x8, .LCPI3_1
191+ ; SVE-NEXT: ptrue p0.d, vl2
192+ ; SVE-NEXT: ldr d2, [x0]
193+ ; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
194+ ; SVE-NEXT: adrp x8, .LCPI3_0
195+ ; SVE-NEXT: ldr d3, [x1]
196+ ; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
197+ ; SVE-NEXT: fneg z0.d, p0/m, z0.d
198+ ; SVE-NEXT: mov z4.d, z1.d
199+ ; SVE-NEXT: fmov d1, d2
200+ ; SVE-NEXT: fmov d4, d3
201+ ; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b
202+ ; SVE-NEXT: str d0, [x0]
203+ ; SVE-NEXT: ret
204+ ;
205+ ; SVE2-LABEL: test_copysign_f64:
206+ ; SVE2: // %bb.0:
207+ ; SVE2-NEXT: adrp x8, .LCPI3_1
208+ ; SVE2-NEXT: ptrue p0.d, vl2
209+ ; SVE2-NEXT: ldr d2, [x0]
210+ ; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
211+ ; SVE2-NEXT: adrp x8, .LCPI3_0
212+ ; SVE2-NEXT: ldr d3, [x1]
213+ ; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
214+ ; SVE2-NEXT: fneg z0.d, p0/m, z0.d
215+ ; SVE2-NEXT: mov z4.d, z1.d
216+ ; SVE2-NEXT: fmov d1, d2
217+ ; SVE2-NEXT: fmov d4, d3
218+ ; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b
219+ ; SVE2-NEXT: str d0, [x0]
220+ ; SVE2-NEXT: ret
221+ ;
222+ ; NONEON-NOSVE-LABEL: test_copysign_f64:
223+ ; NONEON-NOSVE: // %bb.0:
224+ ; NONEON-NOSVE-NEXT: ldr d0, [x0]
225+ ; NONEON-NOSVE-NEXT: ldr x8, [x1]
226+ ; NONEON-NOSVE-NEXT: fabs d0, d0
227+ ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
228+ ; NONEON-NOSVE-NEXT: fneg d1, d0
229+ ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
230+ ; NONEON-NOSVE-NEXT: str d0, [x0]
231+ ; NONEON-NOSVE-NEXT: ret
232+ %a = load double , ptr %ap
233+ %b = load double , ptr %bp
234+ %r = call double @llvm.copysign.f64 (double %a , double %b )
235+ store double %r , ptr %ap
236+ ret void
237+ }
238+
11239;============ f16
12240
13241define void @test_copysign_v4f16_v4f16 (ptr %ap , ptr %bp ) {
0 commit comments