Skip to content

Commit 13a313f

Browse files
authored
[libclc] Move sinpi/cospi/tanpi to the CLC library (llvm#133889)
Additionally, these builtins are now vectorized. This also moves the native_recip and native_divide builtins as they are used by the tanpi builtin.
1 parent 1d9ad99 commit 13a313f

36 files changed

+797
-568
lines changed

libclc/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,11 +265,13 @@ endif()
265265
set_source_files_properties(
266266
# CLC builtins
267267
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_cos.cl
268+
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_divide.cl
268269
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_exp2.cl
269270
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_exp.cl
270271
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log10.cl
271272
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log2.cl
272273
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log.cl
274+
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_recip.cl
273275
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_rsqrt.cl
274276
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_sin.cl
275277
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_sqrt.cl
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_COSPI_H__
10+
#define __CLC_MATH_CLC_COSPI_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_cospi
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_COSPI_H__
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_NATIVE_DIVIDE_H__
10+
#define __CLC_MATH_CLC_NATIVE_DIVIDE_H__
11+
12+
#define __FLOAT_ONLY
13+
#define __CLC_FUNCTION __clc_native_divide
14+
#define __CLC_BODY <clc/shared/binary_decl.inc>
15+
16+
#include <clc/math/gentype.inc>
17+
18+
#undef __CLC_BODY
19+
#undef __CLC_FUNCTION
20+
#undef __FLOAT_ONLY
21+
22+
#endif // __CLC_MATH_CLC_NATIVE_DIVIDE_H__
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_NATIVE_RECIP_H__
10+
#define __CLC_MATH_CLC_NATIVE_RECIP_H__
11+
12+
#define __FLOAT_ONLY
13+
#define __CLC_FUNCTION __clc_native_recip
14+
#define __CLC_BODY <clc/shared/unary_decl.inc>
15+
16+
#include <clc/math/gentype.inc>
17+
18+
#undef __CLC_BODY
19+
#undef __CLC_FUNCTION
20+
#undef __FLOAT_ONLY
21+
22+
#endif // __CLC_MATH_CLC_NATIVE_RECIP_H__

libclc/clc/include/clc/math/clc_sincos_helpers.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x,
1010
__CLC_FLOATN y);
1111
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
1212
__CLC_FLOATN y);
13+
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
14+
__CLC_INTN regn);
1315

1416
_CLC_DECL _CLC_OVERLOAD __CLC_INTN __clc_argReductionS(private __CLC_FLOATN *r,
1517
private __CLC_FLOATN *rr,
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/math/clc_fma.h>
10+
#include <clc/math/clc_mad.h>
11+
#include <clc/math/math.h>
12+
13+
#define __CLC_BODY <clc/math/clc_sincos_piby4.inc>
14+
#include <clc/math/gentype.inc>
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_FPSIZE == 32
10+
11+
// Evaluate single-precision sin and cos of a value in the interval [-pi/4, pi/4]
12+
_CLC_INLINE _CLC_OVERLOAD void
13+
__clc_sincos_piby4(__CLC_GENTYPE x, private __CLC_GENTYPE *sinval,
14+
private __CLC_GENTYPE *cosval) {
15+
// Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
16+
// = x * (1 - x^2/3! + x^4/5! - x^6/7! ...)
17+
// = x * f(w)
18+
// where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...)
19+
// We use a minimax approximation of (f(w) - 1) / w
20+
// because this produces an expansion in even powers of x.
21+
22+
// Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
23+
// = f(w)
24+
// where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...)
25+
// We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
26+
// because this produces an expansion in even powers of x.
27+
28+
const __CLC_GENTYPE sc1 = -0.166666666638608441788607926e0F;
29+
const __CLC_GENTYPE sc2 = 0.833333187633086262120839299e-2F;
30+
const __CLC_GENTYPE sc3 = -0.198400874359527693921333720e-3F;
31+
const __CLC_GENTYPE sc4 = 0.272500015145584081596826911e-5F;
32+
33+
const __CLC_GENTYPE cc1 = 0.41666666664325175238031e-1F;
34+
const __CLC_GENTYPE cc2 = -0.13888887673175665567647e-2F;
35+
const __CLC_GENTYPE cc3 = 0.24800600878112441958053e-4F;
36+
const __CLC_GENTYPE cc4 = -0.27301013343179832472841e-6F;
37+
38+
__CLC_GENTYPE x2 = x * x;
39+
40+
*sinval = __clc_mad(
41+
x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1),
42+
x);
43+
*cosval = __clc_mad(
44+
x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1),
45+
__clc_mad(x2, -0.5f, 1.0f));
46+
}
47+
48+
#elif __CLC_FPSIZE == 64
49+
50+
_CLC_INLINE _CLC_OVERLOAD void
51+
__clc_sincos_piby4(__CLC_GENTYPE x, __CLC_GENTYPE xx,
52+
private __CLC_GENTYPE *sinval,
53+
private __CLC_GENTYPE *cosval) {
54+
// Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
55+
// = x * (1 - x^2/3! + x^4/5! - x^6/7! ...)
56+
// = x * f(w)
57+
// where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...)
58+
// We use a minimax approximation of (f(w) - 1) / w
59+
// because this produces an expansion in even powers of x.
60+
// If xx (the tail of x) is non-zero, we add a correction
61+
// term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
62+
// is an approximation to cos(x)*sin(xx) valid because
63+
// xx is tiny relative to x.
64+
65+
// Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
66+
// = f(w)
67+
// where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...)
68+
// We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
69+
// because this produces an expansion in even powers of x.
70+
// If xx (the tail of x) is non-zero, we subtract a correction
71+
// term g(x,xx) = x*xx to the result, where g(x,xx)
72+
// is an approximation to sin(x)*sin(xx) valid because
73+
// xx is tiny relative to x.
74+
75+
const __CLC_GENTYPE sc1 = -0.166666666666666646259241729;
76+
const __CLC_GENTYPE sc2 = 0.833333333333095043065222816e-2;
77+
const __CLC_GENTYPE sc3 = -0.19841269836761125688538679e-3;
78+
const __CLC_GENTYPE sc4 = 0.275573161037288022676895908448e-5;
79+
const __CLC_GENTYPE sc5 = -0.25051132068021699772257377197e-7;
80+
const __CLC_GENTYPE sc6 = 0.159181443044859136852668200e-9;
81+
82+
const __CLC_GENTYPE cc1 = 0.41666666666666665390037e-1;
83+
const __CLC_GENTYPE cc2 = -0.13888888888887398280412e-2;
84+
const __CLC_GENTYPE cc3 = 0.248015872987670414957399e-4;
85+
const __CLC_GENTYPE cc4 = -0.275573172723441909470836e-6;
86+
const __CLC_GENTYPE cc5 = 0.208761463822329611076335e-8;
87+
const __CLC_GENTYPE cc6 = -0.113826398067944859590880e-10;
88+
89+
__CLC_GENTYPE x2 = x * x;
90+
__CLC_GENTYPE x3 = x2 * x;
91+
__CLC_GENTYPE r = (__CLC_GENTYPE)0.5 * x2;
92+
__CLC_GENTYPE t = (__CLC_GENTYPE)1.0 - r;
93+
94+
__CLC_GENTYPE sp = __clc_fma(
95+
__clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2);
96+
97+
__CLC_GENTYPE cp =
98+
t +
99+
__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5),
100+
x2, cc4),
101+
x2, cc3),
102+
x2, cc2),
103+
x2, cc1),
104+
x2 * x2, __clc_fma(x, xx, (1.0 - t) - r));
105+
106+
*sinval =
107+
x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx));
108+
*cosval = cp;
109+
}
110+
111+
_CLC_INLINE _CLC_OVERLOAD void __clc_tan_piby4(__CLC_GENTYPE x,
112+
__CLC_GENTYPE xx,
113+
private __CLC_GENTYPE *leadval,
114+
private __CLC_GENTYPE *tailval) {
115+
// 0x3fe921fb54442d18
116+
const __CLC_GENTYPE piby4_lead = 7.85398163397448278999e-01;
117+
// 0x3c81a62633145c06
118+
const __CLC_GENTYPE piby4_tail = 3.06161699786838240164e-17;
119+
120+
// In order to maintain relative precision transform using the identity:
121+
// tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
122+
// Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
123+
124+
__CLC_LONGN ca = x > 0.68;
125+
__CLC_LONGN cb = x < -0.68;
126+
__CLC_GENTYPE transform = ca ? 1.0 : 0.0;
127+
transform = cb ? -1.0 : transform;
128+
129+
__CLC_GENTYPE tx = __clc_fma(-transform, x, piby4_lead) +
130+
__clc_fma(-transform, xx, piby4_tail);
131+
__CLC_LONGN c = ca | cb;
132+
x = c ? tx : x;
133+
xx = c ? 0.0 : xx;
134+
135+
// Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
136+
__CLC_GENTYPE t1 = x;
137+
__CLC_GENTYPE r = __clc_fma(2.0, x * xx, x * x);
138+
139+
__CLC_GENTYPE a = __clc_fma(r,
140+
__clc_fma(r, 0.224044448537022097264602535574e-3,
141+
-0.229345080057565662883358588111e-1),
142+
0.372379159759792203640806338901e0);
143+
144+
__CLC_GENTYPE b =
145+
__clc_fma(r,
146+
__clc_fma(r,
147+
__clc_fma(r, -0.232371494088563558304549252913e-3,
148+
0.260656620398645407524064091208e-1),
149+
-0.515658515729031149329237816945e0),
150+
0.111713747927937668539901657944e1);
151+
152+
__CLC_GENTYPE t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx);
153+
154+
__CLC_GENTYPE tp = t1 + t2;
155+
156+
// Compute -1.0/(t1 + t2) accurately
157+
__CLC_GENTYPE z1 =
158+
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L);
159+
__CLC_GENTYPE z2 = t2 - (z1 - t1);
160+
__CLC_GENTYPE trec = -MATH_RECIP(tp);
161+
__CLC_GENTYPE trec_top =
162+
__CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L);
163+
164+
__CLC_GENTYPE tpr = __clc_fma(
165+
__clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top);
166+
167+
__CLC_GENTYPE tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp));
168+
__CLC_GENTYPE tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0);
169+
170+
*leadval = c ? tpt : tp;
171+
*tailval = c ? tptr : tpr;
172+
}
173+
174+
#endif
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_SINPI_H__
10+
#define __CLC_MATH_CLC_SINPI_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_sinpi
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_SINPI_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_TANPI_H__
10+
#define __CLC_MATH_CLC_TANPI_H__
11+
12+
#define __CLC_BODY <clc/math/unary_decl.inc>
13+
#define __CLC_FUNCTION __clc_tanpi
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __CLC_FUNCTION
19+
20+
#endif // __CLC_MATH_CLC_TANPI_H__

libclc/clc/lib/generic/SOURCES

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ math/clc_atanh.cl
3030
math/clc_atanpi.cl
3131
math/clc_ceil.cl
3232
math/clc_copysign.cl
33+
math/clc_cospi.cl
3334
math/clc_ep_log.cl
3435
math/clc_fabs.cl
3536
math/clc_fma.cl
@@ -46,12 +47,14 @@ math/clc_mad.cl
4647
math/clc_modf.cl
4748
math/clc_nan.cl
4849
math/clc_native_cos.cl
50+
math/clc_native_divide.cl
4951
math/clc_native_exp.cl
5052
math/clc_native_exp2.cl
5153
math/clc_native_log.cl
5254
math/clc_native_log10.cl
5355
math/clc_native_log2.cl
5456
math/clc_native_rsqrt.cl
57+
math/clc_native_recip.cl
5558
math/clc_native_sin.cl
5659
math/clc_native_sqrt.cl
5760
math/clc_nextafter.cl
@@ -65,9 +68,11 @@ math/clc_rootn.cl
6568
math/clc_round.cl
6669
math/clc_rsqrt.cl
6770
math/clc_sincos_helpers.cl
71+
math/clc_sinpi.cl
6872
math/clc_sqrt.cl
6973
math/clc_sw_fma.cl
7074
math/clc_tables.cl
75+
math/clc_tanpi.cl
7176
math/clc_trunc.cl
7277
relational/clc_all.cl
7378
relational/clc_any.cl

0 commit comments

Comments
 (0)