Skip to content

Commit f1ebe57

Browse files
committed
[libc][math] Refactor acosf16 implementation to header-only in src/__support/math folder.
1 parent 2837d2f commit f1ebe57

File tree

7 files changed

+231
-155
lines changed

7 files changed

+231
-155
lines changed

libc/shared/math.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "math/acos.h"
1515
#include "math/acosf.h"
16+
#include "math/acosf16.h"
1617
#include "math/exp.h"
1718
#include "math/exp10.h"
1819
#include "math/exp10f.h"

libc/shared/math/acosf16.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- Shared acosf16 function ---------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SHARED_MATH_ACOSF16_H
10+
#define LLVM_LIBC_SHARED_MATH_ACOSF16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "shared/libc_common.h"
17+
#include "src/__support/math/acosf16.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace shared {
21+
22+
using math::acosf16;
23+
24+
} // namespace shared
25+
} // namespace LIBC_NAMESPACE_DECL
26+
27+
#endif // LIBC_TYPES_HAS_FLOAT16
28+
29+
#endif // LLVM_LIBC_SHARED_MATH_ACOSF16_H

libc/src/__support/math/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,22 @@ add_header_library(
3131
libc.src.__support.macros.optimization
3232
)
3333

34+
add_header_library(
35+
acosf16
36+
HDRS
37+
acosf16.h
38+
DEPENDS
39+
libc.src.__support.FPUtil.cast
40+
libc.src.__support.FPUtil.except_value_utils
41+
libc.src.__support.FPUtil.fenv_impl
42+
libc.src.__support.FPUtil.fp_bits
43+
libc.src.__support.FPUtil.multiply_add
44+
libc.src.__support.FPUtil.polyeval
45+
libc.src.__support.FPUtil.sqrt
46+
libc.src.__support.macros.optimization
47+
libc.src.__support.macros.properties.types
48+
)
49+
3450
add_header_library(
3551
asin_utils
3652
HDRS

libc/src/__support/math/acosf16.h

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
//===-- Implementation header for acosf16 -----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF16_H
10+
#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "src/__support/FPUtil/FEnvImpl.h"
17+
#include "src/__support/FPUtil/FPBits.h"
18+
#include "src/__support/FPUtil/PolyEval.h"
19+
#include "src/__support/FPUtil/cast.h"
20+
#include "src/__support/FPUtil/except_value_utils.h"
21+
#include "src/__support/FPUtil/multiply_add.h"
22+
#include "src/__support/FPUtil/sqrt.h"
23+
#include "src/__support/macros/optimization.h"
24+
25+
26+
namespace LIBC_NAMESPACE_DECL {
27+
28+
namespace math {
29+
30+
// Generated by Sollya using the following command:
31+
// > round(pi/2, SG, RN);
32+
// > round(pi, SG, RN);
33+
static constexpr float PI_OVER_2 = 0x1.921fb6p0f;
34+
static constexpr float PI = 0x1.921fb6p1f;
35+
36+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
37+
static constexpr size_t N_EXCEPTS = 2;
38+
39+
static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ACOSF16_EXCEPTS{{
40+
// (input, RZ output, RU offset, RD offset, RN offset)
41+
{0xacaf, 0x3e93, 1, 0, 0},
42+
{0xb874, 0x4052, 1, 0, 1},
43+
}};
44+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
45+
46+
static constexpr float16 acosf16(float16 x) {
47+
using FPBits = fputil::FPBits<float16>;
48+
FPBits xbits(x);
49+
50+
uint16_t x_u = xbits.uintval();
51+
uint16_t x_abs = x_u & 0x7fff;
52+
uint16_t x_sign = x_u >> 15;
53+
54+
// |x| > 0x1p0, |x| > 1, or x is NaN.
55+
if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
56+
// acosf16(NaN) = NaN
57+
if (xbits.is_nan()) {
58+
if (xbits.is_signaling_nan()) {
59+
fputil::raise_except_if_required(FE_INVALID);
60+
return FPBits::quiet_nan().get_val();
61+
}
62+
63+
return x;
64+
}
65+
66+
// 1 < |x| <= +/-inf
67+
fputil::raise_except_if_required(FE_INVALID);
68+
fputil::set_errno_if_required(EDOM);
69+
70+
return FPBits::quiet_nan().get_val();
71+
}
72+
73+
float xf = x;
74+
75+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
76+
// Handle exceptional values
77+
if (auto r = ACOSF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
78+
return r.value();
79+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
80+
81+
// |x| == 0x1p0, x is 1 or -1
82+
// if x is (-)1, return pi, else
83+
// if x is (+)1, return 0
84+
if (LIBC_UNLIKELY(x_abs == 0x3c00))
85+
return fputil::cast<float16>(x_sign ? PI : 0.0f);
86+
87+
float xsq = xf * xf;
88+
89+
// |x| <= 0x1p-1, |x| <= 0.5
90+
if (x_abs <= 0x3800) {
91+
// if x is 0, return pi/2
92+
if (LIBC_UNLIKELY(x_abs == 0))
93+
return fputil::cast<float16>(PI_OVER_2);
94+
95+
// Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
96+
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
97+
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
98+
float interm =
99+
fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
100+
0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
101+
return fputil::cast<float16>(fputil::multiply_add(-xf, interm, PI_OVER_2));
102+
}
103+
104+
// When |x| > 0.5, assume that 0.5 < |x| <= 1
105+
//
106+
// Step-by-step range-reduction proof:
107+
// 1: Let y = asin(x), such that, x = sin(y)
108+
// 2: From complimentary angle identity:
109+
// x = sin(y) = cos(pi/2 - y)
110+
// 3: Let z = pi/2 - y, such that x = cos(z)
111+
// 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
112+
// z = 2A, z/2 = A
113+
// cos(z) = 1 - 2 * sin^2(z/2)
114+
// 5: Make sin(z/2) subject of the formula:
115+
// sin(z/2) = sqrt((1 - cos(z))/2)
116+
// 6: Recall [3]; x = cos(z). Therefore:
117+
// sin(z/2) = sqrt((1 - x)/2)
118+
// 7: Let u = (1 - x)/2
119+
// 8: Therefore:
120+
// asin(sqrt(u)) = z/2
121+
// 2 * asin(sqrt(u)) = z
122+
// 9: Recall [3]; z = pi/2 - y. Therefore:
123+
// y = pi/2 - z
124+
// y = pi/2 - 2 * asin(sqrt(u))
125+
// 10: Recall [1], y = asin(x). Therefore:
126+
// asin(x) = pi/2 - 2 * asin(sqrt(u))
127+
// 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
128+
// Therefore:
129+
// acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
130+
// acos(x) = 2 * asin(sqrt(u))
131+
//
132+
// THE RANGE REDUCTION, HOW?
133+
// 12: Recall [7], u = (1 - x)/2
134+
// 13: Since 0.5 < x <= 1, therefore:
135+
// 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
136+
//
137+
// Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
138+
// Step [11] as `sqrt(u)` is in range.
139+
// When -1 < x <= -0.5, the identity:
140+
// acos(x) = pi - acos(-x)
141+
// allows us to compute for the negative x value (lhs)
142+
// with a positive x value instead (rhs).
143+
144+
float xf_abs = (xf < 0 ? -xf : xf);
145+
float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
146+
float sqrt_u = fputil::sqrt<float>(u);
147+
148+
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
149+
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
150+
float asin_sqrt_u =
151+
sqrt_u * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
152+
0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
153+
154+
return fputil::cast<float16>(
155+
x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, PI) : 2 * asin_sqrt_u);
156+
}
157+
158+
} // namespace math
159+
160+
} // namespace LIBC_NAMESPACE_DECL
161+
162+
#endif // LIBC_TYPES_HAS_FLOAT16
163+
164+
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4042,17 +4042,8 @@ add_entrypoint_object(
40424042
HDRS
40434043
../acosf16.h
40444044
DEPENDS
4045-
libc.hdr.errno_macros
4046-
libc.hdr.fenv_macros
4047-
libc.src.__support.FPUtil.cast
4048-
libc.src.__support.FPUtil.except_value_utils
4049-
libc.src.__support.FPUtil.fenv_impl
4050-
libc.src.__support.FPUtil.fp_bits
4051-
libc.src.__support.FPUtil.multiply_add
4052-
libc.src.__support.FPUtil.polyeval
4053-
libc.src.__support.FPUtil.sqrt
4054-
libc.src.__support.macros.optimization
4055-
libc.src.__support.macros.properties.types
4045+
libc.src.__support.math.acosf16
4046+
libc.src.errno.errno
40564047
)
40574048

40584049
add_entrypoint_object(

libc/src/math/generic/acosf16.cpp

Lines changed: 3 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -8,144 +8,11 @@
88
//===----------------------------------------------------------------------===//
99

1010
#include "src/math/acosf16.h"
11-
#include "hdr/errno_macros.h"
12-
#include "hdr/fenv_macros.h"
13-
#include "src/__support/FPUtil/FEnvImpl.h"
14-
#include "src/__support/FPUtil/FPBits.h"
15-
#include "src/__support/FPUtil/PolyEval.h"
16-
#include "src/__support/FPUtil/cast.h"
17-
#include "src/__support/FPUtil/except_value_utils.h"
18-
#include "src/__support/FPUtil/multiply_add.h"
19-
#include "src/__support/FPUtil/sqrt.h"
20-
#include "src/__support/macros/optimization.h"
11+
#include "src/__support/math/acosf16.h"
2112

22-
namespace LIBC_NAMESPACE_DECL {
23-
24-
// Generated by Sollya using the following command:
25-
// > round(pi/2, SG, RN);
26-
// > round(pi, SG, RN);
27-
static constexpr float PI_OVER_2 = 0x1.921fb6p0f;
28-
static constexpr float PI = 0x1.921fb6p1f;
29-
30-
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
31-
static constexpr size_t N_EXCEPTS = 2;
32-
33-
static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ACOSF16_EXCEPTS{{
34-
// (input, RZ output, RU offset, RD offset, RN offset)
35-
{0xacaf, 0x3e93, 1, 0, 0},
36-
{0xb874, 0x4052, 1, 0, 1},
37-
}};
38-
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
39-
40-
LLVM_LIBC_FUNCTION(float16, acosf16, (float16 x)) {
41-
using FPBits = fputil::FPBits<float16>;
42-
FPBits xbits(x);
43-
44-
uint16_t x_u = xbits.uintval();
45-
uint16_t x_abs = x_u & 0x7fff;
46-
uint16_t x_sign = x_u >> 15;
47-
48-
// |x| > 0x1p0, |x| > 1, or x is NaN.
49-
if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
50-
// acosf16(NaN) = NaN
51-
if (xbits.is_nan()) {
52-
if (xbits.is_signaling_nan()) {
53-
fputil::raise_except_if_required(FE_INVALID);
54-
return FPBits::quiet_nan().get_val();
55-
}
56-
57-
return x;
58-
}
59-
60-
// 1 < |x| <= +/-inf
61-
fputil::raise_except_if_required(FE_INVALID);
62-
fputil::set_errno_if_required(EDOM);
6313

64-
return FPBits::quiet_nan().get_val();
65-
}
66-
67-
float xf = x;
68-
69-
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
70-
// Handle exceptional values
71-
if (auto r = ACOSF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
72-
return r.value();
73-
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
74-
75-
// |x| == 0x1p0, x is 1 or -1
76-
// if x is (-)1, return pi, else
77-
// if x is (+)1, return 0
78-
if (LIBC_UNLIKELY(x_abs == 0x3c00))
79-
return fputil::cast<float16>(x_sign ? PI : 0.0f);
80-
81-
float xsq = xf * xf;
82-
83-
// |x| <= 0x1p-1, |x| <= 0.5
84-
if (x_abs <= 0x3800) {
85-
// if x is 0, return pi/2
86-
if (LIBC_UNLIKELY(x_abs == 0))
87-
return fputil::cast<float16>(PI_OVER_2);
88-
89-
// Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
90-
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
91-
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
92-
float interm =
93-
fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
94-
0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
95-
return fputil::cast<float16>(fputil::multiply_add(-xf, interm, PI_OVER_2));
96-
}
97-
98-
// When |x| > 0.5, assume that 0.5 < |x| <= 1
99-
//
100-
// Step-by-step range-reduction proof:
101-
// 1: Let y = asin(x), such that, x = sin(y)
102-
// 2: From complimentary angle identity:
103-
// x = sin(y) = cos(pi/2 - y)
104-
// 3: Let z = pi/2 - y, such that x = cos(z)
105-
// 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
106-
// z = 2A, z/2 = A
107-
// cos(z) = 1 - 2 * sin^2(z/2)
108-
// 5: Make sin(z/2) subject of the formula:
109-
// sin(z/2) = sqrt((1 - cos(z))/2)
110-
// 6: Recall [3]; x = cos(z). Therefore:
111-
// sin(z/2) = sqrt((1 - x)/2)
112-
// 7: Let u = (1 - x)/2
113-
// 8: Therefore:
114-
// asin(sqrt(u)) = z/2
115-
// 2 * asin(sqrt(u)) = z
116-
// 9: Recall [3]; z = pi/2 - y. Therefore:
117-
// y = pi/2 - z
118-
// y = pi/2 - 2 * asin(sqrt(u))
119-
// 10: Recall [1], y = asin(x). Therefore:
120-
// asin(x) = pi/2 - 2 * asin(sqrt(u))
121-
// 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
122-
// Therefore:
123-
// acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
124-
// acos(x) = 2 * asin(sqrt(u))
125-
//
126-
// THE RANGE REDUCTION, HOW?
127-
// 12: Recall [7], u = (1 - x)/2
128-
// 13: Since 0.5 < x <= 1, therefore:
129-
// 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
130-
//
131-
// Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
132-
// Step [11] as `sqrt(u)` is in range.
133-
// When -1 < x <= -0.5, the identity:
134-
// acos(x) = pi - acos(-x)
135-
// allows us to compute for the negative x value (lhs)
136-
// with a positive x value instead (rhs).
137-
138-
float xf_abs = (xf < 0 ? -xf : xf);
139-
float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
140-
float sqrt_u = fputil::sqrt<float>(u);
14+
namespace LIBC_NAMESPACE_DECL {
14115

142-
// Degree-6 minimax polynomial of asin(x) generated by Sollya with:
143-
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
144-
float asin_sqrt_u =
145-
sqrt_u * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
146-
0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
16+
LLVM_LIBC_FUNCTION(float16, acosf16, (float16 x)) { return math::acosf16(x); }
14717

148-
return fputil::cast<float16>(
149-
x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, PI) : 2 * asin_sqrt_u);
150-
}
15118
} // namespace LIBC_NAMESPACE_DECL

0 commit comments

Comments
 (0)