Skip to content

Commit c3dea04

Browse files
bojlesvkeerthy
authored and committed
[libc][stdfix] Implement fxdivi functions (rdivi) (#154914)
This PR includes only one of the fxdivi functions (rdivi). It uses a polynomial function for the initial approximation, followed by 4 Newton-Raphson iterations to calculate the reciprocal, and finally multiplies the numerator by it to get the result. --------- Signed-off-by: Shreeyash Pandey <[email protected]>
1 parent 93c9d90 commit c3dea04

File tree

11 files changed

+282
-1
lines changed

11 files changed

+282
-1
lines changed

libc/config/linux/riscv/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,7 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
10221022
libc.src.stdfix.idivulr
10231023
libc.src.stdfix.idivuk
10241024
libc.src.stdfix.idivulk
1025+
libc.src.stdfix.rdivi
10251026
)
10261027
endif()
10271028

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,7 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
10581058
libc.src.stdfix.idivulr
10591059
libc.src.stdfix.idivuk
10601060
libc.src.stdfix.idivulk
1061+
libc.src.stdfix.rdivi
10611062
)
10621063
endif()
10631064

libc/docs/headers/stdfix.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ The following functions are included in the ISO/IEC TR 18037:2008 standard.
8181
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
8282
| muli | | | | | | | | | | | | |
8383
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
84-
| \*divi | | | | | | | | | | | | |
84+
| \*divi | | | | |check| | | | | | | | | |
8585
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
8686
| round | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| | |check| |
8787
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+

libc/include/stdfix.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,3 +544,11 @@ functions:
544544
arguments:
545545
- type: unsigned long accum
546546
guard: LIBC_COMPILER_HAS_FIXED_POINT
547+
- name: rdivi
548+
standards:
549+
- stdc_ext
550+
return_type: fract
551+
arguments:
552+
- type: int
553+
- type: int
554+
guard: LIBC_COMPILER_HAS_FIXED_POINT

libc/src/__support/fixed_point/fx_bits.h

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@
1010
#define LLVM_LIBC_SRC___SUPPORT_FIXED_POINT_FX_BITS_H
1111

1212
#include "include/llvm-libc-macros/stdfix-macros.h"
13+
#include "src/__support/CPP/algorithm.h"
1314
#include "src/__support/CPP/bit.h"
1415
#include "src/__support/CPP/limits.h" // numeric_limits
1516
#include "src/__support/CPP/type_traits.h"
17+
#include "src/__support/libc_assert.h"
1618
#include "src/__support/macros/attributes.h" // LIBC_INLINE
1719
#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
1820
#include "src/__support/macros/null_check.h" // LIBC_CRASH_ON_VALUE
@@ -21,6 +23,8 @@
2123

2224
#include "fx_rep.h"
2325

26+
#include <stdio.h>
27+
2428
#ifdef LIBC_COMPILER_HAS_FIXED_POINT
2529

2630
namespace LIBC_NAMESPACE_DECL {
@@ -224,6 +228,113 @@ idiv(T x, T y) {
224228
return static_cast<XType>(result);
225229
}
226230

231+
// Performs one Newton-Raphson refinement of a reciprocal estimate.
//
// Given the divisor `d` and the current estimate `x0` of 1/d, returns the
// refined estimate x0 * (2 - d * x0).
LIBC_INLINE long accum nrstep(long accum d, long accum x0) {
  // (2 - d * x0) is the correction factor applied to the current estimate.
  long accum correction = 2.lk - (d * x0);
  return x0 * correction;
}
235+
236+
// Divide the two integers and return a fixed_point value
237+
//
238+
// For reference, see:
239+
// https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division
240+
// https://stackoverflow.com/a/9231996
241+
242+
// XType is the fixed-point result type, `n` the numerator and `d` the
// denominator. `d == 0` is rejected through LIBC_CRASH_ON_VALUE, a zero
// numerator yields FXRep<XType>::ZERO(), and quotients that do not fit in
// XType saturate to FXRep<XType>::MAX() or FXRep<XType>::MIN().
template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) {
  // If the value of the second operand of the / operator is zero, the
  // behavior is undefined. Ref: ISO/IEC TR 18037:2008(E) p.g. 16
  LIBC_CRASH_ON_VALUE(d, 0);

  if (LIBC_UNLIKELY(n == 0)) {
    return FXRep<XType>::ZERO();
  }
  auto is_power_of_two = [](int v) { return (v > 0) && ((v & (v - 1)) == 0); };
  long accum max_val = static_cast<long accum>(FXRep<XType>::MAX());
  long accum min_val = static_cast<long accum>(FXRep<XType>::MIN());

  // Fast path: dividing by +/-2^k is a plain shift of the scaled numerator.
  // |INT_MIN| is not representable as an int, so it is kept away from
  // cpp::abs here and handled by the general path below, which special-cases
  // it.
  if (d != INT_MIN && is_power_of_two(cpp::abs(d))) {
    int k = cpp::countr_zero<uint32_t>(static_cast<uint32_t>(cpp::abs(d)));
    constexpr int F = FXRep<XType>::FRACTION_LEN;
    // Scale by 2^F with a multiplication: left-shifting a negative value is
    // not well-defined before C++20.
    int64_t scaled_n = static_cast<int64_t>(n) * (int64_t{1} << F);
    int64_t res64 = scaled_n >> k;
    // Apply the sign of the divisor *before* saturating. Saturating on the
    // magnitude-only quotient picks the wrong bound for negative divisors
    // (e.g. 4 / -2 must saturate to MIN, not MAX).
    if (d < 0) {
      res64 = -res64;
    }
    constexpr int TOTAL_BITS = sizeof(XType) * 8;
    const int64_t max_limit = (1LL << (TOTAL_BITS - 1)) - 1;
    const int64_t min_limit = -(1LL << (TOTAL_BITS - 1));
    // This raw-bit range check also keeps the integer -> long accum
    // conversion below within range.
    if (res64 > max_limit) {
      return FXRep<XType>::MAX();
    } else if (res64 < min_limit) {
      return FXRep<XType>::MIN();
    }
    // Convert the raw bit pattern back into a value by dividing by 2^F.
    long accum res_accum = static_cast<long accum>(res64) /
                           static_cast<long accum>(int64_t{1} << F);
    if (res_accum > max_val) {
      return FXRep<XType>::MAX();
    } else if (res_accum < min_val) {
      return FXRep<XType>::MIN();
    }
    return static_cast<XType>(res_accum);
  }

  // General path: work on magnitudes and re-apply the sign at the end.
  bool result_is_negative = ((n < 0) != (d < 0));
  int64_t n64 = static_cast<int64_t>(n);
  int64_t d64 = static_cast<int64_t>(d);

  uint64_t nv = static_cast<uint64_t>(n64 < 0 ? -n64 : n64);
  uint64_t dv = static_cast<uint64_t>(d64 < 0 ? -d64 : d64);

  // |INT_MIN| = 2^31 does not fit in the 31 bits used for the scaled
  // denominator below. Halving the denominator and doubling the numerator
  // keeps the quotient unchanged.
  if (d == INT_MIN) {
    nv <<= 1;
    dv >>= 1;
  }

  uint32_t clz = cpp::countl_zero<uint32_t>(static_cast<uint32_t>(dv)) - 1;
  uint64_t scaled_val = dv << clz;
  // Scale denominator to be in the range of [0.5,1]
  FXBits<long accum> d_scaled{scaled_val};
  uint64_t scaled_val_n = nv << clz;
  // Scale the numerator as much as the denominator to maintain correctness of
  // the original equation
  FXBits<long accum> n_scaled{scaled_val_n};
  long accum n_scaled_val = n_scaled.get_val();
  long accum d_scaled_val = d_scaled.get_val();
  // x0 = (48/17) - (32/17) * d_n
  long accum a = 0x2.d2d2d2d2p0lk; // 48/17 = 2.8235294...
  long accum b = 0x1.e1e1e1e1p0lk; // 32/17 = 1.8823529...
  // Error of the initial approximation, as derived
  // from the wikipedia article is
  //  E0 = 1/17 = 0.059 (5.9%)
  long accum initial_approx = a - (b * d_scaled_val);
  // Since, 0.5 <= d_scaled_val <= 1.0, 0.9412 <= initial_approx <= 1.88235
  LIBC_ASSERT((initial_approx >= 0x0.78793dd9p0lk) &&
              (initial_approx <= 0x1.f0f0d845p0lk));
  // Each newton-raphson iteration will square the error, due
  // to quadratic convergence. So,
  // E1 = (0.059)^2 = 0.0034
  long accum val = nrstep(d_scaled_val, initial_approx);
  if constexpr (FXRep<XType>::FRACTION_LEN > 8) {
    // E2 = 0.0000121
    val = nrstep(d_scaled_val, val);
    if constexpr (FXRep<XType>::FRACTION_LEN > 16) {
      // E3 = 1.468e-10
      val = nrstep(d_scaled_val, val);
    }
  }
  // n / d == n_scaled * (1 / d_scaled), because both were scaled equally.
  long accum res = n_scaled_val * val;

  if (result_is_negative) {
    res *= static_cast<long accum>(-1);
  }

  // Per clause 7.18a.6.1, saturate values on overflow
  if (res > max_val) {
    return FXRep<XType>::MAX();
  } else if (res < min_val) {
    return FXRep<XType>::MIN();
  } else {
    return static_cast<XType>(res);
  }
}
337+
227338
} // namespace fixed_point
228339
} // namespace LIBC_NAMESPACE_DECL
229340

libc/src/stdfix/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,20 @@ foreach(suffix IN ITEMS r lr k lk ur ulr uk ulk)
8989
)
9090
endforeach()
9191

92+
foreach(suffix IN ITEMS r)
93+
add_entrypoint_object(
94+
${suffix}divi
95+
HDRS
96+
${suffix}divi.h
97+
SRCS
98+
${suffix}divi.cpp
99+
COMPILE_OPTIONS
100+
${libc_opt_high_flag}
101+
DEPENDS
102+
libc.src.__support.fixed_point.fx_bits
103+
)
104+
endforeach()
105+
92106
add_entrypoint_object(
93107
uhksqrtus
94108
HDRS

libc/src/stdfix/rdivi.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===-- Implementation of rdivi function ---------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "rdivi.h"
10+
#include "include/llvm-libc-macros/stdfix-macros.h" // fract
11+
#include "src/__support/common.h" // LLVM_LIBC_FUNCTION
12+
#include "src/__support/fixed_point/fx_bits.h" // fixed_point
13+
#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
17+
// Public entry point: forwards both integer operands to the shared
// fixed_point::divi implementation instantiated for `fract`.
LLVM_LIBC_FUNCTION(fract, rdivi, (int a, int b)) {
  fract quotient = fixed_point::divi<fract>(a, b);
  return quotient;
}
20+
21+
} // namespace LIBC_NAMESPACE_DECL

libc/src/stdfix/rdivi.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===-- Implementation header for rdivi ------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_STDFIX_RDIVI_H
10+
#define LLVM_LIBC_SRC_STDFIX_RDIVI_H
11+
12+
#include "include/llvm-libc-macros/stdfix-macros.h"
13+
#include "src/__support/macros/config.h"
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
17+
// Divides the integer `a` by the integer `b` and returns the quotient as a
// `fract` value.
fract rdivi(int a, int b);
18+
19+
} // namespace LIBC_NAMESPACE_DECL
20+
21+
#endif // LLVM_LIBC_SRC_STDFIX_RDIVI_H

libc/test/src/stdfix/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,22 @@ foreach(suffix IN ITEMS r lr k lk ur ulr uk ulk)
120120
)
121121
endforeach()
122122

123+
foreach(suffix IN ITEMS r)
124+
add_libc_test(
125+
${suffix}divi_test
126+
SUITE
127+
libc-stdfix-tests
128+
HDRS
129+
DivITest.h
130+
SRCS
131+
${suffix}divi_test.cpp
132+
DEPENDS
133+
libc.src.stdfix.${suffix}divi
134+
libc.src.__support.fixed_point.fx_bits
135+
libc.hdr.signal_macros
136+
)
137+
endforeach()
138+
123139
add_libc_test(
124140
uhksqrtus_test
125141
SUITE

libc/test/src/stdfix/DivITest.h

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
//===-- Utility class to test fxdivi functions ------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/__support/CPP/type_traits.h"
10+
#include "src/__support/fixed_point/fx_bits.h"
11+
#include "src/__support/fixed_point/fx_rep.h"
12+
#include "test/UnitTest/Test.h"
13+
14+
template <typename XType> XType get_epsilon() = delete;
15+
template <> fract get_epsilon() { return FRACT_EPSILON; }
16+
template <> unsigned fract get_epsilon() { return UFRACT_EPSILON; }
17+
template <> long fract get_epsilon() { return LFRACT_EPSILON; }
18+
19+
template <typename XType>
20+
class DivITest : public LIBC_NAMESPACE::testing::Test {
21+
using FXRep = LIBC_NAMESPACE::fixed_point::FXRep<XType>;
22+
using FXBits = LIBC_NAMESPACE::fixed_point::FXBits<XType>;
23+
24+
public:
25+
typedef XType (*DivIFunc)(int, int);
26+
27+
void testBasic(DivIFunc func) {
28+
XType epsilon = get_epsilon<XType>();
29+
EXPECT_LT((func(2, 3) - 0.666656494140625r), epsilon);
30+
EXPECT_LT((func(3, 4) - 0.75r), epsilon);
31+
EXPECT_LT((func(1043, 2764) - 0.3773516643r), epsilon);
32+
EXPECT_LT((func(60000, 720293) - 0.08329943509r), epsilon);
33+
34+
EXPECT_EQ(func(128, 256), 0.5r);
35+
EXPECT_EQ(func(1, 2), 0.5r);
36+
EXPECT_EQ(func(1, 4), 0.25r);
37+
EXPECT_EQ(func(1, 8), 0.125r);
38+
EXPECT_EQ(func(1, 16), 0.0625r);
39+
40+
EXPECT_EQ(func(-1, 2), -0.5r);
41+
EXPECT_EQ(func(1, -4), -0.25r);
42+
EXPECT_EQ(func(-1, 8), -0.125r);
43+
EXPECT_EQ(func(1, -16), -0.0625r);
44+
}
45+
46+
void testSpecial(DivIFunc func) {
47+
XType epsilon = get_epsilon<XType>();
48+
EXPECT_EQ(func(0, 10), 0.r);
49+
EXPECT_EQ(func(0, -10), 0.r);
50+
EXPECT_EQ(func(-(1 << FRACT_FBIT), 1 << FRACT_FBIT), FRACT_MIN);
51+
EXPECT_EQ(func((1 << FRACT_FBIT) - 1, 1 << FRACT_FBIT), FRACT_MAX);
52+
// From Section 7.18a.6.1, functions returning a fixed-point value, the
53+
// return value is saturated on overflow.
54+
EXPECT_EQ(func(INT_MAX, INT_MAX), FRACT_MAX);
55+
EXPECT_LT(func(INT_MAX - 1, INT_MAX) - 0.99999999r, epsilon);
56+
EXPECT_EQ(func(INT_MIN, INT_MAX), FRACT_MIN);
57+
// Expecting 0 here as fract is not precise enough to
58+
// handle 1/INT_MAX
59+
EXPECT_LT(func(1, INT_MAX) - 0.r, epsilon);
60+
// This results in 1.1739, which should be saturated to FRACT_MAX
61+
EXPECT_EQ(func(27, 23), FRACT_MAX);
62+
63+
EXPECT_EQ(func(INT_MIN, 1), FRACT_MIN);
64+
EXPECT_LT(func(1, INT_MIN) - 0.r, epsilon);
65+
66+
EXPECT_EQ(func(INT_MIN, INT_MIN), 1.r);
67+
}
68+
};
69+
70+
// Declares the test suite LlvmLibc<Name>diviTest as an alias of
// DivITest<XType> and registers its Basic and Special test cases, each of
// which passes `&func` to the matching fixture method. The trailing
// static_assert makes the macro invocation require a terminating semicolon.
#define LIST_DIVI_TESTS(Name, XType, func) \
71+
using LlvmLibc##Name##diviTest = DivITest<XType>; \
72+
TEST_F(LlvmLibc##Name##diviTest, Basic) { testBasic(&func); } \
73+
TEST_F(LlvmLibc##Name##diviTest, Special) { testSpecial(&func); } \
74+
static_assert(true, "Require semicolon.")

0 commit comments

Comments
 (0)