|
10 | 10 | #define LLVM_LIBC_SRC___SUPPORT_FIXED_POINT_FX_BITS_H
|
11 | 11 |
|
12 | 12 | #include "include/llvm-libc-macros/stdfix-macros.h"
|
| 13 | +#include "src/__support/CPP/algorithm.h" |
13 | 14 | #include "src/__support/CPP/bit.h"
|
14 | 15 | #include "src/__support/CPP/limits.h" // numeric_limits
|
15 | 16 | #include "src/__support/CPP/type_traits.h"
|
| 17 | +#include "src/__support/libc_assert.h" |
16 | 18 | #include "src/__support/macros/attributes.h" // LIBC_INLINE
|
17 | 19 | #include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL
|
18 | 20 | #include "src/__support/macros/null_check.h" // LIBC_CRASH_ON_VALUE
|
|
21 | 23 |
|
22 | 24 | #include "fx_rep.h"
|
23 | 25 |
|
| 26 | +#include <stdio.h> |
| 27 | + |
24 | 28 | #ifdef LIBC_COMPILER_HAS_FIXED_POINT
|
25 | 29 |
|
26 | 30 | namespace LIBC_NAMESPACE_DECL {
|
@@ -224,6 +228,113 @@ idiv(T x, T y) {
|
224 | 228 | return static_cast<XType>(result);
|
225 | 229 | }
|
226 | 230 |
|
| 231 | +LIBC_INLINE long accum nrstep(long accum d, long accum x0) { |
| 232 | + auto v = x0 * (2.lk - (d * x0)); |
| 233 | + return v; |
| 234 | +} |
| 235 | + |
| 236 | +// Divide the two integers and return a fixed_point value |
| 237 | +// |
| 238 | +// For reference, see: |
| 239 | +// https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division |
| 240 | +// https://stackoverflow.com/a/9231996 |
| 241 | + |
| 242 | +template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) { |
| 243 | + // If the value of the second operand of the / operator is zero, the |
| 244 | + // behavior is undefined. Ref: ISO/IEC TR 18037:2008(E) p.g. 16 |
| 245 | + LIBC_CRASH_ON_VALUE(d, 0); |
| 246 | + |
| 247 | + if (LIBC_UNLIKELY(n == 0)) { |
| 248 | + return FXRep<XType>::ZERO(); |
| 249 | + } |
| 250 | + auto is_power_of_two = [](int n) { return (n > 0) && ((n & (n - 1)) == 0); }; |
| 251 | + long accum max_val = static_cast<long accum>(FXRep<XType>::MAX()); |
| 252 | + long accum min_val = static_cast<long accum>(FXRep<XType>::MIN()); |
| 253 | + |
| 254 | + if (is_power_of_two(cpp::abs(d))) { |
| 255 | + int k = cpp::countr_zero<uint32_t>(static_cast<uint32_t>(cpp::abs(d))); |
| 256 | + constexpr int F = FXRep<XType>::FRACTION_LEN; |
| 257 | + int64_t scaled_n = static_cast<int64_t>(n) << F; |
| 258 | + int64_t res64 = scaled_n >> k; |
| 259 | + constexpr int TOTAL_BITS = sizeof(XType) * 8; |
| 260 | + const int64_t max_limit = (1LL << (TOTAL_BITS - 1)) - 1; |
| 261 | + const int64_t min_limit = -(1LL << (TOTAL_BITS - 1)); |
| 262 | + if (res64 > max_limit) { |
| 263 | + return FXRep<XType>::MAX(); |
| 264 | + } else if (res64 < min_limit) { |
| 265 | + return FXRep<XType>::MIN(); |
| 266 | + } |
| 267 | + long accum res_accum = |
| 268 | + static_cast<long accum>(res64) / static_cast<long accum>(1 << F); |
| 269 | + res_accum = (d < 0) ? static_cast<long accum>(-1) * res_accum : res_accum; |
| 270 | + if (res_accum > max_val) { |
| 271 | + return FXRep<XType>::MAX(); |
| 272 | + } else if (res_accum < min_val) { |
| 273 | + return FXRep<XType>::MIN(); |
| 274 | + } |
| 275 | + return static_cast<XType>(res_accum); |
| 276 | + } |
| 277 | + |
| 278 | + bool result_is_negative = ((n < 0) != (d < 0)); |
| 279 | + int64_t n64 = static_cast<int64_t>(n); |
| 280 | + int64_t d64 = static_cast<int64_t>(d); |
| 281 | + |
| 282 | + uint64_t nv = static_cast<uint64_t>(n64 < 0 ? -n64 : n64); |
| 283 | + uint64_t dv = static_cast<uint64_t>(d64 < 0 ? -d64 : d64); |
| 284 | + |
| 285 | + if (d == INT_MIN) { |
| 286 | + nv <<= 1; |
| 287 | + dv >>= 1; |
| 288 | + } |
| 289 | + |
| 290 | + uint32_t clz = cpp::countl_zero<uint32_t>(static_cast<uint32_t>(dv)) - 1; |
| 291 | + uint64_t scaled_val = dv << clz; |
| 292 | + // Scale denominator to be in the range of [0.5,1] |
| 293 | + FXBits<long accum> d_scaled{scaled_val}; |
| 294 | + uint64_t scaled_val_n = nv << clz; |
| 295 | + // Scale the numerator as much as the denominator to maintain correctness of |
| 296 | + // the original equation |
| 297 | + FXBits<long accum> n_scaled{scaled_val_n}; |
| 298 | + long accum n_scaled_val = n_scaled.get_val(); |
| 299 | + long accum d_scaled_val = d_scaled.get_val(); |
| 300 | + // x0 = (48/17) - (32/17) * d_n |
| 301 | + long accum a = 0x2.d89d89d8p0lk; // 48/17 = 2.8235294... |
| 302 | + long accum b = 0x1.e1e1e1e1p0lk; // 32/17 = 1.8823529... |
| 303 | + // Error of the initial approximation, as derived |
| 304 | + // from the wikipedia article is |
| 305 | + // E0 = 1/17 = 0.059 (5.9%) |
| 306 | + long accum initial_approx = a - (b * d_scaled_val); |
| 307 | + // Since, 0.5 <= d_scaled_val <= 1.0, 0.9412 <= initial_approx <= 1.88235 |
| 308 | + LIBC_ASSERT((initial_approx >= 0x0.78793dd9p0lk) && |
| 309 | + (initial_approx <= 0x1.f0f0d845p0lk)); |
| 310 | + // Each newton-raphson iteration will square the error, due |
| 311 | + // to quadratic convergence. So, |
| 312 | + // E1 = (0.059)^2 = 0.0034 |
| 313 | + long accum val = nrstep(d_scaled_val, initial_approx); |
| 314 | + if constexpr (FXRep<XType>::FRACTION_LEN > 8) { |
| 315 | + // E2 = 0.0000121 |
| 316 | + val = nrstep(d_scaled_val, val); |
| 317 | + if constexpr (FXRep<XType>::FRACTION_LEN > 16) { |
| 318 | + // E3 = 1.468e−10 |
| 319 | + val = nrstep(d_scaled_val, val); |
| 320 | + } |
| 321 | + } |
| 322 | + long accum res = n_scaled_val * val; |
| 323 | + |
| 324 | + if (result_is_negative) { |
| 325 | + res *= static_cast<long accum>(-1); |
| 326 | + } |
| 327 | + |
| 328 | + // Per clause 7.18a.6.1, saturate values on overflow |
| 329 | + if (res > max_val) { |
| 330 | + return FXRep<XType>::MAX(); |
| 331 | + } else if (res < min_val) { |
| 332 | + return FXRep<XType>::MIN(); |
| 333 | + } else { |
| 334 | + return static_cast<XType>(res); |
| 335 | + } |
| 336 | +} |
| 337 | + |
227 | 338 | } // namespace fixed_point
|
228 | 339 | } // namespace LIBC_NAMESPACE_DECL
|
229 | 340 |
|
|
0 commit comments