Skip to content

Commit 73a93c3

Browse files
committed
[libc] add special handling for power-of-two (po2) divisors
Signed-off-by: Shreeyash Pandey <[email protected]>
1 parent b631411 commit 73a93c3

File tree

1 file changed

+32 -11 lines changed

libc/src/__support/fixed_point/fx_bits.h

Lines changed: 32 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
2222

2323
#include "fx_rep.h"
2424

25+
#include <stdio.h>
26+
2527
#ifdef LIBC_COMPILER_HAS_FIXED_POINT
2628

2729
namespace LIBC_NAMESPACE_DECL {
@@ -244,8 +246,35 @@ template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) {
244246
if (LIBC_UNLIKELY(n == 0)) {
245247
return FXRep<XType>::ZERO();
246248
}
247-
bool result_is_negative = ((n < 0) != (d < 0));
249+
auto isPowerOfTwo = [](int n) { return (n > 0) && ((n & (n - 1)) == 0); };
250+
long accum max_val = static_cast<long accum>(FXRep<XType>::MAX());
251+
long accum min_val = static_cast<long accum>(FXRep<XType>::MIN());
252+
253+
if (isPowerOfTwo(cpp::abs(d))) {
254+
int k = cpp::countr_zero<uint32_t>(static_cast<uint32_t>(cpp::abs(d)));
255+
constexpr int F = FXRep<XType>::FRACTION_LEN;
256+
int64_t scaled_n = static_cast<int64_t>(n) << F;
257+
int64_t res64 = scaled_n >> k;
258+
constexpr int TotalBits = sizeof(XType) * 8;
259+
const int64_t max_limit = (1LL << (TotalBits - 1)) - 1;
260+
const int64_t min_limit = -(1LL << (TotalBits - 1));
261+
if (res64 > max_limit) {
262+
return FXRep<XType>::MAX();
263+
} else if (res64 < min_limit) {
264+
return FXRep<XType>::MIN();
265+
}
266+
long accum res_accum =
267+
static_cast<long accum>(res64) / static_cast<long accum>(1 << F);
268+
res_accum = (d < 0) ? static_cast<long accum>(-1) * res_accum : res_accum;
269+
if (res_accum > max_val) {
270+
return FXRep<XType>::MAX();
271+
} else if (res_accum < min_val) {
272+
return FXRep<XType>::MIN();
273+
}
274+
return static_cast<XType>(res_accum);
275+
}
248276

277+
bool result_is_negative = ((n < 0) != (d < 0));
249278
int64_t n64 = static_cast<int64_t>(n);
250279
int64_t d64 = static_cast<int64_t>(d);
251280

@@ -281,14 +310,10 @@ template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) {
281310
// to quadratic convergence. So,
282311
// E1 = (0.059)^2 = 0.0034
283312
long accum val = nrstep(d_scaled_val, initial_approx);
284-
auto isPowerOfTwo = [](int n) { return (n > 0) && ((n & (n - 1)) == 0); };
285-
// Division with a power of 2 would generally be expected to be
286-
// exact, we handle this by specially treating po2 cases and having
287-
// extra iterations for them.
288-
if (FXRep<XType>::FRACTION_LEN > 8 || isPowerOfTwo(cpp::abs(d))) {
313+
if constexpr (FXRep<XType>::FRACTION_LEN > 8) {
289314
// E2 = 0.0000121
290315
val = nrstep(d_scaled_val, val);
291-
if (FXRep<XType>::FRACTION_LEN > 16 || isPowerOfTwo(cpp::abs(d))) {
316+
if constexpr (FXRep<XType>::FRACTION_LEN > 16) {
292317
// E3 = 1.468e−10
293318
val = nrstep(d_scaled_val, val);
294319
}
@@ -299,10 +324,6 @@ template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) {
299324
res *= static_cast<long accum>(-1);
300325
}
301326

302-
// Check for overflow before returning
303-
long accum max_val = static_cast<long accum>(FXRep<XType>::MAX());
304-
long accum min_val = static_cast<long accum>(FXRep<XType>::MIN());
305-
306327
// Per clause 7.18a.6.1, saturate values on overflow
307328
if (res > max_val) {
308329
return FXRep<XType>::MAX();

0 commit comments

Comments
 (0)