-
Notifications
You must be signed in to change notification settings - Fork 8k
ext/bcmath: Performance improvement bcsqrt()
#18771
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
57b91d6
e1eccaa
df858ae
c1303df
b04b189
b7cf057
1cfa420
4940980
dbf83da
c40b249
8c63551
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,10 +32,75 @@ | |
#include "bcmath.h" | ||
#include <stdbool.h> | ||
#include <stddef.h> | ||
#include "private.h" | ||
|
||
/* Take the square root NUM and return it in NUM with SCALE digits | ||
after the decimal place. */ | ||
|
||
static inline BC_VECTOR bc_sqrt_get_pow_10(size_t exponent) | ||
{ | ||
BC_VECTOR value = 1; | ||
while (exponent >= 8) { | ||
value *= BC_POW_10_LUT[8]; | ||
exponent -= 8; | ||
} | ||
value *= BC_POW_10_LUT[exponent]; | ||
return value; | ||
} | ||
|
||
static BC_VECTOR bc_fast_sqrt_vector(BC_VECTOR n_vector) | ||
{ | ||
/* Use sqrt() from <math.h> to determine the initial value. */ | ||
BC_VECTOR guess_vector = (BC_VECTOR) round(sqrt((double) n_vector)); | ||
|
||
/* Newton's algorithm. Iterative expression is `x_{n+1} = (x_n + a / x_n) / 2` */ | ||
BC_VECTOR guess1_vector; | ||
size_t diff; | ||
do { | ||
guess1_vector = guess_vector; | ||
guess_vector = (guess1_vector + n_vector / guess1_vector) / 2; /* Iterative expression */ | ||
diff = guess1_vector > guess_vector ? guess1_vector - guess_vector : guess_vector - guess1_vector; | ||
} while (diff > 1); | ||
return guess_vector; | ||
} | ||
|
||
static inline void bc_fast_sqrt(bc_num *num, size_t scale, size_t num_calc_full_len, size_t num_use_full_len, size_t leading_zeros) | ||
{ | ||
BC_VECTOR n_vector = 0; | ||
const char *nptr = (*num)->n_value + leading_zeros; | ||
for (size_t i = 0; i < num_use_full_len; i++) { | ||
n_vector = n_vector * BASE + *nptr++; | ||
} | ||
/* When calculating the square root of a number using only integer operations, | ||
* need to adjust the digit scale accordingly. | ||
* Considering that the original number is the square of the result, | ||
* if the desired scale of the result is 5, the input number should be scaled | ||
* by twice that, i.e., scale 10. */ | ||
n_vector *= bc_sqrt_get_pow_10(num_calc_full_len - num_use_full_len); | ||
|
||
/* Get sqrt */ | ||
BC_VECTOR guess_vector = bc_fast_sqrt_vector(n_vector); | ||
|
||
size_t ret_len; | ||
if (leading_zeros > 0) { | ||
ret_len = 1; | ||
} else { | ||
ret_len = ((*num)->n_len + 1) / 2; | ||
} | ||
|
||
bc_num ret = bc_new_num_nonzeroed(ret_len, scale); | ||
char *rptr = ret->n_value; | ||
char *rend = rptr + ret_len + scale - 1; | ||
|
||
guess_vector /= BASE; /* Since the scale of guess_vector is scale + 1, reduce the scale by 1. */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Didn't we have a common function to do this loop? |
||
while (rend >= rptr) { | ||
*rend-- = guess_vector % BASE; | ||
guess_vector /= BASE; | ||
} | ||
bc_free_num(num); | ||
*num = ret; | ||
} | ||
|
||
static inline void bc_standard_sqrt(bc_num *num, size_t scale, bcmath_compare_result num_cmp_one) | ||
{ | ||
size_t cscale; | ||
|
@@ -100,6 +165,10 @@ bool bc_sqrt(bc_num *num, size_t scale) | |
/* Cannot take the square root of a negative number */ | ||
return false; | ||
} | ||
|
||
size_t num_calc_scale = (scale + 1) * 2; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there cases where this can overflow? The maximum value of scale is, as far as I know, ZEND_LONG_MAX. So theoretically it could overflow? |
||
size_t num_use_scale = MIN((*num)->n_scale, num_calc_scale); | ||
|
||
/* Square root of 0 is 0 */ | ||
if (bc_is_zero(*num)) { | ||
bc_free_num (num); | ||
|
@@ -115,7 +184,26 @@ bool bc_sqrt(bc_num *num, size_t scale) | |
return true; | ||
} | ||
|
||
bc_standard_sqrt(num, scale, num_cmp_one); | ||
/* Initialize the variables. */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This needs a better comment. |
||
size_t leading_zeros = 0; | ||
size_t num_calc_full_len = (*num)->n_len + num_calc_scale; | ||
size_t num_use_full_len = (*num)->n_len + num_use_scale; | ||
if (num_cmp_one == BCMATH_RIGHT_GREATER) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't this comparison always true? The code checks early if the number is negative, 0, or 1. |
||
const char *nptr = (*num)->n_value; | ||
while (*nptr == 0) { | ||
leading_zeros++; | ||
nptr++; | ||
} | ||
num_calc_full_len -= leading_zeros; | ||
num_use_full_len -= leading_zeros; | ||
} | ||
|
||
/* Find the square root using Newton's algorithm. */ | ||
if (num_calc_full_len < MAX_LENGTH_OF_LONG) { | ||
bc_fast_sqrt(num, scale, num_calc_full_len, num_use_full_len, leading_zeros); | ||
} else { | ||
bc_standard_sqrt(num, scale, num_cmp_one); | ||
} | ||
|
||
return true; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On 32-bit systems, this is only overflow-safe if exponent<=9.
Is this guaranteed?