-
Notifications
You must be signed in to change notification settings - Fork 8k
ext/bcmath: Improving bcpow() performance
#18099
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
3ff726a
788a884
186214b
c1568f1
a42d2e1
f303d7d
757e3bc
d203371
2556951
b96f2d9
54729fd
91e3890
20c9309
9e5c9a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,6 +74,10 @@ typedef struct bc_struct { | |
| #define MAX(a, b) ((a)>(b)?(a):(b)) | ||
| #define MIN(a, b) ((a)>(b)?(b):(a)) | ||
|
|
||
| #ifndef SIZE_T_MAX | ||
|
||
| #define SIZE_T_MAX (~((size_t) 0)) | ||
| #endif | ||
|
|
||
| /* Function Prototypes */ | ||
|
|
||
| void bc_init_numbers(void); | ||
|
|
@@ -147,8 +151,6 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale); | |
| *(result) = mul_ex; \ | ||
| } while (0) | ||
|
|
||
| bc_num bc_square(bc_num n1, size_t scale); | ||
|
|
||
| bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, size_t scale); | ||
|
|
||
| bool bc_modulo(bc_num num1, bc_num num2, bc_num *resul, size_t scale); | ||
|
|
@@ -159,6 +161,14 @@ bc_num bc_floor_or_ceil(bc_num num, bool is_floor); | |
|
|
||
| size_t bc_round(bc_num num, zend_long places, zend_long mode, bc_num *result); | ||
|
|
||
| typedef enum { | ||
| BC_RAISE_STATUS_OK, | ||
| BC_RAISE_STATUS_LEN_IS_OVERFLOW, | ||
| BC_RAISE_STATUS_SCALE_IS_OVERFLOW, | ||
| BC_RAISE_STATUS_FULLLEN_IS_OVERFLOW, | ||
| BC_RAISE_STATUS_DIVIDE_BY_ZERO, | ||
| } bc_raise_status; | ||
|
|
||
| typedef enum { | ||
| OK, | ||
| BASE_HAS_FRACTIONAL, | ||
|
|
@@ -170,7 +180,7 @@ typedef enum { | |
|
|
||
| raise_mod_status bc_raisemod(bc_num base, bc_num exponent, bc_num mod, bc_num *result, size_t scale); | ||
|
|
||
| bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale); | ||
| bc_raise_status bc_raise(bc_num base, long exponent, bc_num *result, size_t scale); | ||
|
|
||
| void bc_raise_bc_exponent(bc_num base, bc_num exponent, bc_num *resul, size_t scale); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,31 +30,155 @@ | |
| *************************************************************************/ | ||
|
|
||
| #include "bcmath.h" | ||
| #include "convert.h" | ||
| #include "private.h" | ||
| #include <assert.h> | ||
| #include <stdbool.h> | ||
| #include <stddef.h> | ||
|
|
||
| void bc_square_ex(bc_num n1, bc_num *result, size_t scale_min) { | ||
| bc_num square_ex = bc_square(n1, scale_min); | ||
| bc_free_num(result); | ||
| *(result) = square_ex; | ||
| static inline size_t bc_multiply_vector_ex( | ||
| BC_VECTOR **n1_vector, size_t n1_arr_size, BC_VECTOR *n2_vector, size_t n2_arr_size, BC_VECTOR **result_vector) | ||
| { | ||
| size_t result_arr_size = n1_arr_size + n2_arr_size; | ||
| bc_multiply_vector(*n1_vector, n1_arr_size, n2_vector, n2_arr_size, *result_vector, result_arr_size); | ||
|
|
||
| /* Eliminate extra zeros because they increase the number of calculations. */ | ||
| while ((*result_vector)[result_arr_size - 1] == 0) { | ||
| result_arr_size--; | ||
| } | ||
|
|
||
| /* Swap n1_vector and result_vector. */ | ||
| BC_VECTOR *tmp = *n1_vector; | ||
| *n1_vector = *result_vector; | ||
| *result_vector = tmp; | ||
|
|
||
| return result_arr_size; | ||
| } | ||
|
|
||
| static inline size_t bc_square_vector_ex(BC_VECTOR **base_vector, size_t base_arr_size, BC_VECTOR **result_vector) | ||
| { | ||
| return bc_multiply_vector_ex(base_vector, base_arr_size, *base_vector, base_arr_size, result_vector); | ||
| } | ||
|
|
||
| /* Use "exponentiation by squaring". This is the fast path when the results are small. */ | ||
| static inline bc_num bc_fast_raise( | ||
| const char *base_end, long exponent, size_t base_len, size_t power_len, size_t power_scale, size_t power_full_len) | ||
| { | ||
| BC_VECTOR base_vector = 0; | ||
|
|
||
| /* Convert to BC_VECTOR[] */ | ||
| bc_convert_to_vector(&base_vector, base_end, base_len); | ||
|
|
||
| while ((exponent & 1) == 0) { | ||
| base_vector *= base_vector; | ||
| exponent >>= 1; | ||
| } | ||
|
|
||
| /* copy base to power */ | ||
| BC_VECTOR power_vector = base_vector; | ||
| exponent >>= 1; | ||
|
|
||
| while (exponent > 0) { | ||
| base_vector *= base_vector; | ||
| if ((exponent & 1) == 1) { | ||
| power_vector *= base_vector; | ||
| } | ||
| exponent >>= 1; | ||
| } | ||
|
|
||
| bc_num power = bc_new_num_nonzeroed(power_len, power_scale); | ||
| char *pptr = power->n_value; | ||
| char *pend = pptr + power_full_len - 1; | ||
|
|
||
| while (pend >= pptr) { | ||
| *pend-- = power_vector % BASE; | ||
| power_vector /= BASE; | ||
| } | ||
| return power; | ||
| } | ||
|
|
||
| /* Use "exponentiation by squaring". This is the standard path. */ | ||
| static bc_num bc_standard_raise( | ||
| const char *base_ptr, const char *base_end, long exponent, size_t base_len, size_t power_scale) | ||
| { | ||
| /* Remove the leading zeros as they will be filled in later. */ | ||
| while (*base_ptr == 0) { | ||
| base_ptr++; | ||
| base_len--; | ||
| } | ||
|
|
||
| size_t base_arr_size = BC_ARR_SIZE_FROM_LEN(base_len); | ||
| /* Since it is guaranteed that base_len * exponent does not overflow, there is no possibility of overflow here. */ | ||
| size_t max_power_arr_size = base_arr_size * exponent; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this overflow? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added check for |
||
|
|
||
| /* The allocated memory area is reused on a rotational basis, so the same size is required. */ | ||
| BC_VECTOR *buf = safe_emalloc(max_power_arr_size, sizeof(BC_VECTOR) * 3, 0); | ||
| BC_VECTOR *base_vector = buf; | ||
| BC_VECTOR *power_vector = base_vector + max_power_arr_size; | ||
| BC_VECTOR *tmp_result_vector = power_vector + max_power_arr_size; | ||
|
|
||
| /* Convert to BC_VECTOR[] */ | ||
| bc_convert_to_vector(base_vector, base_end, base_len); | ||
|
|
||
| while ((exponent & 1) == 0) { | ||
| base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector); | ||
| exponent >>= 1; | ||
| } | ||
|
|
||
| /* copy base to power */ | ||
| size_t power_arr_size = base_arr_size; | ||
| for (size_t i = 0; i < base_arr_size; i++) { | ||
| power_vector[i] = base_vector[i]; | ||
| } | ||
| exponent >>= 1; | ||
|
|
||
| while (exponent > 0) { | ||
| base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector); | ||
| if ((exponent & 1) == 1) { | ||
| power_arr_size = bc_multiply_vector_ex(&power_vector, power_arr_size, base_vector, base_arr_size, &tmp_result_vector); | ||
| } | ||
| exponent >>= 1; | ||
| } | ||
|
|
||
| /* Convert to bc_num */ | ||
| size_t power_leading_zeros = 0; | ||
| size_t power_len; | ||
| size_t power_full_len = power_arr_size * BC_VECTOR_SIZE; | ||
| if (power_full_len > power_scale) { | ||
| power_len = power_full_len - power_scale; | ||
| } else { | ||
| power_len = 1; | ||
| power_leading_zeros = power_scale - power_full_len + 1; | ||
| power_full_len = power_scale + 1; | ||
| } | ||
| bc_num power = bc_new_num_nonzeroed(power_len, power_scale); | ||
|
|
||
| char *pptr = power->n_value; | ||
| char *pend = pptr + power_full_len - 1; | ||
|
|
||
| /* Pad with leading zeros if necessary. */ | ||
| memset(pptr, 0, power_leading_zeros); | ||
| pptr += power_leading_zeros; | ||
|
|
||
| bc_convert_vector_to_char(power_vector, pptr, pend, power_arr_size); | ||
|
|
||
| efree(buf); | ||
|
|
||
| return power; | ||
| } | ||
|
|
||
| /* Raise "base" to the "exponent" power. The result is placed in RESULT. | ||
| Maximum exponent is LONG_MAX. If a "exponent" is not an integer, | ||
| only the integer part is used. */ | ||
| bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) { | ||
| bc_num temp, power; | ||
| bc_raise_status bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) { | ||
| size_t rscale; | ||
| size_t pwrscale; | ||
| size_t calcscale; | ||
| bool is_neg; | ||
|
|
||
| /* Special case if exponent is a zero. */ | ||
| if (exponent == 0) { | ||
| bc_free_num (result); | ||
| *result = bc_copy_num(BCG(_one_)); | ||
| return true; | ||
| return BC_RAISE_STATUS_OK; | ||
| } | ||
|
|
||
| /* Other initializations. */ | ||
|
|
@@ -67,44 +191,74 @@ bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) { | |
| rscale = MIN (base->n_scale * exponent, MAX(scale, base->n_scale)); | ||
| } | ||
|
|
||
| /* Set initial value of temp. */ | ||
| power = bc_copy_num(base); | ||
| pwrscale = base->n_scale; | ||
| while ((exponent & 1) == 0) { | ||
| pwrscale = 2 * pwrscale; | ||
| bc_square_ex(power, &power, pwrscale); | ||
| exponent = exponent >> 1; | ||
| if (bc_is_zero(base)) { | ||
| bc_free_num(result); | ||
| *result = bc_copy_num(BCG(_zero_)); | ||
| /* If the exponent is negative, it divides by 0 */ | ||
| return is_neg ? BC_RAISE_STATUS_DIVIDE_BY_ZERO : BC_RAISE_STATUS_OK; | ||
| } | ||
| temp = bc_copy_num(power); | ||
| calcscale = pwrscale; | ||
| exponent = exponent >> 1; | ||
|
|
||
| /* Do the calculation. */ | ||
| while (exponent > 0) { | ||
| pwrscale = 2 * pwrscale; | ||
| bc_square_ex(power, &power, pwrscale); | ||
| if ((exponent & 1) == 1) { | ||
| calcscale = pwrscale + calcscale; | ||
| bc_multiply_ex(temp, power, &temp, calcscale); | ||
| } | ||
| exponent = exponent >> 1; | ||
| /* check overflow */ | ||
| if (UNEXPECTED(base->n_len > SIZE_T_MAX / exponent)) { | ||
| bc_free_num (result); | ||
| *result = bc_copy_num(BCG(_one_)); | ||
|
||
| return BC_RAISE_STATUS_LEN_IS_OVERFLOW; | ||
| } | ||
| if (UNEXPECTED(base->n_scale > SIZE_T_MAX / exponent)) { | ||
| bc_free_num (result); | ||
| *result = bc_copy_num(BCG(_one_)); | ||
| return BC_RAISE_STATUS_SCALE_IS_OVERFLOW; | ||
| } | ||
|
|
||
| size_t base_len = base->n_len + base->n_scale; | ||
| size_t power_len = base->n_len * exponent; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder which of these can overflow There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added check in b96f2d9 |
||
| size_t power_scale = base->n_scale * exponent; | ||
|
|
||
| /* check overflow */ | ||
| if (UNEXPECTED(power_len > SIZE_T_MAX - power_scale)) { | ||
| bc_free_num (result); | ||
| *result = bc_copy_num(BCG(_one_)); | ||
| return BC_RAISE_STATUS_FULLLEN_IS_OVERFLOW; | ||
| } | ||
| size_t power_full_len = power_len + power_scale; | ||
|
|
||
| sign power_sign; | ||
| if (base->n_sign == MINUS && (exponent & 1) == 1) { | ||
| power_sign = MINUS; | ||
| } else { | ||
| power_sign = PLUS; | ||
| } | ||
|
|
||
| const char *base_end = base->n_value + base_len - 1; | ||
|
|
||
| bc_num power; | ||
| if (base_len <= BC_VECTOR_SIZE && power_full_len <= BC_VECTOR_SIZE * 2) { | ||
| power = bc_fast_raise(base_end, exponent, base_len, power_len, power_scale, power_full_len); | ||
| } else { | ||
| power = bc_standard_raise(base->n_value, base_end, exponent, base_len, power_scale); | ||
| } | ||
|
|
||
| _bc_rm_leading_zeros(power); | ||
| if (bc_is_zero(power)) { | ||
| power->n_sign = PLUS; | ||
| power->n_scale = 0; | ||
| } else { | ||
| power->n_sign = power_sign; | ||
| } | ||
|
|
||
| /* Assign the value. */ | ||
| if (is_neg) { | ||
| if (bc_divide(BCG(_one_), temp, result, rscale) == false) { | ||
| bc_free_num (&temp); | ||
| if (bc_divide(BCG(_one_), power, result, rscale) == false) { | ||
| bc_free_num (&power); | ||
| return false; | ||
| return BC_RAISE_STATUS_DIVIDE_BY_ZERO; | ||
| } | ||
| bc_free_num (&temp); | ||
| bc_free_num (&power); | ||
| } else { | ||
| bc_free_num (result); | ||
| *result = temp; | ||
| *result = power; | ||
| (*result)->n_scale = MIN(scale, (*result)->n_scale); | ||
| } | ||
| bc_free_num (&power); | ||
| return true; | ||
| return BC_RAISE_STATUS_OK; | ||
| } | ||
|
|
||
| /* This is used internally by BCMath */ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code to handle the errors is duplicated. I think it would be great to have a separate function that throws the correct exception when bc_raise fails. Then, here and in
bcmath_number_pow_internalyou can call that function is bc_raise return value != BC_RAISE_STATUS_OK.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in 20c9309