-
Notifications
You must be signed in to change notification settings - Fork 8k
ext/bcmath: Improving bcpow() performance
#18099
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
3ff726a
788a884
186214b
c1568f1
a42d2e1
f303d7d
757e3bc
d203371
2556951
b96f2d9
54729fd
91e3890
20c9309
9e5c9a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,24 +30,152 @@ | |
| *************************************************************************/ | ||
|
|
||
| #include "bcmath.h" | ||
| #include "convert.h" | ||
| #include "private.h" | ||
| #include <assert.h> | ||
| #include <stdbool.h> | ||
| #include <stddef.h> | ||
|
|
||
| void bc_square_ex(bc_num n1, bc_num *result, size_t scale_min) { | ||
| bc_num square_ex = bc_square(n1, scale_min); | ||
| bc_free_num(result); | ||
| *(result) = square_ex; | ||
| static inline size_t bc_multiply_vector_ex( | ||
| BC_VECTOR **n1_vector, size_t n1_arr_size, BC_VECTOR *n2_vector, size_t n2_arr_size, BC_VECTOR **result_vector) | ||
| { | ||
| size_t result_arr_size = n1_arr_size + n2_arr_size; | ||
| bc_multiply_vector(*n1_vector, n1_arr_size, n2_vector, n2_arr_size, *result_vector, result_arr_size); | ||
|
|
||
| /* Eliminate extra zeros because they increase the number of calculations. */ | ||
| while ((*result_vector)[result_arr_size - 1] == 0) { | ||
| result_arr_size--; | ||
| } | ||
|
|
||
| /* Swap n1_vector and result_vector. */ | ||
| BC_VECTOR *tmp = *n1_vector; | ||
| *n1_vector = *result_vector; | ||
| *result_vector = tmp; | ||
|
|
||
| return result_arr_size; | ||
| } | ||
|
|
||
| static inline size_t bc_square_vector_ex(BC_VECTOR **base_vector, size_t base_arr_size, BC_VECTOR **result_vector) | ||
| { | ||
| return bc_multiply_vector_ex(base_vector, base_arr_size, *base_vector, base_arr_size, result_vector); | ||
| } | ||
|
|
||
| /* Use "exponentiation by squaring". This is the fast path when the results are small. */ | ||
| static inline bc_num bc_fast_raise( | ||
| const char *base_end, long exponent, size_t base_len, size_t power_len, size_t power_scale, size_t power_full_len) | ||
| { | ||
| BC_VECTOR base_vector = 0; | ||
|
|
||
| /* Convert to BC_VECTOR[] */ | ||
| bc_convert_to_vector(&base_vector, base_end, base_len); | ||
|
|
||
| while ((exponent & 1) == 0) { | ||
| base_vector *= base_vector; | ||
| exponent >>= 1; | ||
| } | ||
|
|
||
| /* copy base to power */ | ||
| BC_VECTOR power_vector = base_vector; | ||
| exponent >>= 1; | ||
|
|
||
| while (exponent > 0) { | ||
| base_vector *= base_vector; | ||
| if ((exponent & 1) == 1) { | ||
| power_vector *= base_vector; | ||
| } | ||
| exponent >>= 1; | ||
| } | ||
|
|
||
| bc_num power = bc_new_num_nonzeroed(power_len, power_scale); | ||
| char *pptr = power->n_value; | ||
| char *pend = pptr + power_full_len - 1; | ||
|
|
||
| while (pend >= pptr) { | ||
| *pend-- = power_vector % BASE; | ||
| power_vector /= BASE; | ||
| } | ||
| return power; | ||
| } | ||
|
|
||
| /* Use "exponentiation by squaring". This is the standard path. */ | ||
| static bc_num bc_standard_raise( | ||
| const char *base_ptr, const char *base_end, long exponent, size_t base_len, size_t power_scale) | ||
| { | ||
| /* Remove the leading zeros as they will be filled in later. */ | ||
| while (*base_ptr++ == 0) { | ||
| base_len--; | ||
| } | ||
|
|
||
| size_t base_arr_size = BC_ARR_SIZE_FROM_LEN(base_len); | ||
| size_t max_power_arr_size = base_arr_size * exponent; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this overflow? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added check for |
||
|
|
||
| /* The allocated memory area is reused on a rotational basis, so the same size is required. */ | ||
| BC_VECTOR *buf = safe_emalloc(max_power_arr_size * 3, sizeof(BC_VECTOR), 0); | ||
|
||
| BC_VECTOR *base_vector = buf; | ||
| BC_VECTOR *power_vector = base_vector + max_power_arr_size; | ||
| BC_VECTOR *tmp_result_vector = power_vector + max_power_arr_size; | ||
|
|
||
| /* Convert to BC_VECTOR[] */ | ||
| bc_convert_to_vector(base_vector, base_end, base_len); | ||
|
|
||
| while ((exponent & 1) == 0) { | ||
| base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector); | ||
| exponent >>= 1; | ||
| } | ||
|
|
||
| /* copy base to power */ | ||
| size_t power_arr_size = base_arr_size; | ||
| for (size_t i = 0; i < base_arr_size; i++) { | ||
| power_vector[i] = base_vector[i]; | ||
| } | ||
| exponent >>= 1; | ||
|
|
||
| while (exponent > 0) { | ||
| base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector); | ||
| if ((exponent & 1) == 1) { | ||
| power_arr_size = bc_multiply_vector_ex(&power_vector, power_arr_size, base_vector, base_arr_size, &tmp_result_vector); | ||
| } | ||
| exponent >>= 1; | ||
| } | ||
|
|
||
| /* Convert to bc_num */ | ||
| size_t power_leading_zeros = 0; | ||
| size_t power_len; | ||
| size_t power_full_len = power_arr_size * BC_VECTOR_SIZE; | ||
| if (power_full_len > power_scale) { | ||
| power_len = power_full_len - power_scale; | ||
| } else { | ||
| power_len = 1; | ||
| power_leading_zeros = power_scale - power_full_len + 1; | ||
| power_full_len = power_scale + 1; | ||
| } | ||
| bc_num power = bc_new_num_nonzeroed(power_len, power_scale); | ||
|
|
||
| char *pptr = power->n_value; | ||
| char *pend = pptr + power_full_len - 1; | ||
|
|
||
| /* Pad with leading zeros if necessary. */ | ||
| while (power_leading_zeros > sizeof(uint32_t)) { | ||
| bc_write_bcd_representation(0, pptr); | ||
|
||
| pptr += sizeof(uint32_t); | ||
| power_leading_zeros -= sizeof(uint32_t); | ||
| } | ||
| for (size_t i = 0; i < power_leading_zeros; i++) { | ||
| *pptr++ = 0; | ||
| } | ||
|
|
||
| bc_convert_vector_to_char(power_vector, pptr, pend, power_arr_size); | ||
|
|
||
| efree(buf); | ||
|
|
||
| return power; | ||
| } | ||
|
|
||
| /* Raise "base" to the "exponent" power. The result is placed in RESULT. | ||
| Maximum exponent is LONG_MAX. If a "exponent" is not an integer, | ||
| only the integer part is used. */ | ||
| bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) { | ||
| bc_num temp, power; | ||
| size_t rscale; | ||
| size_t pwrscale; | ||
| size_t calcscale; | ||
| bool is_neg; | ||
|
|
||
| /* Special case if exponent is a zero. */ | ||
|
|
@@ -67,43 +195,54 @@ bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) { | |
| rscale = MIN (base->n_scale * exponent, MAX(scale, base->n_scale)); | ||
| } | ||
|
|
||
| /* Set initial value of temp. */ | ||
| power = bc_copy_num(base); | ||
| pwrscale = base->n_scale; | ||
| while ((exponent & 1) == 0) { | ||
| pwrscale = 2 * pwrscale; | ||
| bc_square_ex(power, &power, pwrscale); | ||
| exponent = exponent >> 1; | ||
| if (bc_is_zero(base)) { | ||
| bc_free_num(result); | ||
| *result = bc_copy_num(BCG(_zero_)); | ||
| /* If the exponent is negative, it divides by 0, so it is false. */ | ||
| return !is_neg; | ||
| } | ||
| temp = bc_copy_num(power); | ||
| calcscale = pwrscale; | ||
| exponent = exponent >> 1; | ||
|
|
||
| /* Do the calculation. */ | ||
| while (exponent > 0) { | ||
| pwrscale = 2 * pwrscale; | ||
| bc_square_ex(power, &power, pwrscale); | ||
| if ((exponent & 1) == 1) { | ||
| calcscale = pwrscale + calcscale; | ||
| bc_multiply_ex(temp, power, &temp, calcscale); | ||
| } | ||
| exponent = exponent >> 1; | ||
| size_t base_len = base->n_len + base->n_scale; | ||
| size_t power_len = base->n_len * exponent; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder which of these can overflow There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added check in b96f2d9 |
||
| size_t power_scale = base->n_scale * exponent; | ||
| size_t power_full_len = power_len + power_scale; | ||
|
|
||
| sign power_sign; | ||
| if (base->n_sign == MINUS && (exponent & 1) == 1) { | ||
| power_sign = MINUS; | ||
| } else { | ||
| power_sign = PLUS; | ||
| } | ||
|
|
||
| const char *base_end = base->n_value + base_len - 1; | ||
|
|
||
| bc_num power; | ||
| if (base_len <= BC_VECTOR_SIZE && power_full_len <= BC_VECTOR_SIZE * 2) { | ||
| power = bc_fast_raise(base_end, exponent, base_len, power_len, power_scale, power_full_len); | ||
| } else { | ||
| power = bc_standard_raise(base->n_value, base_end, exponent, base_len, power_scale); | ||
| } | ||
|
|
||
| _bc_rm_leading_zeros(power); | ||
| if (bc_is_zero(power)) { | ||
| power->n_sign = PLUS; | ||
| power->n_scale = 0; | ||
| } else { | ||
| power->n_sign = power_sign; | ||
| } | ||
|
|
||
| /* Assign the value. */ | ||
| if (is_neg) { | ||
| if (bc_divide(BCG(_one_), temp, result, rscale) == false) { | ||
| bc_free_num (&temp); | ||
| if (bc_divide(BCG(_one_), power, result, rscale) == false) { | ||
| bc_free_num (&power); | ||
| return false; | ||
| } | ||
| bc_free_num (&temp); | ||
| bc_free_num (&power); | ||
| } else { | ||
| bc_free_num (result); | ||
| *result = temp; | ||
| *result = power; | ||
| (*result)->n_scale = MIN(scale, (*result)->n_scale); | ||
| } | ||
| bc_free_num (&power); | ||
| return true; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will already move base_ptr even if the first element is not 0. Did you mean this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, damn, you're absolutely right. Thanks!
Fixed in 2556951