Skip to content

Commit ca58cde

Browse files
authored
ENH: Optimize np.power for integer type (numpy#26045)
In this PR we optimize np.power(x, n) for integer types and a scalar argument n. The current implementation is a generic binary loop for the arguments. In the case n is a scalar (stride 0) we can optimize the loop.
1 parent dd33199 commit ca58cde

File tree

2 files changed

+58
-11
lines changed

2 files changed

+58
-11
lines changed

benchmarks/benchmarks/bench_ufunc.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -573,8 +573,26 @@ def time_pow(self, dtype):
573573
def time_pow_2(self, dtype):
574574
np.power(self.a, 2.0)
575575

576-
def time_pow_half(self, dype):
576+
def time_pow_half(self, dtype):
577577
np.power(self.a, 0.5)
578578

579579
def time_atan2(self, dtype):
580580
np.arctan2(self.a, self.b)
581+
582+
class BinaryBenchInteger(Benchmark):
583+
params = [np.int32, np.int64]
584+
param_names = ['dtype']
585+
586+
def setup(self, dtype):
587+
N = 1000000
588+
self.a = np.random.randint(20, size=N).astype(dtype)
589+
self.b = np.random.randint(4, size=N).astype(dtype)
590+
591+
def time_pow(self, dtype):
592+
np.power(self.a, self.b)
593+
594+
def time_pow_two(self, dtype):
595+
np.power(self.a, 2)
596+
597+
def time_pow_five(self, dtype):
598+
np.power(self.a, 5)

numpy/_core/src/umath/loops.c.src

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -471,13 +471,49 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 int
471471
}
472472
/**end repeat1**/
473473

474+
static inline @type@
475+
_@TYPE@_squared_exponentiation_helper(@type@ base, @type@ exponent_two, int first_bit) {
476+
// Helper method to calculate power using squared exponentiation
477+
// The algorithm is partly unrolled. The second and third argument are the exponent//2 and the first bit of the exponent
478+
@type@ out = first_bit ? base : 1;
479+
while (exponent_two > 0) {
480+
base *= base;
481+
if (exponent_two & 1) {
482+
out *= base;
483+
}
484+
exponent_two >>= 1;
485+
}
486+
return out;
487+
}
488+
474489
NPY_NO_EXPORT void
475490
@TYPE@_power(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
476491
{
492+
if (steps[1]==0) {
493+
// stride for second argument is 0
494+
BINARY_DEFS
495+
const @type@ in2 = *(@type@ *)ip2;
496+
#if @SIGNED@
497+
if (in2 < 0) {
498+
npy_gil_error(PyExc_ValueError,
499+
"Integers to negative integer powers are not allowed.");
500+
return;
501+
}
502+
#endif
503+
504+
int first_bit = in2 & 1;
505+
@type@ in2start = in2 >> 1;
506+
507+
BINARY_LOOP_SLIDING {
508+
@type@ in1 = *(@type@ *)ip1;
509+
510+
*((@type@ *) op1) = _@TYPE@_squared_exponentiation_helper(in1, in2start, first_bit);
511+
}
512+
return;
513+
}
477514
BINARY_LOOP {
478515
@type@ in1 = *(@type@ *)ip1;
479516
@type@ in2 = *(@type@ *)ip2;
480-
@type@ out;
481517

482518
#if @SIGNED@
483519
if (in2 < 0) {
@@ -495,16 +531,9 @@ NPY_NO_EXPORT void
495531
continue;
496532
}
497533

498-
out = in2 & 1 ? in1 : 1;
534+
int first_bit = in2 & 1;
499535
in2 >>= 1;
500-
while (in2 > 0) {
501-
in1 *= in1;
502-
if (in2 & 1) {
503-
out *= in1;
504-
}
505-
in2 >>= 1;
506-
}
507-
*((@type@ *) op1) = out;
536+
*((@type@ *) op1) = _@TYPE@_squared_exponentiation_helper(in1, in2, first_bit);
508537
}
509538
}
510539
/**end repeat**/

0 commit comments

Comments
 (0)