Skip to content

Commit 5251791

Browse files
committed
optimize: pow when base is a power of two
if base == 2 ** k, then (2 ** k) ** n == 2 ** (k * n) == 1 << (k * n)
1 parent 759e81a commit 5251791

File tree

5 files changed

+87
-2
lines changed

5 files changed

+87
-2
lines changed

library/core/src/num/uint_macros.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2078,6 +2078,17 @@ macro_rules! uint_impl {
20782078
let mut base = self;
20792079
let mut acc: Self = 1;
20802080

2081+
if intrinsics::is_val_statically_known(base) && base.is_power_of_two() {
2082+
// change of base:
2083+
// if base == 2 ** k, then
2084+
// (2 ** k) ** n
2085+
// == 2 ** (k * n)
2086+
// == 1 << (k * n)
2087+
let k = base.ilog2();
2088+
let shift = try_opt!(k.checked_mul(exp));
2089+
return (1 as Self).checked_shl(shift);
2090+
}
2091+
20812092
if intrinsics::is_val_statically_known(exp) {
20822093
while exp > 1 {
20832094
if (exp & 1) == 1 {
@@ -3246,6 +3257,19 @@ macro_rules! uint_impl {
32463257
let mut overflow = false;
32473258
let mut tmp_overflow;
32483259

3260+
if intrinsics::is_val_statically_known(base) && base.is_power_of_two() {
3261+
// change of base:
3262+
// if base == 2 ** k, then
3263+
// (2 ** k) ** n
3264+
// == 2 ** (k * n)
3265+
// == 1 << (k * n)
3266+
let k = base.ilog2();
3267+
let Some(shift) = k.checked_mul(exp) else {
3268+
return (0, true)
3269+
};
3270+
return ((1 as Self).unbounded_shl(shift), shift >= Self::BITS)
3271+
}
3272+
32493273
if intrinsics::is_val_statically_known(exp) {
32503274
while exp > 1 {
32513275
if (exp & 1) == 1 {
@@ -3301,6 +3325,20 @@ macro_rules! uint_impl {
33013325
let mut base = self;
33023326
let mut acc = 1;
33033327

3328+
if intrinsics::is_val_statically_known(base) && base.is_power_of_two() {
3329+
// change of base:
3330+
// if base == 2 ** k, then
3331+
// (2 ** k) ** n
3332+
// == 2 ** (k * n)
3333+
// == 1 << (k * n)
3334+
let k = base.ilog2();
3335+
let shift = k * exp;
3336+
// Panic on overflow if `-C overflow-checks` is enabled.
3337+
// Otherwise this check will be optimized out.
3338+
let _overflow_check = (1 as Self) << shift;
3339+
return (1 as Self).unbounded_shl(shift)
3340+
}
3341+
33043342
if intrinsics::is_val_statically_known(exp) {
33053343
while exp > 1 {
33063344
if (exp & 1) == 1 {

library/coretests/tests/num/uint_macros.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,17 @@ macro_rules! uint_module {
443443
assert_eq_const_safe!($T: R.saturating_pow(129), $T::MAX);
444444
}
445445

446+
// overflow in the shift calculation should result in the final
447+
// result being 0 rather than accidentally succeeding due to a
448+
// shift within the word size
449+
// i.e. `4 ** 0x8000_0000` should give 0 rather than 1 << 0
450+
{
451+
const R: $T = 4;
452+
const HALF: u32 = u32::MAX / 2 + 1;
453+
assert_eq_const_safe!($T: R.wrapping_pow(HALF), 0 as $T);
454+
assert_eq_const_safe!(($T, bool): R.overflowing_pow(HALF), (0 as $T, true));
455+
}
456+
446457
{
447458
const R: $T = $T::MAX;
448459
assert_eq_const_safe!($T: R.wrapping_pow(0), 1 as $T);
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//@ compile-flags: -Copt-level=3
2+
// Test that `pow` can use a faster implementation when `base` is a
3+
// known power of two
4+
5+
#![crate_type = "lib"]
6+
7+
// CHECK-LABEL: @pow2
8+
#[no_mangle]
9+
pub fn pow2(exp: u32) -> u32 {
10+
// CHECK: %[[SHIFT_AMOUNT:.+]] = and i32 %exp, 31
11+
// CHECK: %[[POW2:.+]] = shl nuw i32 1, %[[SHIFT_AMOUNT]]
12+
// CHECK: ret i32 %[[POW2]]
13+
2u32.pow(exp)
14+
}
15+
16+
// 4 ** n == 2 ** (2 * n) == 1 << (2 * n)
17+
// CHECK-LABEL: @pow4
18+
#[no_mangle]
19+
pub fn pow4(exp: u32) -> u32 {
20+
// CHECK: %[[EXP2:.+]] = shl i32 %exp, 1
21+
// CHECK: %[[SHIFT_AMOUNT:.+]] = and i32 %[[EXP2]], 30
22+
// CHECK: %[[POW4:.+]] = shl nuw nsw i32 1, %[[SHIFT_AMOUNT]]
23+
// CHECK: ret i32 %[[POW4]]
24+
4u32.pow(exp)
25+
}
26+
27+
// 16 ** n == 2 ** (4 * n) == 1 << (4 * n)
28+
// CHECK-LABEL: @pow16
29+
#[no_mangle]
30+
pub fn pow16(exp: u32) -> u32 {
31+
// CHECK: %[[EXP2:.+]] = shl i32 %exp, 2
32+
// CHECK: %[[SHIFT_AMOUNT:.+]] = and i32 %[[EXP2]], 28
33+
// CHECK: %[[POW16:.+]] = shl nuw nsw i32 1, %[[SHIFT_AMOUNT]]
34+
// CHECK: ret i32 %[[POW16]]
35+
16u32.pow(exp)
36+
}

tests/ui/numbers-arithmetic/overflowing-pow-signed.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//@ run-fail
22
//@ regex-error-pattern: thread 'main'.*panicked
3-
//@ error-pattern: attempt to multiply with overflow
3+
//@ regex-error-pattern: attempt to (multiply|shift left) with overflow
44
//@ needs-subprocess
55
//@ compile-flags: -C debug-assertions
66

tests/ui/numbers-arithmetic/overflowing-pow-unsigned.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//@ run-fail
22
//@ regex-error-pattern: thread 'main'.*panicked
3-
//@ error-pattern: attempt to multiply with overflow
3+
//@ regex-error-pattern: attempt to (multiply|shift left) with overflow
44
//@ needs-subprocess
55
//@ compile-flags: -C debug-assertions
66

0 commit comments

Comments
 (0)