Skip to content

Commit 55312b6

Browse files
authored
Bring the overflow behavior in bit shifts in sync with std (#395)
- `const fn` bit shifts for `Uint` return the overflow status as `CtChoice` (and set the result to zero in that case, which is documented, so it's a part of the API now). `Option` would be better for the vartime shifts, but its methods are not `const` yet in stable. - `shl/shr` for `BoxedUint` return `(Self, Choice)` (not `CtOption` since most of its methods need the type to be `ConditionallySelectable`, which `BoxedUint` isn't). The vartime equivalents return `Option<Self>`. - operator impls panic on overflow (which is the default behavior for built-in integers) - made the implementations in `uint/shl.rs` and `shr.rs` more uniform and improved vartime shift performance (before it was calling a constant-time shift-by-no-more-than-limb which added some overhead) - improved constant-time shift performance for `BoxedUint` by reducing the number of allocations - added an optimized `BoxedUint::shl1()` implementation - added some inlines for `Limb` methods which improved shift performance noticeably - added more benchmarks for shifts and simplified the benchmark hierarchy a little (the test group is now created directly in the respective function) - fixed an inefficiency in `Uint` shifts: we need to iterate to log2(BITS-1), not log2(BITS), because that's the maximum size of the shift. - Renamed `sh(r/l)1_with_overflow()` to `sh(r/l)1_with_carry()` to avoid confusion - in the context of shifts we call the shift being too large an overflow.
1 parent 7359ebc commit 55312b6

26 files changed

+641
-251
lines changed

Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ name = "boxed_residue"
5656
harness = false
5757
required-features = ["alloc"]
5858

59+
[[bench]]
60+
name = "boxed_uint"
61+
harness = false
62+
required-features = ["alloc"]
63+
5964
[[bench]]
6065
name = "dyn_residue"
6166
harness = false

benches/boxed_uint.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
2+
use crypto_bigint::BoxedUint;
3+
use rand_core::OsRng;
4+
5+
/// Size of `BoxedUint` to use in benchmark.
6+
const UINT_BITS: u32 = 4096;
7+
8+
fn bench_shifts(c: &mut Criterion) {
9+
let mut group = c.benchmark_group("bit shifts");
10+
11+
group.bench_function("shl_vartime", |b| {
12+
b.iter_batched(
13+
|| BoxedUint::random(&mut OsRng, UINT_BITS),
14+
|x| black_box(x.shl_vartime(UINT_BITS / 2 + 10)),
15+
BatchSize::SmallInput,
16+
)
17+
});
18+
19+
group.bench_function("shl", |b| {
20+
b.iter_batched(
21+
|| BoxedUint::random(&mut OsRng, UINT_BITS),
22+
|x| x.shl(UINT_BITS / 2 + 10),
23+
BatchSize::SmallInput,
24+
)
25+
});
26+
27+
group.bench_function("shr_vartime", |b| {
28+
b.iter_batched(
29+
|| BoxedUint::random(&mut OsRng, UINT_BITS),
30+
|x| black_box(x.shr_vartime(UINT_BITS / 2 + 10)),
31+
BatchSize::SmallInput,
32+
)
33+
});
34+
35+
group.bench_function("shr", |b| {
36+
b.iter_batched(
37+
|| BoxedUint::random(&mut OsRng, UINT_BITS),
38+
|x| x.shr(UINT_BITS / 2 + 10),
39+
BatchSize::SmallInput,
40+
)
41+
});
42+
43+
group.finish();
44+
}
45+
46+
criterion_group!(benches, bench_shifts);
47+
48+
criterion_main!(benches);

benches/uint.rs

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
use criterion::{
2-
black_box, criterion_group, criterion_main, measurement::Measurement, BatchSize,
3-
BenchmarkGroup, Criterion,
4-
};
5-
use crypto_bigint::{Limb, NonZero, Random, Reciprocal, U128, U2048, U256};
1+
use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
2+
use crypto_bigint::{Limb, NonZero, Random, Reciprocal, Uint, U128, U2048, U256};
63
use rand_core::OsRng;
74

8-
fn bench_division<M: Measurement>(group: &mut BenchmarkGroup<'_, M>) {
5+
fn bench_division(c: &mut Criterion) {
6+
let mut group = c.benchmark_group("wrapping ops");
7+
98
group.bench_function("div/rem, U256/U128, full size", |b| {
109
b.iter_batched(
1110
|| {
@@ -69,9 +68,13 @@ fn bench_division<M: Measurement>(group: &mut BenchmarkGroup<'_, M>) {
6968
BatchSize::SmallInput,
7069
)
7170
});
71+
72+
group.finish();
7273
}
7374

74-
fn bench_shifts<M: Measurement>(group: &mut BenchmarkGroup<'_, M>) {
75+
fn bench_shl(c: &mut Criterion) {
76+
let mut group = c.benchmark_group("left shift");
77+
7578
group.bench_function("shl_vartime, small, U2048", |b| {
7679
b.iter_batched(|| U2048::ONE, |x| x.shl_vartime(10), BatchSize::SmallInput)
7780
});
@@ -84,16 +87,54 @@ fn bench_shifts<M: Measurement>(group: &mut BenchmarkGroup<'_, M>) {
8487
)
8588
});
8689

90+
group.bench_function("shl_vartime_wide, large, U2048", |b| {
91+
b.iter_batched(
92+
|| (U2048::ONE, U2048::ONE),
93+
|x| Uint::shl_vartime_wide(x, 1024 + 10),
94+
BatchSize::SmallInput,
95+
)
96+
});
97+
8798
group.bench_function("shl, U2048", |b| {
8899
b.iter_batched(|| U2048::ONE, |x| x.shl(1024 + 10), BatchSize::SmallInput)
89100
});
90101

102+
group.finish();
103+
}
104+
105+
fn bench_shr(c: &mut Criterion) {
106+
let mut group = c.benchmark_group("right shift");
107+
108+
group.bench_function("shr_vartime, small, U2048", |b| {
109+
b.iter_batched(|| U2048::ONE, |x| x.shr_vartime(10), BatchSize::SmallInput)
110+
});
111+
112+
group.bench_function("shr_vartime, large, U2048", |b| {
113+
b.iter_batched(
114+
|| U2048::ONE,
115+
|x| x.shr_vartime(1024 + 10),
116+
BatchSize::SmallInput,
117+
)
118+
});
119+
120+
group.bench_function("shr_vartime_wide, large, U2048", |b| {
121+
b.iter_batched(
122+
|| (U2048::ONE, U2048::ONE),
123+
|x| Uint::shr_vartime_wide(x, 1024 + 10),
124+
BatchSize::SmallInput,
125+
)
126+
});
127+
91128
group.bench_function("shr, U2048", |b| {
92129
b.iter_batched(|| U2048::ONE, |x| x.shr(1024 + 10), BatchSize::SmallInput)
93130
});
131+
132+
group.finish();
94133
}
95134

96-
fn bench_inv_mod<M: Measurement>(group: &mut BenchmarkGroup<'_, M>) {
135+
fn bench_inv_mod(c: &mut Criterion) {
136+
let mut group = c.benchmark_group("modular ops");
137+
97138
group.bench_function("inv_odd_mod, U256", |b| {
98139
b.iter_batched(
99140
|| {
@@ -144,21 +185,10 @@ fn bench_inv_mod<M: Measurement>(group: &mut BenchmarkGroup<'_, M>) {
144185
BatchSize::SmallInput,
145186
)
146187
});
147-
}
148188

149-
fn bench_wrapping_ops(c: &mut Criterion) {
150-
let mut group = c.benchmark_group("wrapping ops");
151-
bench_division(&mut group);
152-
group.finish();
153-
}
154-
155-
fn bench_modular_ops(c: &mut Criterion) {
156-
let mut group = c.benchmark_group("modular ops");
157-
bench_shifts(&mut group);
158-
bench_inv_mod(&mut group);
159189
group.finish();
160190
}
161191

162-
criterion_group!(benches, bench_wrapping_ops, bench_modular_ops);
192+
criterion_group!(benches, bench_shl, bench_shr, bench_division, bench_inv_mod);
163193

164194
criterion_main!(benches);

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
//! U256::from_be_hex("ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551");
4646
//!
4747
//! // Compute `MODULUS` shifted right by 1 at compile time
48-
//! pub const MODULUS_SHR1: U256 = MODULUS.shr(1);
48+
//! pub const MODULUS_SHR1: U256 = MODULUS.shr(1).0;
4949
//! ```
5050
//!
5151
//! Note that large constant computations may accidentally trigger the `const_eval_limit` of the compiler.

src/limb/bit_not.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use core::ops::Not;
55

66
impl Limb {
77
/// Calculates `!a`.
8+
#[inline(always)]
89
pub const fn not(self) -> Self {
910
Limb(!self.0)
1011
}

src/limb/bit_or.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use core::ops::{BitOr, BitOrAssign};
55

66
impl Limb {
77
/// Calculates `a | b`.
8+
#[inline(always)]
89
pub const fn bitor(self, rhs: Self) -> Self {
910
Limb(self.0 | rhs.0)
1011
}

src/limb/bit_xor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use core::ops::BitXor;
55

66
impl Limb {
77
/// Calculates `a ^ b`.
8+
#[inline(always)]
89
pub const fn bitxor(self, rhs: Self) -> Self {
910
Limb(self.0 ^ rhs.0)
1011
}

src/limb/bits.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,25 @@ use super::Limb;
22

33
impl Limb {
44
/// Calculate the number of bits needed to represent this number.
5+
#[inline(always)]
56
pub const fn bits(self) -> u32 {
67
Limb::BITS - self.0.leading_zeros()
78
}
89

910
/// Calculate the number of leading zeros in the binary representation of this number.
11+
#[inline(always)]
1012
pub const fn leading_zeros(self) -> u32 {
1113
self.0.leading_zeros()
1214
}
1315

1416
/// Calculate the number of trailing zeros in the binary representation of this number.
17+
#[inline(always)]
1518
pub const fn trailing_zeros(self) -> u32 {
1619
self.0.trailing_zeros()
1720
}
1821

1922
/// Calculate the number of trailing ones in the binary representation of this number.
23+
#[inline(always)]
2024
pub const fn trailing_ones(self) -> u32 {
2125
self.0.trailing_ones()
2226
}

src/limb/mul.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ impl Limb {
1717
}
1818

1919
/// Perform saturating multiplication.
20-
#[inline]
20+
#[inline(always)]
2121
pub const fn saturating_mul(&self, rhs: Self) -> Self {
2222
Limb(self.0.saturating_mul(rhs.0))
2323
}

src/limb/shl.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ impl Limb {
1010
pub const fn shl(self, shift: u32) -> Self {
1111
Limb(self.0 << shift)
1212
}
13+
14+
/// Computes `self << 1` and returns the result and the carry (0 or 1).
15+
#[inline(always)]
16+
pub(crate) const fn shl1(self) -> (Self, Self) {
17+
(Self(self.0 << 1), Self(self.0 >> Self::HI_BIT))
18+
}
1319
}
1420

1521
impl Shl<u32> for Limb {

0 commit comments

Comments
 (0)