Skip to content

Commit a0b43e0

Browse files
Move schoolbook squaring implementation into a macro (#648)
Signed-off-by: Andrew Whitehead <[email protected]>
1 parent 759684d commit a0b43e0

File tree

4 files changed

+170
-69
lines changed

4 files changed

+170
-69
lines changed

benches/boxed_uint.rs

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,31 @@ fn bench_shifts(c: &mut Criterion) {
4343
group.finish();
4444
}
4545

46+
fn bench_mul(c: &mut Criterion) {
47+
let mut group = c.benchmark_group("wrapping ops");
48+
49+
group.bench_function("boxed_mul", |b| {
50+
b.iter_batched(
51+
|| {
52+
(
53+
BoxedUint::random_bits(&mut OsRng, UINT_BITS),
54+
BoxedUint::random_bits(&mut OsRng, UINT_BITS),
55+
)
56+
},
57+
|(x, y)| black_box(x.mul(&y)),
58+
BatchSize::SmallInput,
59+
)
60+
});
61+
62+
group.bench_function("boxed_square", |b| {
63+
b.iter_batched(
64+
|| BoxedUint::random_bits(&mut OsRng, UINT_BITS),
65+
|x| black_box(x.square()),
66+
BatchSize::SmallInput,
67+
)
68+
});
69+
}
70+
4671
fn bench_division(c: &mut Criterion) {
4772
let mut group = c.benchmark_group("wrapping ops");
4873

@@ -156,6 +181,12 @@ fn bench_boxed_sqrt(c: &mut Criterion) {
156181
});
157182
}
158183

159-
criterion_group!(benches, bench_division, bench_shifts, bench_boxed_sqrt);
184+
criterion_group!(
185+
benches,
186+
bench_mul,
187+
bench_division,
188+
bench_shifts,
189+
bench_boxed_sqrt
190+
);
160191

161192
criterion_main!(benches);

benches/uint.rs

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,43 @@
11
use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
2-
use crypto_bigint::{Limb, NonZero, Odd, Random, Reciprocal, Uint, U128, U2048, U256};
2+
use crypto_bigint::{Limb, NonZero, Odd, Random, Reciprocal, Uint, U128, U2048, U256, U4096};
33
use rand_core::OsRng;
44

5+
fn bench_mul(c: &mut Criterion) {
6+
let mut group = c.benchmark_group("wrapping ops");
7+
8+
group.bench_function("split_mul, U256xU256", |b| {
9+
b.iter_batched(
10+
|| (U256::random(&mut OsRng), U256::random(&mut OsRng)),
11+
|(x, y)| black_box(x.split_mul(&y)),
12+
BatchSize::SmallInput,
13+
)
14+
});
15+
16+
group.bench_function("split_mul, U4096xU4096", |b| {
17+
b.iter_batched(
18+
|| (U4096::random(&mut OsRng), U4096::random(&mut OsRng)),
19+
|(x, y)| black_box(x.split_mul(&y)),
20+
BatchSize::SmallInput,
21+
)
22+
});
23+
24+
group.bench_function("square_wide, U256", |b| {
25+
b.iter_batched(
26+
|| U256::random(&mut OsRng),
27+
|x| black_box(x.square_wide()),
28+
BatchSize::SmallInput,
29+
)
30+
});
31+
32+
group.bench_function("square_wide, U4096", |b| {
33+
b.iter_batched(
34+
|| U4096::random(&mut OsRng),
35+
|x| black_box(x.square_wide()),
36+
BatchSize::SmallInput,
37+
)
38+
});
39+
}
40+
541
fn bench_division(c: &mut Criterion) {
642
let mut group = c.benchmark_group("wrapping ops");
743

@@ -288,6 +324,7 @@ fn bench_sqrt(c: &mut Criterion) {
288324

289325
criterion_group!(
290326
benches,
327+
bench_mul,
291328
bench_division,
292329
bench_gcd,
293330
bench_shl,

src/uint/boxed/mul.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
//! [`BoxedUint`] multiplication operations.
22
33
use crate::{
4-
uint::mul::mul_limbs, BoxedUint, CheckedMul, Limb, WideningMul, Wrapping, WrappingMul, Zero,
4+
uint::mul::{mul_limbs, square_limbs},
5+
BoxedUint, CheckedMul, Limb, WideningMul, Wrapping, WrappingMul, Zero,
56
};
67
use core::ops::{Mul, MulAssign};
78
use subtle::{Choice, CtOption};
@@ -23,8 +24,9 @@ impl BoxedUint {
2324

2425
/// Multiply `self` by itself.
2526
pub fn square(&self) -> Self {
26-
// TODO(tarcieri): more optimized implementation (shared with `Uint`?)
27-
self.mul(self)
27+
let mut limbs = vec![Limb::ZERO; self.nlimbs() * 2];
28+
square_limbs(&self.limbs, &mut limbs);
29+
limbs.into()
2830
}
2931
}
3032

src/uint/mul.rs

Lines changed: 95 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::{
88
use core::ops::{Mul, MulAssign};
99
use subtle::CtOption;
1010

11-
/// Impl the core schoolbook multiplication algorithm.
11+
/// Implement the core schoolbook multiplication algorithm.
1212
///
1313
/// This is implemented as a macro to abstract over `const fn` and boxed use cases, since the latter
1414
/// needs mutable references and thus the unstable `const_mut_refs` feature (rust-lang/rust#57349).
@@ -53,113 +53,136 @@ macro_rules! impl_schoolbook_multiplication {
5353
}};
5454
}
5555

56-
impl<const LIMBS: usize> Uint<LIMBS> {
57-
/// Multiply `self` by `rhs`, returning a concatenated "wide" result.
58-
pub const fn widening_mul<const RHS_LIMBS: usize, const WIDE_LIMBS: usize>(
59-
&self,
60-
rhs: &Uint<RHS_LIMBS>,
61-
) -> Uint<WIDE_LIMBS>
62-
where
63-
Self: ConcatMixed<Uint<RHS_LIMBS>, MixedOutput = Uint<WIDE_LIMBS>>,
64-
{
65-
let (lo, hi) = self.split_mul(rhs);
66-
Uint::concat_mixed(&lo, &hi)
67-
}
68-
69-
/// Compute "wide" multiplication as a 2-tuple containing the `(lo, hi)` components of the product, whose sizes
70-
/// correspond to the sizes of the operands.
71-
pub const fn split_mul<const RHS_LIMBS: usize>(
72-
&self,
73-
rhs: &Uint<RHS_LIMBS>,
74-
) -> (Self, Uint<RHS_LIMBS>) {
75-
let mut lo = Self::ZERO;
76-
let mut hi = Uint::<RHS_LIMBS>::ZERO;
77-
impl_schoolbook_multiplication!(&self.limbs, &rhs.limbs, lo.limbs, hi.limbs);
78-
(lo, hi)
79-
}
80-
81-
/// Perform wrapping multiplication, discarding overflow.
82-
pub const fn wrapping_mul<const H: usize>(&self, rhs: &Uint<H>) -> Self {
83-
self.split_mul(rhs).0
84-
}
85-
86-
/// Perform saturating multiplication, returning `MAX` on overflow.
87-
pub const fn saturating_mul<const RHS_LIMBS: usize>(&self, rhs: &Uint<RHS_LIMBS>) -> Self {
88-
let (res, overflow) = self.split_mul(rhs);
89-
Self::select(&res, &Self::MAX, overflow.is_nonzero())
90-
}
91-
92-
/// Square self, returning a "wide" result in two parts as (lo, hi).
93-
pub const fn square_wide(&self) -> (Self, Self) {
56+
/// Implement the schoolbook method for squaring.
57+
///
58+
/// Like schoolbook multiplication, but only considering half of the multiplication grid.
59+
// TODO: change this into a `const fn` when `const_mut_refs` is stable.
60+
macro_rules! impl_schoolbook_squaring {
61+
($limbs:expr, $lo:expr, $hi:expr) => {{
9462
// Translated from https://github.com/ucbrise/jedi-pairing/blob/c4bf151/include/core/bigint.hpp#L410
9563
//
9664
// Permission to relicense the resulting translation as Apache 2.0 + MIT was given
9765
// by the original author Sam Kumar: https://github.com/RustCrypto/crypto-bigint/pull/133#discussion_r1056870411
98-
let mut lo = Self::ZERO;
99-
let mut hi = Self::ZERO;
10066

101-
// Schoolbook multiplication, but only considering half of the multiplication grid
67+
if $limbs.len() != $lo.len() || $lo.len() != $hi.len() {
68+
panic!("schoolbook squaring length mismatch");
69+
}
70+
10271
let mut i = 1;
103-
while i < LIMBS {
72+
while i < $limbs.len() {
10473
let mut j = 0;
10574
let mut carry = Limb::ZERO;
10675

10776
while j < i {
10877
let k = i + j;
10978

110-
if k >= LIMBS {
111-
let (n, c) = hi.limbs[k - LIMBS].mac(self.limbs[i], self.limbs[j], carry);
112-
hi.limbs[k - LIMBS] = n;
79+
if k >= $limbs.len() {
80+
let (n, c) = $hi[k - $limbs.len()].mac($limbs[i], $limbs[j], carry);
81+
$hi[k - $limbs.len()] = n;
11382
carry = c;
11483
} else {
115-
let (n, c) = lo.limbs[k].mac(self.limbs[i], self.limbs[j], carry);
116-
lo.limbs[k] = n;
84+
let (n, c) = $lo[k].mac($limbs[i], $limbs[j], carry);
85+
$lo[k] = n;
11786
carry = c;
11887
}
11988

12089
j += 1;
12190
}
12291

123-
if (2 * i) < LIMBS {
124-
lo.limbs[2 * i] = carry;
92+
if (2 * i) < $limbs.len() {
93+
$lo[2 * i] = carry;
12594
} else {
126-
hi.limbs[2 * i - LIMBS] = carry;
95+
$hi[2 * i - $limbs.len()] = carry;
12796
}
12897

12998
i += 1;
13099
}
131100

132101
// Double the current result; this accounts for the other half of the multiplication grid.
133-
// TODO: The top word is empty so we can also use a special purpose shl.
134-
(lo, hi) = Self::overflowing_shl_vartime_wide((lo, hi), 1).expect("shift within range");
102+
// The top word is empty, so we use a special-purpose left shift by one bit.
103+
let mut carry = Limb::ZERO;
104+
let mut i = 0;
105+
while i < $limbs.len() {
106+
($lo[i].0, carry) = ($lo[i].0 << 1 | carry.0, $lo[i].shr(Limb::BITS - 1));
107+
i += 1;
108+
}
109+
i = 0;
110+
while i < $limbs.len() - 1 {
111+
($hi[i].0, carry) = ($hi[i].0 << 1 | carry.0, $hi[i].shr(Limb::BITS - 1));
112+
i += 1;
113+
}
114+
$hi[$limbs.len() - 1] = carry;
135115

136116
// Add the diagonal of the multiplication grid (the `limbs[i] * limbs[i]` terms), which completes the square.
137117
let mut carry = Limb::ZERO;
138118
let mut i = 0;
139-
while i < LIMBS {
140-
if (i * 2) < LIMBS {
141-
let (n, c) = lo.limbs[i * 2].mac(self.limbs[i], self.limbs[i], carry);
142-
lo.limbs[i * 2] = n;
119+
while i < $limbs.len() {
120+
if (i * 2) < $limbs.len() {
121+
let (n, c) = $lo[i * 2].mac($limbs[i], $limbs[i], carry);
122+
$lo[i * 2] = n;
143123
carry = c;
144124
} else {
145-
let (n, c) = hi.limbs[i * 2 - LIMBS].mac(self.limbs[i], self.limbs[i], carry);
146-
hi.limbs[i * 2 - LIMBS] = n;
125+
let (n, c) = $hi[i * 2 - $limbs.len()].mac($limbs[i], $limbs[i], carry);
126+
$hi[i * 2 - $limbs.len()] = n;
147127
carry = c;
148128
}
149129

150-
if (i * 2 + 1) < LIMBS {
151-
let (n, c) = lo.limbs[i * 2 + 1].overflowing_add(carry);
152-
lo.limbs[i * 2 + 1] = n;
130+
if (i * 2 + 1) < $limbs.len() {
131+
let (n, c) = $lo[i * 2 + 1].overflowing_add(carry);
132+
$lo[i * 2 + 1] = n;
153133
carry = c;
154134
} else {
155-
let (n, c) = hi.limbs[i * 2 + 1 - LIMBS].overflowing_add(carry);
156-
hi.limbs[i * 2 + 1 - LIMBS] = n;
135+
let (n, c) = $hi[i * 2 + 1 - $limbs.len()].overflowing_add(carry);
136+
$hi[i * 2 + 1 - $limbs.len()] = n;
157137
carry = c;
158138
}
159139

160140
i += 1;
161141
}
142+
}};
143+
}
144+
145+
impl<const LIMBS: usize> Uint<LIMBS> {
146+
/// Multiply `self` by `rhs`, returning a concatenated "wide" result.
147+
pub const fn widening_mul<const RHS_LIMBS: usize, const WIDE_LIMBS: usize>(
148+
&self,
149+
rhs: &Uint<RHS_LIMBS>,
150+
) -> Uint<WIDE_LIMBS>
151+
where
152+
Self: ConcatMixed<Uint<RHS_LIMBS>, MixedOutput = Uint<WIDE_LIMBS>>,
153+
{
154+
let (lo, hi) = self.split_mul(rhs);
155+
Uint::concat_mixed(&lo, &hi)
156+
}
157+
158+
/// Compute "wide" multiplication as a 2-tuple containing the `(lo, hi)` components of the product, whose sizes
159+
/// correspond to the sizes of the operands.
160+
pub const fn split_mul<const RHS_LIMBS: usize>(
161+
&self,
162+
rhs: &Uint<RHS_LIMBS>,
163+
) -> (Self, Uint<RHS_LIMBS>) {
164+
let mut lo = Self::ZERO;
165+
let mut hi = Uint::<RHS_LIMBS>::ZERO;
166+
impl_schoolbook_multiplication!(&self.limbs, &rhs.limbs, lo.limbs, hi.limbs);
167+
(lo, hi)
168+
}
169+
170+
/// Perform wrapping multiplication, discarding overflow.
171+
pub const fn wrapping_mul<const H: usize>(&self, rhs: &Uint<H>) -> Self {
172+
self.split_mul(rhs).0
173+
}
174+
175+
/// Perform saturating multiplication, returning `MAX` on overflow.
176+
pub const fn saturating_mul<const RHS_LIMBS: usize>(&self, rhs: &Uint<RHS_LIMBS>) -> Self {
177+
let (res, overflow) = self.split_mul(rhs);
178+
Self::select(&res, &Self::MAX, overflow.is_nonzero())
179+
}
162180

181+
/// Square self, returning a "wide" result in two parts as (lo, hi).
182+
pub const fn square_wide(&self) -> (Self, Self) {
183+
let mut lo = Self::ZERO;
184+
let mut hi = Self::ZERO;
185+
impl_schoolbook_squaring!(&self.limbs, lo.limbs, hi.limbs);
163186
(lo, hi)
164187
}
165188
}
@@ -280,6 +303,14 @@ pub(crate) fn mul_limbs(lhs: &[Limb], rhs: &[Limb], out: &mut [Limb]) {
280303
impl_schoolbook_multiplication!(lhs, rhs, lo, hi);
281304
}
282305

306+
/// Wrapper around `impl_schoolbook_squaring!` used by `BoxedUint`: squares `limbs` into `out`, which must be twice as long.
307+
#[cfg(feature = "alloc")]
308+
pub(crate) fn square_limbs(limbs: &[Limb], out: &mut [Limb]) {
309+
debug_assert_eq!(limbs.len() * 2, out.len());
310+
let (lo, hi) = out.split_at_mut(limbs.len());
311+
impl_schoolbook_squaring!(limbs, lo, hi);
312+
}
313+
283314
#[cfg(test)]
284315
mod tests {
285316
use crate::{CheckedMul, Zero, U128, U192, U256, U64};

0 commit comments

Comments
 (0)