Skip to content

Commit 65fcc92

Browse files
committed
Share Reciprocal across Uint and BoxedUint
1 parent 3aeab1f commit 65fcc92

File tree

7 files changed

+241
-451
lines changed

7 files changed

+241
-451
lines changed

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ pub use crate::{
180180
limb::{Limb, WideWord, Word},
181181
non_zero::NonZero,
182182
traits::*,
183-
uint::div_limb::Reciprocal,
183+
uint::reciprocal::Reciprocal,
184184
uint::*,
185185
wrapping::Wrapping,
186186
};

src/uint.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ pub(crate) mod mul;
2323
mod mul_mod;
2424
mod neg;
2525
mod neg_mod;
26+
pub mod reciprocal;
2627
mod resize;
2728
mod shl;
2829
mod shr;

src/uint/boxed/div.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
//! [`BoxedUint`] division operations.
22
3-
use crate::{
4-
uint::boxed::div_limb::{self, Reciprocal},
5-
BoxedUint, CheckedDiv, Limb, NonZero, Wrapping,
6-
};
3+
use crate::{uint::boxed::div_limb, BoxedUint, CheckedDiv, Limb, NonZero, Reciprocal, Wrapping};
74
use core::ops::{Div, DivAssign, Rem, RemAssign};
85
use subtle::{Choice, ConstantTimeEq, ConstantTimeLess, CtOption};
96

src/uint/boxed/div_limb.rs

Lines changed: 3 additions & 222 deletions
Original file line numberDiff line numberDiff line change
@@ -1,226 +1,7 @@
11
//! Implementation of constant-time division via reciprocal precomputation, as described in
22
//! "Improved Division by Invariant Integers" by Niels Möller and Torbjorn Granlund
33
//! (DOI: 10.1109/TC.2010.143, <https://gmplib.org/~tege/division-paper.pdf>).
4-
use subtle::{Choice, ConditionallySelectable};
5-
6-
use crate::{BoxedUint, ConstChoice, Limb, NonZero, WideWord, Word};
7-
8-
/// Calculates the reciprocal of the given 32-bit divisor with the highmost bit set.
9-
#[cfg(target_pointer_width = "32")]
10-
pub const fn reciprocal(d: Word) -> Word {
11-
debug_assert!(d >= (1 << (Word::BITS - 1)));
12-
13-
let d0 = d & 1;
14-
let d10 = d >> 22;
15-
let d21 = (d >> 11) + 1;
16-
let d31 = (d >> 1) + d0;
17-
let v0 = short_div((1 << 24) - (1 << 14) + (1 << 9), 24, d10, 10);
18-
let (hi, _lo) = mulhilo(v0 * v0, d21);
19-
let v1 = (v0 << 4) - hi - 1;
20-
21-
// Checks that the expression for `e` can be simplified in the way we did below.
22-
debug_assert!(mulhilo(v1, d31).0 == (1 << 16) - 1);
23-
let e = Word::MAX - v1.wrapping_mul(d31) + 1 + (v1 >> 1) * d0;
24-
25-
let (hi, _lo) = mulhilo(v1, e);
26-
// Note: the paper does not mention a wrapping add here,
27-
// but the 64-bit version has it at this stage, and the function panics without it
28-
// when calculating a reciprocal for `Word::MAX`.
29-
let v2 = (v1 << 15).wrapping_add(hi >> 1);
30-
31-
// The paper has `(v2 + 1) * d / 2^32` (there's another 2^32, but it's accounted for later).
32-
// If `v2 == 2^32-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
33-
// Hence the `ct_select()`.
34-
let x = v2.wrapping_add(1);
35-
let (hi, _lo) = mulhilo(x, d);
36-
let hi = ConstChoice::from_u32_nonzero(x).select_word(d, hi);
37-
38-
v2.wrapping_sub(hi).wrapping_sub(d)
39-
}
40-
41-
/// Calculates the reciprocal of the given 64-bit divisor with the highmost bit set.
42-
#[cfg(target_pointer_width = "64")]
43-
pub const fn reciprocal(d: Word) -> Word {
44-
debug_assert!(d >= (1 << (Word::BITS - 1)));
45-
46-
let d0 = d & 1;
47-
let d9 = d >> 55;
48-
let d40 = (d >> 24) + 1;
49-
let d63 = (d >> 1) + d0;
50-
let v0 = short_div((1 << 19) - 3 * (1 << 8), 19, d9 as u32, 9) as u64;
51-
let v1 = (v0 << 11) - ((v0 * v0 * d40) >> 40) - 1;
52-
let v2 = (v1 << 13) + ((v1 * ((1 << 60) - v1 * d40)) >> 47);
53-
54-
// Checks that the expression for `e` can be simplified in the way we did below.
55-
debug_assert!(mulhilo(v2, d63).0 == (1 << 32) - 1);
56-
let e = Word::MAX - v2.wrapping_mul(d63) + 1 + (v2 >> 1) * d0;
57-
58-
let (hi, _lo) = mulhilo(v2, e);
59-
let v3 = (v2 << 31).wrapping_add(hi >> 1);
60-
61-
// The paper has `(v3 + 1) * d / 2^64` (there's another 2^64, but it's accounted for later).
62-
// If `v3 == 2^64-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
63-
// Hence the `ct_select()`.
64-
let x = v3.wrapping_add(1);
65-
let (hi, _lo) = mulhilo(x, d);
66-
let hi = ConstChoice::from_word_nonzero(x).select_word(d, hi);
67-
68-
v3.wrapping_sub(hi).wrapping_sub(d)
69-
}
70-
71-
/// Returns `u32::MAX` if `a < b` and `0` otherwise.
72-
#[inline]
73-
const fn lt(a: u32, b: u32) -> u32 {
74-
let bit = (((!a) & b) | (((!a) | b) & (a.wrapping_sub(b)))) >> (u32::BITS - 1);
75-
bit.wrapping_neg()
76-
}
77-
78-
/// Returns `a` if `c == 0` and `b` if `c == u32::MAX`.
79-
#[inline(always)]
80-
const fn select(a: u32, b: u32, c: u32) -> u32 {
81-
a ^ (c & (a ^ b))
82-
}
83-
84-
/// Calculates `dividend / divisor`, given `dividend` and `divisor`
85-
/// along with their maximum bitsizes.
86-
#[inline(always)]
87-
const fn short_div(dividend: u32, dividend_bits: u32, divisor: u32, divisor_bits: u32) -> u32 {
88-
// TODO: this may be sped up even more using the fact that `dividend` is a known constant.
89-
90-
// In the paper this is a table lookup, but since we want it to be constant-time,
91-
// we have to access all the elements of the table, which is quite large.
92-
// So this shift-and-subtract approach is actually faster.
93-
94-
// Passing `dividend_bits` and `divisor_bits` because calling `.leading_zeros()`
95-
// causes a significant slowdown, and we know those values anyway.
96-
97-
let mut dividend = dividend;
98-
let mut divisor = divisor << (dividend_bits - divisor_bits);
99-
let mut quotient: u32 = 0;
100-
let mut i = dividend_bits - divisor_bits + 1;
101-
102-
while i > 0 {
103-
i -= 1;
104-
let bit = lt(dividend, divisor);
105-
dividend = select(dividend.wrapping_sub(divisor), dividend, bit);
106-
divisor >>= 1;
107-
let inv_bit = !bit;
108-
quotient |= (inv_bit >> (u32::BITS - 1)) << i;
109-
}
110-
111-
quotient
112-
}
113-
114-
/// Multiplies `x` and `y`, returning the most significant
115-
/// and the least significant words as `(hi, lo)`.
116-
#[inline(always)]
117-
const fn mulhilo(x: Word, y: Word) -> (Word, Word) {
118-
let res = (x as WideWord) * (y as WideWord);
119-
((res >> Word::BITS) as Word, res as Word)
120-
}
121-
122-
/// Adds wide numbers represented by pairs of (most significant word, least significant word)
123-
/// and returns the result in the same format `(hi, lo)`.
124-
#[inline(always)]
125-
const fn addhilo(x_hi: Word, x_lo: Word, y_hi: Word, y_lo: Word) -> (Word, Word) {
126-
let res = (((x_hi as WideWord) << Word::BITS) | (x_lo as WideWord))
127-
+ (((y_hi as WideWord) << Word::BITS) | (y_lo as WideWord));
128-
((res >> Word::BITS) as Word, res as Word)
129-
}
130-
131-
/// Calculate the quotient and the remainder of the division of a wide word
132-
/// (supplied as high and low words) by `d`, with a precalculated reciprocal `v`.
133-
#[inline(always)]
134-
const fn div2by1(u1: Word, u0: Word, reciprocal: &Reciprocal) -> (Word, Word) {
135-
let d = reciprocal.divisor_normalized;
136-
137-
debug_assert!(d >= (1 << (Word::BITS - 1)));
138-
debug_assert!(u1 < d);
139-
140-
let (q1, q0) = mulhilo(reciprocal.reciprocal, u1);
141-
let (q1, q0) = addhilo(q1, q0, u1, u0);
142-
let q1 = q1.wrapping_add(1);
143-
let r = u0.wrapping_sub(q1.wrapping_mul(d));
144-
145-
let r_gt_q0 = ConstChoice::from_word_lt(q0, r);
146-
let q1 = r_gt_q0.select_word(q1, q1.wrapping_sub(1));
147-
let r = r_gt_q0.select_word(r, r.wrapping_add(d));
148-
149-
// If this was a normal `if`, we wouldn't need wrapping ops, because there would be no overflow.
150-
// But since we calculate both results either way, we have to wrap.
151-
// Added an assert to still check the lack of overflow in debug mode.
152-
debug_assert!(r < d || q1 < Word::MAX);
153-
let r_ge_d = ConstChoice::from_word_le(d, r);
154-
let q1 = r_ge_d.select_word(q1, q1.wrapping_add(1));
155-
let r = r_ge_d.select_word(r, r.wrapping_sub(d));
156-
157-
(q1, r)
158-
}
159-
160-
/// A pre-calculated reciprocal for division by a single limb.
161-
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
162-
pub struct Reciprocal {
163-
divisor_normalized: Word,
164-
shift: u32,
165-
reciprocal: Word,
166-
}
167-
168-
impl Reciprocal {
169-
/// Pre-calculates a reciprocal for a known divisor,
170-
/// to be used in the single-limb division later.
171-
pub const fn new(divisor: NonZero<Limb>) -> Self {
172-
let divisor = divisor.0;
173-
174-
// Assuming this is constant-time for primitive types.
175-
let shift = divisor.0.leading_zeros();
176-
177-
// Will not panic since divisor is non-zero
178-
let divisor_normalized = divisor.0 << shift;
179-
180-
Self {
181-
divisor_normalized,
182-
shift,
183-
reciprocal: reciprocal(divisor_normalized),
184-
}
185-
}
186-
187-
/// Returns a default instance of this object.
188-
/// It is a self-consistent `Reciprocal` that will not cause panics in functions that take it.
189-
///
190-
/// NOTE: intended for using it as a placeholder during compile-time array generation,
191-
/// don't rely on the contents.
192-
pub const fn default() -> Self {
193-
Self {
194-
divisor_normalized: Word::MAX,
195-
shift: 0,
196-
// The result of calling `reciprocal(Word::MAX)`
197-
// This holds both for 32- and 64-bit versions.
198-
reciprocal: 1,
199-
}
200-
}
201-
}
202-
203-
impl ConditionallySelectable for Reciprocal {
204-
fn conditional_select(a: &Self, b: &Self, choice: Choice) -> Self {
205-
Self {
206-
divisor_normalized: Word::conditional_select(
207-
&a.divisor_normalized,
208-
&b.divisor_normalized,
209-
choice,
210-
),
211-
shift: u32::conditional_select(&a.shift, &b.shift, choice),
212-
reciprocal: Word::conditional_select(&a.reciprocal, &b.reciprocal, choice),
213-
}
214-
}
215-
}
216-
217-
// `CtOption.map()` needs this; for some reason it doesn't use the value it already has
218-
// for the `None` branch.
219-
impl Default for Reciprocal {
220-
fn default() -> Self {
221-
Self::default()
222-
}
223-
}
4+
use crate::{uint::reciprocal::div2by1, BoxedUint, Limb, Reciprocal};
2245

2256
/// Divides `u` by the divisor encoded in the `reciprocal`, and returns
2267
/// the quotient and the remainder.
@@ -229,7 +10,7 @@ pub(crate) fn div_rem_limb_with_reciprocal(
22910
u: &BoxedUint,
23011
reciprocal: &Reciprocal,
23112
) -> (BoxedUint, Limb) {
232-
let (u_shifted, u_hi) = u.shl_limb(reciprocal.shift);
13+
let (u_shifted, u_hi) = u.shl_limb(reciprocal.shift());
23314
let mut r = u_hi.0;
23415
let mut q = vec![Limb::ZERO; u.limbs.len()];
23516

@@ -240,7 +21,7 @@ pub(crate) fn div_rem_limb_with_reciprocal(
24021
q[j] = Limb(qj);
24122
r = rj;
24223
}
243-
(BoxedUint { limbs: q.into() }, Limb(r >> reciprocal.shift))
24+
(BoxedUint { limbs: q.into() }, Limb(r >> reciprocal.shift()))
24425
}
24526

24627
#[cfg(test)]

src/uint/div.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
//! [`Uint`] division operations.
22
3-
use super::div_limb::{div_rem_limb_with_reciprocal, Reciprocal};
4-
use crate::{CheckedDiv, ConstChoice, Limb, NonZero, Uint, Word, Wrapping};
3+
use super::div_limb::div_rem_limb_with_reciprocal;
4+
use crate::{
5+
uint::reciprocal::Reciprocal, CheckedDiv, ConstChoice, Limb, NonZero, Uint, Word, Wrapping,
6+
};
57
use core::ops::{Div, DivAssign, Rem, RemAssign};
68
use subtle::CtOption;
79

0 commit comments

Comments
 (0)