Refactor Bernstein-Yang implementation(s) (#495)

tarcieri · web-flow · commit 336dda1b9676 · 2023-12-22T17:01:04.000-07:00
- Extracts a `divsteps` function in both the stack-allocated and boxed
  implementations which can be reused for both inversions and GCD
- Changes the boxed implementation to use more in-place operations
diff --git a/src/modular/bernstein_yang.rs b/src/modular/bernstein_yang.rs
@@ -75,18 +75,13 @@ impl<const SAT_LIMBS: usize, const UNSAT_LIMBS: usize>
     /// Returns either the adjusted modular multiplicative inverse for the argument or `None`
     /// depending on invertibility of the argument, i.e. its coprimality with the modulus
     pub const fn inv(&self, value: &Uint<SAT_LIMBS>) -> ConstCtOption<Uint<SAT_LIMBS>> {
-        let mut d = Int62L::ZERO;
-        let mut e = self.adjuster;
-        let mut f = self.modulus;
-        let mut g = Int62L::from_uint(value);
-        let mut delta = 1;
-        let mut matrix;
-
-        while !g.eq(&Int62L::ZERO) {
-            (delta, matrix) = jump(&f.0, &g.0, delta);
-            (f, g) = fg(f, g, matrix);
-            (d, e) = de(&self.modulus, self.inverse, d, e, matrix);
-        }
+        let (d, f) = divsteps(
+            self.adjuster,
+            self.modulus,
+            Int62L::from_uint(value),
+            self.inverse,
+        );
+
         // At this point the absolute value of "f" equals the greatest common divisor of the
         // integer to be inverted and the modulus the inverter was created for.
         // Thus, if "f" is neither 1 nor -1, then the sought inverse does not exist.
@@ -102,21 +97,11 @@ impl<const SAT_LIMBS: usize, const UNSAT_LIMBS: usize>
     /// `UNSAT_LIMBS` which are computed when defining `PrecomputeInverter::Inverter` for various
     /// `Uint` limb sizes.
     pub(crate) const fn gcd(f: &Uint<SAT_LIMBS>, g: &Uint<SAT_LIMBS>) -> Uint<SAT_LIMBS> {
-        let f_0 = Int62L::<UNSAT_LIMBS>::from_uint(f);
         let inverse = inv_mod2_62(f.as_words());
-
-        let mut d = Int62L::ZERO;
-        let mut e = Int62L::ONE;
-        let mut f = f_0;
-        let mut g = Int62L::from_uint(g);
-        let mut delta = 1;
-        let mut matrix;
-
-        while !g.eq(&Int62L::ZERO) {
-            (delta, matrix) = jump(&f.0, &g.0, delta);
-            (f, g) = fg(f, g, matrix);
-            (d, e) = de(&f_0, inverse, d, e, matrix);
-        }
+        let e = Int62L::<UNSAT_LIMBS>::ONE;
+        let f = Int62L::from_uint(f);
+        let g = Int62L::from_uint(g);
+        let (_, mut f) = divsteps(e, f, g, inverse);
 
         if f.is_negative() {
             f = f.neg();
@@ -188,6 +173,28 @@ const fn inv_mod2_62(value: &[Word]) -> i64 {
     (x.wrapping_mul(y.wrapping_add(1)) & (u64::MAX >> 2)) as i64
 }
 
+/// Algorithm `divsteps2` to compute (δₙ, fₙ, gₙ) = divstepⁿ(δ, f, g) as described in Figure 10.1
+/// of <https://eprint.iacr.org/2019/266.pdf>. Iterates until `g` is zero and returns `(d, f)`.
+const fn divsteps<const LIMBS: usize>(
+    mut e: Int62L<LIMBS>,
+    f_0: Int62L<LIMBS>,
+    mut g: Int62L<LIMBS>,
+    inverse: i64,
+) -> (Int62L<LIMBS>, Int62L<LIMBS>) {
+    let mut d = Int62L::ZERO;
+    let mut f = f_0;
+    let mut delta = 1;
+    let mut matrix;
+
+    while !g.eq(&Int62L::ZERO) {
+        (delta, matrix) = jump(&f.0, &g.0, delta);
+        (f, g) = fg(f, g, matrix);
+        (d, e) = de(&f_0, inverse, matrix, d, e);
+    }
+
+    (d, f)
+}
+
 /// Returns the Bernstein-Yang transition matrix multiplied by 2^62 and the new value of the
 /// delta variable for the 62 basic steps of the Bernstein-Yang method, which are to be
 /// performed sequentially for specified initial values of f, g and delta
@@ -252,9 +259,9 @@ const fn fg<const LIMBS: usize>(
 const fn de<const LIMBS: usize>(
     modulus: &Int62L<LIMBS>,
     inverse: i64,
+    t: Matrix,
     d: Int62L<LIMBS>,
     e: Int62L<LIMBS>,
-    t: Matrix,
 ) -> (Int62L<LIMBS>, Int62L<LIMBS>) {
     let mask = Int62L::<LIMBS>::MASK as i64;
     let mut md = t[0][0] * d.is_negative() as i64 + t[0][1] * e.is_negative() as i64;
diff --git a/src/modular/bernstein_yang/boxed.rs b/src/modular/bernstein_yang/boxed.rs
@@ -60,20 +60,9 @@ impl Inverter for BoxedBernsteinYangInverter {
 
     fn invert(&self, value: &BoxedUint) -> CtOption<Self::Output> {
         let mut d = BoxedInt62L::zero(self.modulus.0.len());
-        let mut e = self.adjuster.clone();
-        let mut f = self.modulus.clone();
         let mut g = BoxedInt62L::from(value).widen(d.0.len());
+        let f = divsteps(&mut d, &self.adjuster, &self.modulus, &mut g, self.inverse);
 
-        debug_assert_eq!(g.0.len(), self.modulus.0.len());
-
-        let mut delta = 1;
-        let mut matrix;
-
-        while !g.is_zero() {
-            (delta, matrix) = jump(&f.0, &g.0, delta);
-            (f, g) = fg(f, g, matrix);
-            (d, e) = de(&self.modulus, self.inverse, d, e, matrix);
-        }
         // At this point the absolute value of "f" equals the greatest common divisor of the
         // integer to be inverted and the modulus the inverter was created for.
         // Thus, if "f" is neither 1 nor -1, then the sought inverse does not exist.
@@ -85,28 +74,48 @@ impl Inverter for BoxedBernsteinYangInverter {
     }
 }
 
+/// Algorithm `divsteps2` to compute (δₙ, fₙ, gₙ) = divstepⁿ(δ, f, g) as described in Figure 10.1
+/// of <https://eprint.iacr.org/2019/266.pdf>. Updates `d` and `g` in place and returns `f`.
+fn divsteps(
+    d: &mut BoxedInt62L,
+    e: &BoxedInt62L,
+    f_0: &BoxedInt62L,
+    g: &mut BoxedInt62L,
+    inverse: i64,
+) -> BoxedInt62L {
+    debug_assert_eq!(f_0.0.len(), g.0.len());
+
+    let mut e = e.clone();
+    let mut f = f_0.clone();
+    let mut delta = 1;
+    let mut matrix;
+
+    while !g.is_zero() {
+        (delta, matrix) = jump(&f.0, &g.0, delta);
+        fg(&mut f, g, matrix);
+        de(f_0, inverse, matrix, d, &mut e);
+    }
+
+    f
+}
+
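-/// Returns the updated values of the variables f and g for specified initial ones and
-/// Bernstein-Yang transition matrix multiplied by 2^62. The returned vector is
+/// Updates the variables f and g in place for the specified Bernstein-Yang transition
+/// matrix multiplied by 2^62. The updated vector is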
 /// "matrix * (f, g)' / 2^62", where "'" is the transpose operator.
-fn fg(f: BoxedInt62L, g: BoxedInt62L, t: Matrix) -> (BoxedInt62L, BoxedInt62L) {
-    (
-        f.mul(t[0][0]).add(&g.mul(t[0][1])).shr(),
-        f.mul(t[1][0]).add(&g.mul(t[1][1])).shr(),
-    )
+fn fg(f: &mut BoxedInt62L, g: &mut BoxedInt62L, t: Matrix) {
+    // TODO(tarcieri): reduce allocations
+    let f2 = f.mul(t[0][0]).add(&g.mul(t[0][1])).shr();
+    let g2 = f.mul(t[1][0]).add(&g.mul(t[1][1])).shr();
+    *f = f2;
+    *g = g2;
 }
 
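-/// Returns the updated values of the variables d and e for specified initial ones and
-/// Bernstein-Yang transition matrix multiplied by 2^62. The returned vector is congruent modulo
+/// Updates the variables d and e in place for the specified Bernstein-Yang transition
+/// matrix multiplied by 2^62. The updated vector is congruent modulo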
 /// M to "matrix * (d, e)' / 2^62 (mod M)", where M is the modulus the inverter was created for
 /// and "'" stands for the transpose operator. Both the input and output values lie in the
 /// interval (-2 * M, M).
-fn de(
-    modulus: &BoxedInt62L,
-    inverse: i64,
-    d: BoxedInt62L,
-    e: BoxedInt62L,
-    t: Matrix,
-) -> (BoxedInt62L, BoxedInt62L) {
+fn de(modulus: &BoxedInt62L, inverse: i64, t: Matrix, d: &mut BoxedInt62L, e: &mut BoxedInt62L) {
     let mask = BoxedInt62L::MASK as i64;
     let mut md = t[0][0] * d.is_negative() as i64 + t[0][1] * e.is_negative() as i64;
     let mut me = t[1][0] * d.is_negative() as i64 + t[1][1] * e.is_negative() as i64;
@@ -127,7 +136,8 @@ fn de(
     let cd = d.mul(t[0][0]).add(&e.mul(t[0][1])).add(&modulus.mul(md));
     let ce = d.mul(t[1][0]).add(&e.mul(t[1][1])).add(&modulus.mul(me));
 
-    (cd.shr(), ce.shr())
+    *d = cd.shr();
+    *e = ce.shr();
 }
 
 /// "Bigint"-like (62 * LIMBS)-bit signed integer type, whose variables store numbers in the two's
@@ -402,7 +412,7 @@ mod tests {
             .into(),
         );
         let inverse = 3687945983376704433;
-        let d = BoxedInt62L(
+        let mut d = BoxedInt62L(
             vec![
                 3490544662636853909,
                 2211268325417683828,
@@ -413,7 +423,7 @@ mod tests {
             ]
             .into(),
         );
-        let e = BoxedInt62L(
+        let mut e = BoxedInt62L(
             vec![
                 4004071259428196451,
                 1262234674432503659,
@@ -426,9 +436,9 @@ mod tests {
         );
         let t = [[-45035996273704960, 409827566090715136], [-14, 25]];
 
-        let (new_d, new_e) = super::de(&modulus, inverse, d, e, t);
+        super::de(&modulus, inverse, t, &mut d, &mut e);
         assert_eq!(
-            new_d,
+            d,
             BoxedInt62L(
                 vec![
                     1211048314408256470,
@@ -442,7 +452,7 @@ mod tests {
             )
         );
 
-        assert_eq!(new_e, BoxedInt62L(vec![0, 0, 0, 0, 0, 0].into()));
+        assert_eq!(e, BoxedInt62L(vec![0, 0, 0, 0, 0, 0].into()));
     }
 
     #[test]