add divideBySmallPowerOf2 function to MontgomeryForm

hurchalla · hurchalla · commit 3afdbb322902 · 2025-09-09T07:42:59.000-07:00
diff --git a/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/MontgomeryForm.h b/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/MontgomeryForm.h
@@ -26,28 +26,28 @@
 namespace hurchalla {
 
 
-// T must be a signed or unsigned integral type.
+// T must be a signed or unsigned integral type. You should normally set T to
+// the same type as the (integer) modulus that you will use for this class's
+// constructor.
 //
 // For InlineAllFunctions, you should usually accept the default rather than
-// specify an argument.  However if you wish to reduce compilation times you can
+// specify an argument. However if you wish to reduce compilation times you can
 // set it to false, which may help.
 //
 // For MontyType, you should just accept the default (this parameter exists to
 // provide you the alias classes in montgomery_form_aliases.h.)
 template <class T,
-          bool InlineAll = (ut_numeric_limits<T>::digits <= HURCHALLA_TARGET_BIT_WIDTH),
+          bool InlineAllFunctions = (ut_numeric_limits<T>::digits <= HURCHALLA_TARGET_BIT_WIDTH),
           class MontyType = typename detail::MontgomeryDefault<T>::type>
 class MontgomeryForm final {
-    const detail::ImplMontgomeryForm<T, InlineAll, MontyType> impl;
+    const detail::ImplMontgomeryForm<T, InlineAllFunctions, MontyType> impl;
     template <class,class> friend struct detail::MontgomeryFormExtensions;
     static_assert(ut_numeric_limits<T>::is_integer, "");
     static_assert(ut_numeric_limits<T>::digits <=
                   ut_numeric_limits<typename MontyType::uint_type>::digits, "");
-    using SV = typename MontyType::squaringvalue_type;
-    using RU = typename MontyType::uint_type;
 public:
     using IntegerType = T;
-    using MontyTag = typename MontyType::MontyTag;
+    using MontType = MontyType;
 
     // If you need to compare MontgomeryValues for equality or inequality, call
     // getCanonicalValue() and compare the resulting CanonicalValues.
@@ -485,7 +485,7 @@ class MontgomeryForm final {
         HPBC_CLOCKWORK_API_PRECONDITION(exponent >= 0);
         std::array<MontgomeryValue, 1> bases = {{ base }};
         std::array<MontgomeryValue, 1> result =
-                detail::montgomery_array_pow<MontyTag,
+                detail::montgomery_array_pow<typename MontyType::MontyTag,
                                    MontgomeryForm>::pow(*this, bases, exponent);
         return result[0];
         //return detail::montgomery_pow<MontgomeryForm>::scalarpow(*this, base, exponent);
@@ -531,23 +531,19 @@ class MontgomeryForm final {
     pow(const std::array<MontgomeryValue, NUM_BASES>& bases, T exponent) const
     {
         HPBC_CLOCKWORK_API_PRECONDITION(exponent >= 0);
-        return detail::montgomery_array_pow<MontyTag,
+        return detail::montgomery_array_pow<typename MontyType::MontyTag,
                                    MontgomeryForm>::pow(*this, bases, exponent);
     }
 
 
-    // Returns the multiplicative inverse of 'x' in the Montgomery domain if
-    // the inverse exists. If the inverse does not exist, it returns zero (or
-    // more precisely, it returns the value equal to getZeroValue()).
-    // This is a convenience function to stay in the Montgomery domain when you
-    // want to find the multiplicative inverse of a MontgomeryValue.
-    //
-    // Performance note: this function has no performance advantage over
-    // hurchalla::modular_multiplicative_inverse if you need the inverse of a
-    // number in standard integer domain - i.e. don't convert into Montgomery
-    // domain just to call this function. However, when you intend to stay in
-    // the Montgomery domain, this function is the fastest way to get the
-    // multiplicative inverse.
+    // Calculates and returns the multiplicative inverse of 'x' as a canonical
+    // Montgomery value, if the inverse exists. If the inverse does not exist,
+    // this function returns zero (more precisely it returns the value equal to
+    // getZeroValue()).
+    // Performance note: there is no performance advantage to converting into
+    // Montgomery form if all you want is the inverse of a number in standard
+    // integer domain - prefer hurchalla::modular_multiplicative_inverse() for
+    // that case.
     template <class PTAG = LowlatencyTag> HURCHALLA_FORCE_INLINE
     CanonicalValue inverse(MontgomeryValue x) const
     {
@@ -558,6 +554,48 @@ class MontgomeryForm final {
     }
 
 
+    // Returns the Montgomery division of x by a small power of two (requires
+    // 0 <= power <= 7, which translates to Montgomery division by 1,2,4,8,16,
+    // 32,64, or 128). This function always produces an exactly correct result.
+    // Note that Montgomery division is modular division, which is different
+    // from ordinary (non-modular) division - modular division performs modular
+    // multiplication by the modular multiplicative inverse of the divisor. So,
+    // this function calculates and returns the product of x times the modular
+    // multiplicative inverse of 2^power (^ denotes exponentiation), in
+    // Montgomery form. Due to the requirement that every Montgomery modulus
+    // must be odd, the inverses of all powers of two exist in Montgomery form,
+    // and so multiplication by the inverse of 2^power is always valid. And
+    // since all calculation is modular, the division result is exactly correct
+    // - i.e. x is congruent to the return value times 2^power.
+    //
+    // In the common case that you wish to divide a MontgomeryValue x instead of
+    // a CanonicalValue, use getCanonicalValue(x) as your first argument to this
+    // function.
+    //
+    // If you wish to divide by some large power of 2, you can use the following
+    // sequence of calls:
+    //   // Assume "mf" is a MontgomeryForm instance that you have constructed
+    //   // ...for example via...    auto mf = MontgomeryForm(modulus);
+    //   // and assume that the large power is   some_large_exponent.
+    //   auto inv_two = mf.divideBySmallPowerOf2(mf.getUnityValue(), 1);
+    //   auto full_inv = mf.pow(inv_two, some_large_exponent);
+    //   auto desired_result = mf.multiply(full_inv, x);
+    //
+    // Performance note: this function is very efficient. It should ordinarily
+    // be faster than even a single call of multiply().
+    template <class PTAG = LowlatencyTag> HURCHALLA_FORCE_INLINE
+    MontgomeryValue divideBySmallPowerOf2(CanonicalValue x, int power) const
+    {
+        HPBC_CLOCKWORK_API_PRECONDITION(0 <= power && power < 8);
+
+        MontgomeryValue ret= impl.template divideBySmallPowerOf2<PTAG>(x,power);
+
+        HPBC_CLOCKWORK_POSTCONDITION(x ==
+              getCanonicalValue(multiply(ret, two_pow(static_cast<T>(power)))));
+        return ret;
+    }
+
+
     // Returns the "greatest common divisor" of the standard representations
     // (non-montgomery) of both x and the modulus, using the gcd functor that
     // you supply. The functor must take two integral arguments of the same type
diff --git a/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/ImplMontgomeryForm.contents b/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/ImplMontgomeryForm.contents
@@ -248,6 +248,13 @@ public:
     }
 
 
+    template <class PTAG> HURCHALLA_IMF_MAYBE_FORCE_INLINE
+    MontgomeryValue divideBySmallPowerOf2(CanonicalValue x, int power) const
+    {
+        return impl.divideBySmallPowerOf2(x, power, PTAG());
+    }
+
+
     template <class PTAG> HURCHALLA_IMF_MAYBE_FORCE_INLINE
     MontgomeryValue convertInExtended(U a) const
     {
diff --git a/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontgomeryFormExtensions.h b/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontgomeryFormExtensions.h
@@ -25,14 +25,14 @@ namespace hurchalla { namespace detail {
 template <class MF, class PTAG>
 struct MontgomeryFormExtensions final {
 
-    using RU = typename MF::RU;
+    using RU = typename MF::MontType::uint_type;
     // conceptually, R = 1 << (ut_numeric_limits<RU>::digits)
     static_assert(ut_numeric_limits<RU>::is_integer, "");
     static_assert(!(ut_numeric_limits<RU>::is_signed), "");
 
     using CanonicalValue = typename MF::CanonicalValue;
     using MontgomeryValue = typename MF::MontgomeryValue;
-    using SquaringValue = typename MF::SV;
+    using SquaringValue = typename MF::MontType::squaringvalue_type;
 
     HURCHALLA_FORCE_INLINE
     static MontgomeryValue convertInExtended(const MF& mf, RU a)
diff --git a/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontyCommonBase.h b/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontyCommonBase.h
@@ -475,6 +475,30 @@ class MontyCommonBase {
     }
 
 
+    // This is close to being a copy/paste of twoPowLimited_times_x, but it's
+    // adjusted because here 'exponent' is the power of two that cx is divided by.
+    template <class PTAG> HURCHALLA_FORCE_INLINE
+    V divideBySmallPowerOf2(C cx, int exponent, PTAG) const
+    {
+        static constexpr int digitsT = ut_numeric_limits<T>::digits;
+        HPBC_CLOCKWORK_PRECONDITION2(0 <= exponent && exponent < digitsT);
+        int power = digitsT - exponent;
+        HPBC_CLOCKWORK_ASSERT2(0 < power && power <= digitsT);
+
+        T tmp = cx.get();
+        HPBC_CLOCKWORK_INVARIANT2(tmp < n_);
+        T u_lo = static_cast<T>((tmp << 1) << (power - 1));
+        int rshift = digitsT - power;
+        HPBC_CLOCKWORK_ASSERT2(0 <= rshift && rshift < digitsT);
+        T u_hi = static_cast<T>(tmp >> rshift);
+
+        HPBC_CLOCKWORK_ASSERT2(u_hi < n_);
+        const D* child = static_cast<const D*>(this);
+        V result = child->montyREDC(u_hi, u_lo, PTAG());
+        HPBC_CLOCKWORK_POSTCONDITION2(child->isValid(result));
+        return result;
+    }
+
 
     // returns (R*R) mod N
     HURCHALLA_FORCE_INLINE C getMontvalueR() const
diff --git a/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontyWrappedStandardMath.h b/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontyWrappedStandardMath.h
@@ -296,6 +296,20 @@ class MontyWrappedStandardMath final {
         return C(inv);
     }
 
+    template <class PTAG> HURCHALLA_FORCE_INLINE
+    V divideBySmallPowerOf2(C cx, int exponent, PTAG) const
+    {
+        V pow_of_two = twoPowLimited(static_cast<size_t>(exponent), PTAG());
+        C inv_pow_of_two = inverse(pow_of_two, PTAG());
+        C zero = getZeroValue();
+        HPBC_CLOCKWORK_ASSERT2(inv_pow_of_two != zero);
+        bool isZero;
+        V product = multiply(inv_pow_of_two, cx, isZero, PTAG());
+        HPBC_CLOCKWORK_ASSERT2((cx == zero) == isZero);
+        C result = getCanonicalValue(product);
+        return result;
+    }
+
     // Returns the greatest common divisor of the standard representations
     // (non-montgomery) of both x and the modulus, using the supplied functor.
     // The functor must take two integral arguments of the same type and return
diff --git a/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/AbstractMontgomeryForm.h b/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/AbstractMontgomeryForm.h
@@ -219,6 +219,9 @@ class AbstractMontgomeryForm {
     virtual CanonicalValue inverse(MontgomeryValue x,
         bool useLowlatencyTag) const = 0;
 
+    virtual MontgomeryValue divideBySmallPowerOf2(CanonicalValue cx,
+        int exponent, bool useLowlatencyTag) const = 0;
+
     virtual std::vector<MontgomeryValue> vectorPow(
         const std::vector<MontgomeryValue>& bases, IntegerType exponent) const = 0;
 
@@ -310,6 +313,13 @@ class AbstractMontgomeryForm {
         return inverse(x, std::is_same<PTAG, LowlatencyTag>::value);
     }
 
+    template <class PTAG = LowlatencyTag>
+    MontgomeryValue divideBySmallPowerOf2(CanonicalValue cx, int exponent) const
+    {
+        return divideBySmallPowerOf2(cx, exponent,
+                                     std::is_same<PTAG, LowlatencyTag>::value);
+    }
+
     template <std::size_t NUM_BASES>
     std::array<MontgomeryValue, NUM_BASES>
     pow(const std::array<MontgomeryValue, NUM_BASES>& bases, IntegerType exponent) const
diff --git a/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/AbstractMontgomeryWrapper.h b/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/AbstractMontgomeryWrapper.h
@@ -122,6 +122,10 @@ class AbstractMontgomeryWrapper final {
     CanonicalValue inverse(MontgomeryValue x) const
         { return pimpl->template inverse<PTAG>(x); }
 
+    template <class PTAG = LowlatencyTag>
+    MontgomeryValue divideBySmallPowerOf2(CanonicalValue cx, int exponent) const
+        { return pimpl->template divideBySmallPowerOf2<PTAG>(cx, exponent); }
+
     MontgomeryValue pow(MontgomeryValue base, IntegerType exponent) const
         { return pimpl->pow(base, exponent); }
 
diff --git a/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/ConcreteMontgomeryForm.h b/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/unit_testing_helpers/ConcreteMontgomeryForm.h
@@ -467,7 +467,7 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
             mfv = mfv2;
         } else {
             OpenMFV mfv2(mf.template multiply<LowuopsTag>(OpenMFV(OpenV(x)), OpenMFV(OpenV(y))));
-            mfv = mfv2;            
+            mfv = mfv2;
         }
         // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
         return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
@@ -484,7 +484,7 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
         } else {
             OpenMFV mfv2(mf.template multiply<LowuopsTag>(OpenMFV(OpenV(x)),
                 OpenMFV(OpenV(y)), resultIsZero));
-            mfv = mfv2;            
+            mfv = mfv2;
         }
         // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
         return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
@@ -500,7 +500,7 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
         } else {
             OpenMFV mfv2(mf.template fmsub<LowuopsTag>(OpenMFV(OpenV(x)),
                 OpenMFV(OpenV(y)), OpenMFC(OpenC(z))));
-            mfv = mfv2;            
+            mfv = mfv2;
         }
         // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
         return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
@@ -516,7 +516,7 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
         } else {
             OpenMFV mfv2(mf.template fmsub<LowuopsTag>(OpenMFV(OpenV(x)),
                 OpenMFV(OpenV(y)), OpenMFFV(OpenFV(z))));
-            mfv = mfv2;            
+            mfv = mfv2;
         }
         // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
         return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
@@ -532,7 +532,7 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
         } else {
             OpenMFV mfv2(mf.template fmadd<LowuopsTag>(OpenMFV(OpenV(x)),
                 OpenMFV(OpenV(y)), OpenMFC(OpenC(z))));
-            mfv = mfv2;            
+            mfv = mfv2;
         }
         // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
         return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
@@ -548,7 +548,7 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
         } else {
             OpenMFV mfv2(mf.template fmadd<LowuopsTag>(OpenMFV(OpenV(x)),
                 OpenMFV(OpenV(y)), OpenMFFV(OpenFV(z))));
-            mfv = mfv2;            
+            mfv = mfv2;
         }
         // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
         return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
@@ -562,7 +562,7 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
             mfv = mfv2;
         } else {
             OpenMFV mfv2(mf.template square<LowuopsTag>(OpenMFV(OpenV(x))));
-            mfv = mfv2;            
+            mfv = mfv2;
         }
         // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
         return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
@@ -578,7 +578,7 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
         } else {
             OpenMFV mfv2(mf.template fusedSquareSub<LowuopsTag>(OpenMFV(OpenV(x)),
                 OpenMFC(OpenC(cv))));
-            mfv = mfv2;            
+            mfv = mfv2;
         }
         // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
         return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
@@ -594,7 +594,7 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
         } else {
             OpenMFV mfv2(mf.template fusedSquareAdd<LowuopsTag>(OpenMFV(OpenV(x)),
                 OpenMFC(OpenC(cv))));
-            mfv = mfv2;            
+            mfv = mfv2;
         }
         // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
         return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
@@ -614,6 +614,22 @@ class ConcreteMontgomeryForm final : public AbstractMontgomeryForm<ut_numeric_li
         return OpenC(static_cast<typename OpenC::OT>(mfc.get()));
     }
 
+    virtual V divideBySmallPowerOf2(C cx, int exponent, bool useLowlatencyTag)
+        const override
+    {
+        OpenMFV mfv;
+        if (useLowlatencyTag) {
+            OpenMFV mfv2(mf.template divideBySmallPowerOf2<LowlatencyTag>(
+                                                 OpenMFC(OpenC(cx)), exponent));
+            mfv = mfv2;
+        } else {
+            OpenMFV mfv2(mf.template divideBySmallPowerOf2<LowuopsTag>(
+                                                 OpenMFC(OpenC(cx)), exponent));
+            mfv = mfv2;
+        }
+        // note: mfv.get() might be signed or unsigned; OpenV::OT is unsigned
+        return OpenV(static_cast<typename OpenV::OT>(mfv.get()));
+    }
 
     // This class (ConcreteMontgomeryForm) only supports calling vectorPow()
     // with a std::vector that has size equal to one of the sizes given by the
diff --git a/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/platform_specific/montgomery_two_pow.h b/montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/platform_specific/montgomery_two_pow.h
@@ -151,7 +151,7 @@ struct montgomery_two_pow {
     using U = typename extensible_make_unsigned<T>::type;
     U n = static_cast<U>(nt);
 
-    using MontyTag = typename MF::MontyTag;
+    using MontyTag = typename MF::MontType::MontyTag;
     using RU = typename MontgomeryFormExtensions<MF, LowlatencyTag>::RU;
     constexpr bool isBigPow = ut_numeric_limits<RU>::digits >
                               HURCHALLA_TARGET_BIT_WIDTH;
@@ -176,7 +176,7 @@ struct montgomery_two_pow {
     static_assert(hurchalla::ut_numeric_limits<U>::is_integer, "");
     static_assert(!hurchalla::ut_numeric_limits<U>::is_signed, "");
 
-    using MontyTag = typename MF::MontyTag;
+    using MontyTag = typename MF::MontType::MontyTag;
     using RU = typename MontgomeryFormExtensions<MF, LowlatencyTag>::RU;
     constexpr bool isBigPow = ut_numeric_limits<RU>::digits >
                               HURCHALLA_TARGET_BIT_WIDTH;
diff --git a/test/montgomery_arithmetic/test_MontgomeryForm.h b/test/montgomery_arithmetic/test_MontgomeryForm.h

Original file line number	Diff line number	Diff line change
`@@ -248,6 +248,13 @@ public:`
`248`	`248`	`}`
`249`	`249`
`250`	`250`
	`251`	`+ template <class PTAG> HURCHALLA_IMF_MAYBE_FORCE_INLINE`
	`252`	`+ MontgomeryValue divideBySmallPowerOf2(CanonicalValue x, int power) const`
	`253`	`+ {`
	`254`	`+ return impl.divideBySmallPowerOf2(x, power, PTAG());`
	`255`	`+ }`
	`256`	`+`
	`257`	`+`
`251`	`258`	`template <class PTAG> HURCHALLA_IMF_MAYBE_FORCE_INLINE`
`252`	`259`	`MontgomeryValue convertInExtended(U a) const`
`253`	`260`	`{`