fix compile warnings and errors with gcc and clang

hurchalla · hurchalla · commit 00e1fe5c89d5 · 2025-01-31T15:55:30.000-08:00
diff --git a/build_tests.sh b/build_tests.sh
@@ -9,10 +9,13 @@
 # Description of this script -----------
 # This is a working convenience script for invoking the testing builds and then
 # running the tests.
-# The syntax is 
-# ./build_tests [-c<compiler_name>] [-r] [-a] [-u] [-t] [-m<Release|Debug|Profile>] [-l<standard_library_name>]
+# The syntax is
+# ./build_tests [-c<compiler_name>] [-j<num_jobs>] [-r] [-a] [-u] [-t] [-m<Release|Debug|Profile>] [-l<standard_library_name>]
 #
 # -c allows you to select the compiler, rather than using the default.
+# -j specifies the number of jobs (typically threads) that you want the build
+#    tool to use when building.  If you omit this option, the build tool's
+#    default number of jobs will be used.
 # -r specifies to run all tests after the build.  Without -r, no tests will run.
 # -a specifies you want to compile the code using typically helpful (how much it
 #    helps depends on your compiler) inline asm optimizations, which makes for
@@ -168,17 +171,20 @@
 
 
 
-while getopts ":m:l:c:h-:raut" opt; do
+while getopts ":m:l:c:j:h-:raut" opt; do
   case $opt in
     h)
       ;&
     -)
-      echo "Usage: build_tests [-c<compiler_name>] [-r] [-a] [-u] [-t] [-m<Release|Debug|Profile>] [-l<standard_library_name>]" >&2
+      echo "Usage: build_tests [-c<compiler_name>] [-j<num_jobs>] [-r] [-a] [-u] [-t] [-m<Release|Debug|Profile>] [-l<standard_library_name>]" >&2
       exit 1
       ;;
     c)
       compiler=$OPTARG
       ;;
+    j)
+      num_jobs="-j$OPTARG"
+      ;;
     m)
       mode=$OPTARG
       ;;
@@ -519,7 +525,7 @@ if [ "${mode,,}" = "release" ]; then
             $gcc_static_analysis"  "${clang_static_analysis[@]}" \
             $cmake_cpp_compiler $cmake_c_compiler
     exit_on_failure
-    cmake --build ./$build_dir --config Release
+    cmake --build ./$build_dir $num_jobs --config Release
     exit_on_failure
     popd > /dev/null 2>&1
 elif [ "${mode,,}" = "debug" ]; then
@@ -536,7 +542,7 @@ elif [ "${mode,,}" = "debug" ]; then
             $gcc_static_analysis"  "${clang_static_analysis[@]}" \
             $cmake_cpp_compiler $cmake_c_compiler
     exit_on_failure
-    cmake --build ./$build_dir --config Debug
+    cmake --build ./$build_dir $num_jobs --config Debug
     exit_on_failure
     popd > /dev/null 2>&1
 elif [ "${mode,,}" = "profile" ]; then
@@ -551,7 +557,7 @@ elif [ "${mode,,}" = "profile" ]; then
             $use_inline_asm  $use_all_inline_asm" \
             $cmake_cpp_compiler $cmake_c_compiler
     exit_on_failure
-    cmake --build ./$build_dir --config RelWithDebInfo
+    cmake --build ./$build_dir $num_jobs --config RelWithDebInfo
     exit_on_failure
     popd > /dev/null 2>&1
 else
diff --git a/include/hurchalla/factoring/detail/impl_is_prime_intensive.h b/include/hurchalla/factoring/detail/impl_is_prime_intensive.h
@@ -121,6 +121,13 @@ struct impl_is_prime_intensive {
             return is_prime_miller_rabin_special::
                           case_3317044064679887385961981_128_13<TRIAL_SIZE>(mf);
         }
+        // For the rationale behind the following static_assert, see the struct
+        // MillerRabinMontgomery<MontType, 128, TRIAL_SIZE, 128>
+        // inside is_prime_miller_rabin.h
+        static_assert(ut_numeric_limits<T>::digits == 128 ||
+                           (ut_numeric_limits<T>::is_signed &&
+                            ut_numeric_limits<T>::digits == 127));
+
         // 128 bit miller-rabin with 128 bases is going to be slow no matter
         // what, but a trial size of 3 will usually improve performance over
         // trial size 1, due to more efficient use of the CPU's pipelined and/or
@@ -141,10 +148,14 @@ struct impl_is_prime_intensive {
         // back to a much slower method (pollard-rho) if unexpectedly it's in
         // error.  There's no way to trivially check any primality test, which
         // includes BPSW.]
-        constexpr std::size_t TOTAL_BASES = 128;
         constexpr std::size_t TRIAL_SIZE = 3;
-        const auto bases = MillerRabinBases<128, TOTAL_BASES>::get(x);
+        const auto& bases = MillerRabinProbabilisticBases128<>::bases;
+#if 0
         return IPMR_internal::miller_rabin_trials<TRIAL_SIZE>(mf, bases);
+#else
+        // using miller_rabin_trials128 improves perf by tuning to the modulus size
+        return IPMR_internal::miller_rabin_trials128<TRIAL_SIZE>(mf, bases);
+#endif
     }
     else {
         // C++ treats static_assert in constexpr-if as ill-formed if it is
@@ -223,7 +234,7 @@ struct impl_is_prime_intensive {
         // mont_miller_rabin<>(), so that we instantiate the same miller-rabin
         // hash table in all calls.
 
-        if constexpr (std::is_same<MontgomeryForm<T>::MontyTag,
+        if constexpr (std::is_same<typename MontgomeryForm<T>::MontyTag,
                                    TagMontyQuarterrange>::value) {
             MontgomeryForm<T> mf(x);
             return mont_miller_rabin<decltype(mf),bitsTable>(mf);
diff --git a/include/hurchalla/factoring/detail/is_prime_miller_rabin.h b/include/hurchalla/factoring/detail/is_prime_miller_rabin.h
@@ -408,6 +408,7 @@ struct MillerRabinMontgomery {
   static bool is_prime(const MontType& mf)
   {
     using T = typename MontType::IntegerType;
+    using UT = typename extensible_make_unsigned<T>::type;
     static_assert(ut_numeric_limits<T>::is_integer, "");
     static_assert(LOG2_MODULUS_LIMIT <= ut_numeric_limits<T>::digits, "");
     T modulus = mf.getModulus();
@@ -416,7 +417,7 @@ struct MillerRabinMontgomery {
     static_assert(POW2_LIMIT >= LOG2_MODULUS_LIMIT, "");
     using U = typename sized_uint<POW2_LIMIT>::type;
     // Ensure that 1 < modulus < (1 << LOG2_MODULUS_LIMIT)
-    HPBC_PRECONDITION2(1 < modulus && modulus <=
+    HPBC_PRECONDITION2(1 < modulus && static_cast<UT>(modulus) <=
                              (static_cast<U>(1) << (LOG2_MODULUS_LIMIT-1)) - 1 +
                              (static_cast<U>(1) << (LOG2_MODULUS_LIMIT-1)));
     const auto bases = MillerRabinBases<LOG2_MODULUS_LIMIT, TOTAL_BASES>::
@@ -669,14 +670,16 @@ struct is_prime_miller_rabin {
         // than the 5 base test below.
         // Note: the hashed version covers ~64x larger range than the unhashed.
         // It uses 64 bytes of static memory though, whereas unhashed uses none.
+        using U = typename extensible_make_unsigned<T>::type;
+        auto umodulus = static_cast<U>(mf.getModulus());
 #ifdef HURCHALLA_DEFAULT_TO_UNHASHED_MILLER_RABIN
-        if (mf.getModulus() < UINT64_C(273919523041)) {
+        if (umodulus < UINT64_C(273919523041)) {
             constexpr std::size_t TRIAL_SIZE = 2;
             return is_prime_miller_rabin_special::
                                      case_273919523041_64_3<TRIAL_SIZE>(mf);
         }
 #else
-        if (mf.getModulus() < (static_cast<std::uint64_t>(1) << 44)) {
+        if (umodulus < (static_cast<std::uint64_t>(1) << 44)) {
             constexpr std::size_t TOTAL_BASES = 3;
             constexpr std::size_t TRIAL_SIZE = 2;
             return MillerRabinMontgomery<MontType, 44, TRIAL_SIZE,
@@ -768,7 +771,7 @@ struct is_prime_miller_rabin {
     // We consistently use bitsTable as a template argument to call<>(), so
     // that we instantiate the same miller-rabin hash table in all calls.
 
-    if constexpr (std::is_same<MontgomeryForm<T2>::MontyTag,
+    if constexpr (std::is_same<typename MontgomeryForm<T2>::MontyTag,
                                TagMontyQuarterrange>::value) {
         MontgomeryForm<T2> mf(static_cast<T2>(x));
         return call<decltype(mf),bitsTable>(mf);
diff --git a/include/hurchalla/factoring/factorize.h b/include/hurchalla/factoring/factorize.h
@@ -31,60 +31,23 @@
 // argument value.  The only difference between the two functions is the
 // structure used for the factors: the first function uses an array, and the
 // second uses a vector.  See the comments above each function for more details.
+//
+// Information on the algorithms they use, and on performance, is provided at
+// the bottom of this file.
 
 
 namespace hurchalla {
 
 
-
-// ------------------------------------
-// The Algorithms:
-// ------------------------------------
-// Prior to heavier-weight factorization, factorize() first uses a small trial
-// disivion stage.  It then uses either ECM or Pollard-Rho to find all remaining
-// factors, depending on the size of the number.  Prior to trying to extract
-// any factor with ECM or Pollard-Rho, it tests for primality by using the
-// deterministic Miller-Rabin algorithm - we usually speed up this algorithm by
-// using one of the very small hash tables (~100 bytes for example) in
-// factoring/include/hurchalla/factoring/detail/miller_rabin_bases/
-//
-// For numbers below ~40 bits, factorize() uses the Pollard-Rho factorization
-// algorithm, with Brent's improvements (see https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm)
-// along with other further improvements I made to the algorithm.
-//
-// For numbers above ~40 bits, factorize() uses ECM tailored for numbers between
-// 32 to 128 bits in size.  This ECM code was initially based on Ben Buhrow's
-// "micro-ecm", which was then improved, optimized, and extended to 128 bits.
-
-// ------------------------------------
-// Performance:
-// ------------------------------------
-// For 64 bit numbers, the resulting factorization functions below are likely
-// the fastest you will currently be able to find, both for factoring arbitrary
-// values and for factoring semiprimes with two large factors.
-//
-// For 128 bit numbers, this code needs to be performance tested against other
-// factoring libraries.  An initial expectation is that this code will be
-// be competitive or possibly do better than other libraries available for 128
-// bit numbers, but this is not yet known.
-//
-// For 32 bit numbers, a very well-optimized implementation of Hart's One Line
-// Factoring algorithm and/or Lehman's method might potentially be faster than
-// the functions here.  The functions here should nonetheless be fairly close to
-// the fastest currently available at 32 bits.
-//
-// For 256 bit or larger numbers - which this library does not support - you may
-// wish to seek out ECM for smaller bit depths, and then Quadratic Sieve and
-// GNFS for larger bit depths.  For example, see the GMP project/library.
-
-
 // ------------------------------------
 // The functions:
 // ------------------------------------
 
-// Returns a std::array that contains all factors of x, and writes the total
-// number of factors to num_factors.  The array entries with index < num_factor
-// are the factors.
+// There are two versions of the factorize function.
+
+// This first version returns a std::array that contains all factors of x, and
+// writes the total number of factors to num_factors.  The array entries with
+// index < num_factors are the factors.
 // The argument 'expect_arbitrary_size_factors' does not affect the results, but
 // it will optimize the factoring to be faster when you know or expect all the
 // factors will be large (assuming you set it to false), or it will optimize the
@@ -163,6 +126,47 @@ void factorize(T x, std::vector<T>& factors,
 }
 
 
+// ------------------------------------
+// The Algorithms:
+// ------------------------------------
+// Prior to heavier-weight factorization, factorize() first uses a small trial
+// division stage.  It then uses either ECM or Pollard-Rho to find all remaining
+// factors, depending on the size of the number.  Prior to trying to extract
+// any factor with ECM or Pollard-Rho, it tests for primality by using the
+// deterministic Miller-Rabin algorithm - we usually speed up this algorithm by
+// using one of the very small hash tables (~100 bytes for example) in
+// factoring/include/hurchalla/factoring/detail/miller_rabin_bases/
+//
+// For numbers below ~40 bits, factorize() uses the Pollard-Rho factorization
+// algorithm, with Brent's improvements (see https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm)
+// along with other further improvements I made to the algorithm.
+//
+// For numbers above ~40 bits, factorize() uses ECM tailored for numbers between
+// 32 and 128 bits in size.  This ECM code was initially based on Ben Buhrow's
+// "micro-ecm", which was improved and optimized and extended to 128 bits.
+
+// ------------------------------------
+// Performance:
+// ------------------------------------
+// For 64 bit numbers, the resulting factorization functions above are likely
+// the fastest you will currently be able to find, both for factoring arbitrary
+// values and for factoring semiprimes with two large factors.
+//
+// For 128 bit numbers, this code needs to be performance tested against other
+// factoring libraries.  An initial expectation is that this code will be
+// competitive or possibly do better than other libraries available for 128
+// bit numbers, but this is not yet known.
+//
+// For 32 bit numbers, a very well-optimized implementation of Hart's One Line
+// Factoring algorithm and/or Lehman's method might potentially be faster than
+// the functions here.  The functions here should nonetheless be fairly close to
+// the fastest currently available at 32 bits.
+//
+// For 256 bit or larger numbers - which this library does not support - you may
+// wish to seek out ECM for smaller bit depths, and then Quadratic Sieve and
+// GNFS for larger bit depths.  For example, see the GMP project/library.
+
+
 }  // end namespace
 
 #endif
diff --git a/test/test_FactorByTable32.cpp b/test/test_FactorByTable32.cpp
@@ -92,6 +92,7 @@ void test_all_valid_inputs(const FactorByTable<BITLEN, FAVOR_SMALL_SIZE>& factor
 
 
 template <int BITLEN, bool FAVOR_SMALL_SIZE>
+[[maybe_unused]]
 dsec quick_bench(const FactorByTable<BITLEN, FAVOR_SMALL_SIZE>& factorTable,
                  uint32_t min, uint32_t max, uint32_t samplesize)
 {

Original file line number	Diff line number	Diff line change
`@@ -92,6 +92,7 @@ void test_all_valid_inputs(const FactorByTable<BITLEN, FAVOR_SMALL_SIZE>& factor`
`92`	`92`
`93`	`93`
`94`	`94`	`template <int BITLEN, bool FAVOR_SMALL_SIZE>`
	`95`	`+[[maybe_unused]]`
`95`	`96`	`dsec quick_bench(const FactorByTable<BITLEN, FAVOR_SMALL_SIZE>& factorTable,`
`96`	`97`	`uint32_t min, uint32_t max, uint32_t samplesize)`
`97`	`98`	`{`