Skip to content

Commit 00e1fe5

Browse files
committed
fix compile warnings and errors with gcc and clang
1 parent 7ff2d57 commit 00e1fe5

File tree

5 files changed

+84
-59
lines changed

5 files changed

+84
-59
lines changed

build_tests.sh

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@
99
# Description of this script -----------
1010
# This is a working convenience script for invoking the testing builds and then
1111
# running the tests.
12-
# The syntax is
13-
# ./build_tests [-c<compiler_name>] [-r] [-a] [-u] [-t] [-m<Release|Debug|Profile>] [-l<standard_library_name>]
12+
# The syntax is
13+
# ./build_tests [-c<compiler_name>] [-j<num_jobs>] [-r] [-a] [-u] [-t] [-m<Release|Debug|Profile>] [-l<standard_library_name>]
1414
#
1515
# -c allows you to select the compiler, rather than using the default.
16+
# -j specifies the number of jobs (typically threads) that you want the compiler
17+
# to use when building. If you omit this option, the compiler's default
18+
# number of jobs will be used.
1619
# -r specifies to run all tests after the build. Without -r, no tests will run.
1720
# -a specifies you want to compile the code using typically helpful (how much it
1821
# helps depends on your compiler) inline asm optimizations, which makes for
@@ -168,17 +171,20 @@
168171

169172

170173

171-
while getopts ":m:l:c:h-:raut" opt; do
174+
while getopts ":m:l:c:j:h-:raut" opt; do
172175
case $opt in
173176
h)
174177
;&
175178
-)
176-
echo "Usage: build_tests [-c<compiler_name>] [-r] [-a] [-u] [-t] [-m<Release|Debug|Profile>] [-l<standard_library_name>]" >&2
179+
echo "Usage: build_tests [-c<compiler_name>] [-j<num_jobs>] [-r] [-a] [-u] [-t] [-m<Release|Debug|Profile>] [-l<standard_library_name>]" >&2
177180
exit 1
178181
;;
179182
c)
180183
compiler=$OPTARG
181184
;;
185+
j)
186+
num_jobs="-j$OPTARG"
187+
;;
182188
m)
183189
mode=$OPTARG
184190
;;
@@ -519,7 +525,7 @@ if [ "${mode,,}" = "release" ]; then
519525
$gcc_static_analysis" "${clang_static_analysis[@]}" \
520526
$cmake_cpp_compiler $cmake_c_compiler
521527
exit_on_failure
522-
cmake --build ./$build_dir --config Release
528+
cmake --build ./$build_dir $num_jobs --config Release
523529
exit_on_failure
524530
popd > /dev/null 2>&1
525531
elif [ "${mode,,}" = "debug" ]; then
@@ -536,7 +542,7 @@ elif [ "${mode,,}" = "debug" ]; then
536542
$gcc_static_analysis" "${clang_static_analysis[@]}" \
537543
$cmake_cpp_compiler $cmake_c_compiler
538544
exit_on_failure
539-
cmake --build ./$build_dir --config Debug
545+
cmake --build ./$build_dir $num_jobs --config Debug
540546
exit_on_failure
541547
popd > /dev/null 2>&1
542548
elif [ "${mode,,}" = "profile" ]; then
@@ -551,7 +557,7 @@ elif [ "${mode,,}" = "profile" ]; then
551557
$use_inline_asm $use_all_inline_asm" \
552558
$cmake_cpp_compiler $cmake_c_compiler
553559
exit_on_failure
554-
cmake --build ./$build_dir --config RelWithDebInfo
560+
cmake --build ./$build_dir $num_jobs --config RelWithDebInfo
555561
exit_on_failure
556562
popd > /dev/null 2>&1
557563
else

include/hurchalla/factoring/detail/impl_is_prime_intensive.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,13 @@ struct impl_is_prime_intensive {
121121
return is_prime_miller_rabin_special::
122122
case_3317044064679887385961981_128_13<TRIAL_SIZE>(mf);
123123
}
124+
// For the rationale behind the following static_assert, see the struct
125+
// MillerRabinMontgomery<MontType, 128, TRIAL_SIZE, 128>
126+
// inside is_prime_miller_rabin.h
127+
static_assert(ut_numeric_limits<T>::digits == 128 ||
128+
(ut_numeric_limits<T>::is_signed &&
129+
ut_numeric_limits<T>::digits == 127));
130+
124131
// 128 bit miller-rabin with 128 bases is going to be slow no matter
125132
// what, but a trial size of 3 will usually improve performance over
126133
// trial size 1, due to more efficient use of the CPU's pipelined and/or
@@ -141,10 +148,14 @@ struct impl_is_prime_intensive {
141148
// back to a much slower method (pollard-rho) if unexpectedly it's in
142149
// error. There's no way to trivially check any primality test, which
143150
// includes BPSW.]
144-
constexpr std::size_t TOTAL_BASES = 128;
145151
constexpr std::size_t TRIAL_SIZE = 3;
146-
const auto bases = MillerRabinBases<128, TOTAL_BASES>::get(x);
152+
const auto& bases = MillerRabinProbabilisticBases128<>::bases;
153+
#if 0
147154
return IPMR_internal::miller_rabin_trials<TRIAL_SIZE>(mf, bases);
155+
#else
156+
// using miller_rabin_trials128 improves perf by tuning to the modulus size
157+
return IPMR_internal::miller_rabin_trials128<TRIAL_SIZE>(mf, bases);
158+
#endif
148159
}
149160
else {
150161
// C++ treats static_assert in constexpr-if as ill-formed if it is
@@ -223,7 +234,7 @@ struct impl_is_prime_intensive {
223234
// mont_miller_rabin<>(), so that we instantiate the same miller-rabin
224235
// hash table in all calls.
225236

226-
if constexpr (std::is_same<MontgomeryForm<T>::MontyTag,
237+
if constexpr (std::is_same<typename MontgomeryForm<T>::MontyTag,
227238
TagMontyQuarterrange>::value) {
228239
MontgomeryForm<T> mf(x);
229240
return mont_miller_rabin<decltype(mf),bitsTable>(mf);

include/hurchalla/factoring/detail/is_prime_miller_rabin.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ struct MillerRabinMontgomery {
408408
static bool is_prime(const MontType& mf)
409409
{
410410
using T = typename MontType::IntegerType;
411+
using UT = typename extensible_make_unsigned<T>::type;
411412
static_assert(ut_numeric_limits<T>::is_integer, "");
412413
static_assert(LOG2_MODULUS_LIMIT <= ut_numeric_limits<T>::digits, "");
413414
T modulus = mf.getModulus();
@@ -416,7 +417,7 @@ struct MillerRabinMontgomery {
416417
static_assert(POW2_LIMIT >= LOG2_MODULUS_LIMIT, "");
417418
using U = typename sized_uint<POW2_LIMIT>::type;
418419
// Ensure that 1 < modulus < (1 << LOG2_MODULUS_LIMIT)
419-
HPBC_PRECONDITION2(1 < modulus && modulus <=
420+
HPBC_PRECONDITION2(1 < modulus && static_cast<UT>(modulus) <=
420421
(static_cast<U>(1) << (LOG2_MODULUS_LIMIT-1)) - 1 +
421422
(static_cast<U>(1) << (LOG2_MODULUS_LIMIT-1)));
422423
const auto bases = MillerRabinBases<LOG2_MODULUS_LIMIT, TOTAL_BASES>::
@@ -669,14 +670,16 @@ struct is_prime_miller_rabin {
669670
// than the 5 base test below.
670671
// Note: the hashed version covers ~64x larger range than the unhashed.
671672
// It uses 64 bytes of static memory though, whereas unhashed uses none.
673+
using U = typename extensible_make_unsigned<T>::type;
674+
auto umodulus = static_cast<U>(mf.getModulus());
672675
#ifdef HURCHALLA_DEFAULT_TO_UNHASHED_MILLER_RABIN
673-
if (mf.getModulus() < UINT64_C(273919523041)) {
676+
if (umodulus < UINT64_C(273919523041)) {
674677
constexpr std::size_t TRIAL_SIZE = 2;
675678
return is_prime_miller_rabin_special::
676679
case_273919523041_64_3<TRIAL_SIZE>(mf);
677680
}
678681
#else
679-
if (mf.getModulus() < (static_cast<std::uint64_t>(1) << 44)) {
682+
if (umodulus < (static_cast<std::uint64_t>(1) << 44)) {
680683
constexpr std::size_t TOTAL_BASES = 3;
681684
constexpr std::size_t TRIAL_SIZE = 2;
682685
return MillerRabinMontgomery<MontType, 44, TRIAL_SIZE,
@@ -768,7 +771,7 @@ struct is_prime_miller_rabin {
768771
// We consistently use bitsTable as a template argument to call<>(), so
769772
// that we instantiate the same miller-rabin hash table in all calls.
770773

771-
if constexpr (std::is_same<MontgomeryForm<T2>::MontyTag,
774+
if constexpr (std::is_same<typename MontgomeryForm<T2>::MontyTag,
772775
TagMontyQuarterrange>::value) {
773776
MontgomeryForm<T2> mf(static_cast<T2>(x));
774777
return call<decltype(mf),bitsTable>(mf);

include/hurchalla/factoring/factorize.h

Lines changed: 49 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -31,60 +31,23 @@
3131
// argument value. The only difference between the two functions is the
3232
// structure used for the factors: the first function uses an array, and the
3333
// second uses a vector. See the comments above each function for more details.
34+
//
35+
// Information on the algorithms they use, and on performance, is provided at
36+
// the bottom of this file.
3437

3538

3639
namespace hurchalla {
3740

3841

39-
40-
// ------------------------------------
41-
// The Algorithms:
42-
// ------------------------------------
43-
// Prior to heavier-weight factorization, factorize() first uses a small trial
44-
// disivion stage. It then uses either ECM or Pollard-Rho to find all remaining
45-
// factors, depending on the size of the number. Prior to trying to extract
46-
// any factor with ECM or Pollard-Rho, it tests for primality by using the
47-
// deterministic Miller-Rabin algorithm - we usually speed up this algorithm by
48-
// using one of the very small hash tables (~100 bytes for example) in
49-
// factoring/include/hurchalla/factoring/detail/miller_rabin_bases/
50-
//
51-
// For numbers below ~40 bits, factorize() uses the Pollard-Rho factorization
52-
// algorithm, with Brent's improvements (see https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm)
53-
// along with other further improvements I made to the algorithm.
54-
//
55-
// For numbers above ~40 bits, factorize() uses ECM tailored for numbers between
56-
// 32 to 128 bits in size. This ECM code was initially based on Ben Buhrow's
57-
// "micro-ecm", which was then improved, optimized, and extended to 128 bits.
58-
59-
// ------------------------------------
60-
// Performance:
61-
// ------------------------------------
62-
// For 64 bit numbers, the resulting factorization functions below are likely
63-
// the fastest you will currently be able to find, both for factoring arbitrary
64-
// values and for factoring semiprimes with two large factors.
65-
//
66-
// For 128 bit numbers, this code needs to be performance tested against other
67-
// factoring libraries. An initial expectation is that this code will be
68-
// be competitive or possibly do better than other libraries available for 128
69-
// bit numbers, but this is not yet known.
70-
//
71-
// For 32 bit numbers, a very well-optimized implementation of Hart's One Line
72-
// Factoring algorithm and/or Lehman's method might potentially be faster than
73-
// the functions here. The functions here should nonetheless be fairly close to
74-
// the fastest currently available at 32 bits.
75-
//
76-
// For 256 bit or larger numbers - which this library does not support - you may
77-
// wish to seek out ECM for smaller bit depths, and then Quadratic Sieve and
78-
// GNFS for larger bit depths. For example, see the GMP project/library.
79-
80-
8142
// ------------------------------------
8243
// The functions:
8344
// ------------------------------------
8445

85-
// Returns a std::array that contains all factors of x, and writes the total
86-
// number of factors to num_factors. The array entries with index < num_factor
87-
// are the factors.
46+
// There are two versions of the factorize function.
47+
48+
// This first version returns a std::array that contains all factors of x, and
49+
// writes the total number of factors to num_factors. The array entries with
50+
// index < num_factors are the factors.
8851
// The argument 'expect_arbitrary_size_factors' does not affect the results, but
8952
// it will optimize the factoring to be faster when you know or expect all the
9053
// factors will be large (assuming you set it to false), or it will optimize the
@@ -163,6 +126,47 @@ void factorize(T x, std::vector<T>& factors,
163126
}
164127

165128

129+
// ------------------------------------
130+
// The Algorithms:
131+
// ------------------------------------
132+
// Prior to heavier-weight factorization, factorize() first uses a small trial
133+
// division stage. It then uses either ECM or Pollard-Rho to find all remaining
134+
// factors, depending on the size of the number. Prior to trying to extract
135+
// any factor with ECM or Pollard-Rho, it tests for primality by using the
136+
// deterministic Miller-Rabin algorithm - we usually speed up this algorithm by
137+
// using one of the very small hash tables (~100 bytes for example) in
138+
// factoring/include/hurchalla/factoring/detail/miller_rabin_bases/
139+
//
140+
// For numbers below ~40 bits, factorize() uses the Pollard-Rho factorization
141+
// algorithm, with Brent's improvements (see https://en.wikipedia.org/wiki/Pollard%27s_rho_algorithm)
142+
// along with other further improvements I made to the algorithm.
143+
//
144+
// For numbers above ~40 bits, factorize() uses ECM tailored for numbers between
145+
// 32 and 128 bits in size. This ECM code was initially based on Ben Buhrow's
146+
// "micro-ecm", which was improved and optimized and extended to 128 bits.
147+
148+
// ------------------------------------
149+
// Performance:
150+
// ------------------------------------
151+
// For 64 bit numbers, the resulting factorization functions above are likely
152+
// the fastest you will currently be able to find, both for factoring arbitrary
153+
// values and for factoring semiprimes with two large factors.
154+
//
155+
// For 128 bit numbers, this code needs to be performance tested against other
156+
// factoring libraries. An initial expectation is that this code will be
157+
// competitive or possibly do better than other libraries available for 128
158+
// bit numbers, but this is not yet known.
159+
//
160+
// For 32 bit numbers, a very well-optimized implementation of Hart's One Line
161+
// Factoring algorithm and/or Lehman's method might potentially be faster than
162+
// the functions here. The functions here should nonetheless be fairly close to
163+
// the fastest currently available at 32 bits.
164+
//
165+
// For 256 bit or larger numbers - which this library does not support - you may
166+
// wish to seek out ECM for smaller bit depths, and then Quadratic Sieve and
167+
// GNFS for larger bit depths. For example, see the GMP project/library.
168+
169+
166170
} // end namespace
167171

168172
#endif

test/test_FactorByTable32.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ void test_all_valid_inputs(const FactorByTable<BITLEN, FAVOR_SMALL_SIZE>& factor
9292

9393

9494
template <int BITLEN, bool FAVOR_SMALL_SIZE>
95+
[[maybe_unused]]
9596
dsec quick_bench(const FactorByTable<BITLEN, FAVOR_SMALL_SIZE>& factorTable,
9697
uint32_t min, uint32_t max, uint32_t samplesize)
9798
{

0 commit comments

Comments
 (0)