Skip to content

Commit a2c2b08

Browse files
committed
use cselect_on_bit where applicable, instead of conditional_select or MontgomeryValue's cmov
1 parent e89e664 commit a2c2b08

File tree

10 files changed

+42
-16
lines changed

10 files changed

+42
-16
lines changed

modular_arithmetic/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ include(FetchContent)
7575
FetchContent_Declare(
7676
hurchalla_util
7777
GIT_REPOSITORY https://github.com/hurchalla/util.git
78-
GIT_TAG e3a0fd02c86b67dcbf833fdd4ccf0732552f6e3e
78+
GIT_TAG aa71ce34e12392db20229979801048ff97e6b7da
7979
)
8080
FetchContent_MakeAvailable(hurchalla_util)
8181

modular_arithmetic/include/hurchalla/modular_arithmetic/detail/impl_modular_pow.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
#include "hurchalla/modular_arithmetic/modular_multiplication.h"
1313
#include "hurchalla/util/traits/ut_numeric_limits.h"
14-
#include "hurchalla/util/conditional_select.h"
14+
#include "hurchalla/util/cselect_on_bit.h"
1515
#include "hurchalla/util/compiler_macros.h"
1616
#include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h"
1717

@@ -49,8 +49,9 @@ struct impl_modular_pow {
4949
}
5050
*/
5151
// slightly optimized version
52-
// result = (exponent & 1u) ? base : 1;
53-
T result = hc::conditional_select((exponent & 1u), base, static_cast<T>(1));
52+
// T result = (exponent & 1u) ? base : 1;
53+
T result = ::hurchalla::cselect_on_bit<0>::ne_0(
54+
static_cast<uint64_t>(exponent), base, static_cast<T>(1));
5455
while (exponent > 1)
5556
{
5657
exponent = static_cast<U>(exponent >> 1);

modular_arithmetic/include/hurchalla/modular_arithmetic/detail/platform_specific/impl_modular_multiplication.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#include "hurchalla/modular_arithmetic/detail/optimization_tag_structs.h"
1414
#include "hurchalla/util/traits/safely_promote_unsigned.h"
1515
#include "hurchalla/util/traits/ut_numeric_limits.h"
16-
#include "hurchalla/util/conditional_select.h"
16+
#include "hurchalla/util/cselect_on_bit.h"
1717
#include "hurchalla/util/compiler_macros.h"
1818
#include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h"
1919
#include <cstdint>
@@ -70,7 +70,8 @@ struct slow_modular_multiplication {
7070
namespace hc = ::hurchalla;
7171
T tmp = hc::modular_addition_prereduced_inputs(a, result, modulus);
7272
// result = (b&1) ? tmp : result
73-
result = hc::conditional_select((b & 1u), tmp, result);
73+
result = ::hurchalla::cselect_on_bit<0>::ne_0(
74+
static_cast<uint64_t>(b), tmp, result);
7475
a = hc::modular_addition_prereduced_inputs(a, a, modulus);
7576
b = static_cast<T>(b >> 1);
7677
}

montgomery_arithmetic/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ include(FetchContent)
7878
FetchContent_Declare(
7979
hurchalla_util
8080
GIT_REPOSITORY https://github.com/hurchalla/util.git
81-
GIT_TAG e3a0fd02c86b67dcbf833fdd4ccf0732552f6e3e
81+
GIT_TAG aa71ce34e12392db20229979801048ff97e6b7da
8282
)
8383
FetchContent_MakeAvailable(hurchalla_util)
8484

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontyFullRange.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,8 @@ class MontyFullRange final :
236236

237237
T oddsum = halfval + halfn_ceiling;
238238
// T retval = ((val & 1u) == 0) ? halfval : oddsum;
239-
T retval = conditional_select(((val & 1u) == 0), halfval, oddsum);
239+
T retval = ::hurchalla::cselect_on_bit<0>::eq_0(
240+
static_cast<uint64_t>(val), halfval, oddsum);
240241

241242
HPBC_CLOCKWORK_POSTCONDITION2(retval < n_);
242243
return V(retval);

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontyHalfRange.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,8 @@ class MontyHalfRange final :
536536
// S retval = (val % 2 == 0) ? halfval : oddsum;
537537
static_assert(static_cast<S>(-1) == ~(static_cast<S>(0)),
538538
"S must use two's complement representation");
539-
S retval = conditional_select(((static_cast<T>(val) & 1u) == 0), halfval, oddsum);
539+
S retval = ::hurchalla::cselect_on_bit<0>::eq_0(
540+
static_cast<uint64_t>(val), halfval, oddsum);
540541

541542
// It's fairly straightforward why retval works when val >= 0
542543
// it's basically the same situation as halve() in MontyFullRange,
@@ -584,7 +585,8 @@ class MontyHalfRange final :
584585
T oddhalf = static_cast<T>(val + n_) >> 1;
585586
HPBC_CLOCKWORK_ASSERT2(oddhalf < n_);
586587
// T retval = ((val & 1u) == 0) ? evenhalf : oddhalf;
587-
T retval = conditional_select(((val & 1u) == 0), evenhalf, oddhalf);
588+
T retval = ::hurchalla::cselect_on_bit<0>::eq_0(
589+
static_cast<uint64_t>(val), evenhalf, oddhalf);
588590

589591
HPBC_CLOCKWORK_POSTCONDITION2(0 <= retval && retval < n_);
590592
return C(retval);

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontyQuarterRange.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,8 @@ class MontyQuarterRange final : public
355355
// And we know by the assertion above that oddhalf fits in V.
356356

357357
// T retval = ((val & 1u) == 0) ? evenhalf : oddhalf;
358-
T retval = conditional_select(((val & 1u) == 0), evenhalf, oddhalf);
358+
T retval = ::hurchalla::cselect_on_bit<0>::eq_0(
359+
static_cast<uint64_t>(val), evenhalf, oddhalf);
359360

360361
HPBC_CLOCKWORK_POSTCONDITION2(retval < 2*n_);
361362
return V(retval);
@@ -375,7 +376,8 @@ class MontyQuarterRange final : public
375376
// since val < n, (val + n)/2 < n.
376377
T oddhalf = static_cast<T>(val + n_) >> 1;
377378
// T retval = ((val & 1u) == 0) ? evenhalf : oddhalf;
378-
T retval = conditional_select(((val & 1u) == 0), evenhalf, oddhalf);
379+
T retval = ::hurchalla::cselect_on_bit<0>::eq_0(
380+
static_cast<uint64_t>(val), evenhalf, oddhalf);
379381

380382
HPBC_CLOCKWORK_POSTCONDITION2(retval < n_);
381383
return C(retval);

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/MontyWrappedStandardMath.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,8 @@ class MontyWrappedStandardMath final {
287287

288288
T oddsum = halfval + halfn_ceiling;
289289
// T retval = ((val & 1u) == 0) ? halfval : oddsum;
290-
T retval = conditional_select(((val & 1u) == 0), halfval, oddsum);
290+
T retval = ::hurchalla::cselect_on_bit<0>::eq_0(
291+
static_cast<uint64_t>(val), halfval, oddsum);
291292

292293
HPBC_CLOCKWORK_POSTCONDITION2(retval < modulus_);
293294
return C(retval);

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/MontyFullRangeMasked.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,8 @@ class MontyFullRangeMasked final :
352352

353353
T oddsum = halfval + halfn_ceiling;
354354
// T retval = ((val & 1u) == 0) ? halfval : oddsum;
355-
T retval = conditional_select(((val & 1u) == 0), halfval, oddsum);
355+
T retval = ::hurchalla::cselect_on_bit<0>::eq_0(
356+
static_cast<uint64_t>(val), halfval, oddsum);
356357

357358
HPBC_CLOCKWORK_POSTCONDITION2(retval < n_);
358359
return C(retval);

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/platform_specific/montgomery_pow.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
namespace hurchalla { namespace detail {
2525

2626

27+
//#define HURCHALLA_MONTGOMERY_POW_USE_CSELECT_ON_BIT 1
28+
29+
2730
// This file is intended to implement the class MontgomeryForm's member
2831
// functions pow() and array_pow().
2932

@@ -41,8 +44,14 @@ struct montgomery_pow {
4144
// Handbook of Applied Cryptography- http://cacr.uwaterloo.ca/hac/
4245
// See also: hurchalla/modular_arithmetic/detail/impl_modular_pow.h
4346
V mont_one = mf.getUnityValue();
47+
#ifndef HURCHALLA_MONTGOMERY_POW_USE_CSELECT_ON_BIT
48+
// see comments for the cmov #ifndef section further below in this function
4449
V result = mont_one;
4550
result.cmov((exponent & static_cast<T>(1)), base);
51+
#else
52+
V result = V::template cselect_on_bit_ne0<0>(static_cast<uint64_t>(exponent), base, mont_one);
53+
#endif
54+
4655
while (exponent > static_cast<T>(1)) {
4756
exponent = static_cast<T>(exponent >> static_cast<T>(1));
4857
base = mf.template square<LowlatencyTag>(base);
@@ -67,11 +76,12 @@ struct montgomery_pow {
6776
// in theory should run faster. And in practice it has consistently
6877
// benchmarked better - usually ~5% faster, and never slower than above.
6978

70-
# if 1
79+
# ifndef HURCHALLA_MONTGOMERY_POW_USE_CSELECT_ON_BIT
7180
V tmp = mont_one;
7281
tmp.cmov(exponent & static_cast<T>(1), base);
7382
# else
74-
// this timed a little faster for MontyFull and MontyMasked, and a
83+
// with HURCHALLA_ALLOW_INLINE_ASM_CSELECT_ON_BIT defined, this timed a
84+
// little faster than the above for MontyFull and MontyMasked, and a
7585
// little slower (or a lot slower: gcc with MontyHalf) for the rest.
7686
V tmp = V::template cselect_on_bit_ne0<0>(static_cast<uint64_t>(exponent), base, mont_one);
7787
# endif
@@ -198,8 +208,13 @@ struct montgomery_pow {
198208

199209
V mont_one = mf.getUnityValue();
200210
Unroll<NUM_BASES>::call([&](std::size_t i) HURCHALLA_INLINE_LAMBDA {
211+
# ifndef HURCHALLA_MONTGOMERY_POW_USE_CSELECT_ON_BIT
201212
V tmp = mont_one;
202213
tmp.cmov(exponent & static_cast<T>(1), bases[i]);
214+
# else
215+
V tmp = V::template cselect_on_bit_ne0<0>(
216+
static_cast<uint64_t>(exponent), bases[i], mont_one);
217+
# endif
203218
result[i] = mf.template multiply<PTAG>(result[i], tmp);
204219
});
205220
#endif
@@ -238,6 +253,8 @@ struct montgomery_pow {
238253
Unroll<NUM_BASES>::call([&](std::size_t i) HURCHALLA_INLINE_LAMBDA {
239254
bases[i] = mf.template square<PTAG>(bases[i]);
240255
V tmp = mont_one;
256+
// note: since we are doing masked selections, we definitely don't
257+
// want to use cselect_on_bit here
241258
tmp.template
242259
cmov<CSelectMaskedTag>(exponent & static_cast<T>(1), bases[i]);
243260
result[i] = mf.template multiply<PTAG>(result[i], tmp);

0 commit comments

Comments
 (0)