 #include "hurchalla/util/traits/ut_numeric_limits.h"
 #include "hurchalla/util/count_leading_zeros.h"
 #include "hurchalla/util/compiler_macros.h"
+#include "hurchalla/util/branchless_shift_right.h"
 #include "hurchalla/modular_arithmetic/detail/clockwork_programming_by_contract.h"
 #include "hurchalla/util/traits/extensible_make_unsigned.h"
 #include <type_traits>
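The new include above pulls in branchless_shift_right, which this commit substitutes for every `n >> shift` in the pow routines below. Its header is not shown in this diff, so the following is only a minimal sketch of the assumed semantics: it returns the same value as `n >> shift`, but computed without a data-dependent branch on the shift amount, which matters for double-word integer types (for example a 128-bit U) where a plain variable shift can compile to a branch. The type U128 and the function name shr128_branchless_sketch are illustrative, not part of the library.

// Minimal sketch (an assumption, not the library's actual implementation):
// right-shift a 128-bit value, held as two 64-bit halves, by 0..127 bits
// without any data-dependent branch on the shift amount.
#include <cstdint>

struct U128 { std::uint64_t lo, hi; };

inline U128 shr128_branchless_sketch(U128 n, unsigned s)
{
    // precondition: 0 <= s <= 127
    std::uint64_t lt64 = std::uint64_t(0) - std::uint64_t(s < 64);  // all-ones iff s < 64
    std::uint64_t nz   = std::uint64_t(0) - std::uint64_t(s != 0);  // all-ones iff s != 0
    unsigned sm = s & 63u;           // shift amount reduced mod 64
    unsigned rm = (64u - sm) & 63u;  // complementary shift (0 when sm == 0)

    // bits that cross from hi into lo when 0 < s < 64; masked off when s == 0,
    // since (n.hi << 0) would otherwise wrongly contribute all of n.hi
    std::uint64_t carry = (n.hi << rm) & nz;

    U128 r;
    r.hi = (n.hi >> sm) & lt64;               // hi half becomes 0 once s >= 64
    r.lo = (((n.lo >> sm) | carry) & lt64)    // case s < 64
         | ((n.hi >> sm) & ~lt64);            // case s >= 64: lo = hi >> (s - 64)
    return r;
}

For native unsigned types a plain `n >> shift` is already branch-free, so presumably the library function simply forwards to the built-in operator in that case.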
@@ -208,15 +209,15 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {
     HPBC_CLOCKWORK_ASSERT(numbits > P);

     int shift = numbits - P;
-    U tmp = n >> shift;
+    U tmp = branchless_shift_right(n, shift);
     HPBC_CLOCKWORK_ASSERT(tmp <= MASK);
     // normally we'd use (tmp & MASK), but it's redundant with tmp <= MASK
     size_t index = static_cast<size_t>(tmp);
     result = table[index];

     while (shift >= P) {
         if HURCHALLA_CPP17_CONSTEXPR (USE_SLIDING_WINDOW_OPTIMIZATION) {
-            while (shift > P && (static_cast<size_t>(n >> (shift-1)) & 1u) == 0) {
+            while (shift > P && (static_cast<size_t>(branchless_shift_right(n, shift-1)) & 1u) == 0) {
                 result = mf.square(result);
                 --shift;
             }
@@ -236,7 +237,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {
         }

         shift -= P;
-        index = static_cast<size_t>(n >> shift) & MASK;
+        index = static_cast<size_t>(branchless_shift_right(n, shift)) & MASK;
         result = mf.multiply(result, table[index]);
     }

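For reference, here is a small scalar sketch of the windowed (2^P-ary) loop structure that the hunks above and below modify: a precomputed table of the first 2^P powers, a top window taken from the exponent's high bits, P squarings per iteration, and an optional sliding-window skip that lets squarings alone consume runs of zero bits. It uses plain uint64_t arithmetic and ordinary shifts in place of the montgomery form `mf` and branchless_shift_right; the function name and the small-modulus restriction are assumptions made for the sketch, not taken from the library.

#include <cstdint>

// Sketch only: plain modular pow with a P-bit window and a sliding-window skip.
// Restricted to modulus < 2^32 so that 64-bit products cannot overflow.
std::uint32_t windowed_pow_sketch(std::uint32_t base, std::uint64_t n,
                                  std::uint32_t modulus)
{
    constexpr int P = 4;                            // window size in bits
    constexpr std::uint64_t MASK = (1u << P) - 1u;  // low P bits
    std::uint64_t table[1u << P];                   // table[i] == base^i (mod modulus)
    std::uint64_t b = base % modulus;
    table[0] = 1u % modulus;
    for (std::uint64_t i = 1; i <= MASK; ++i)
        table[i] = (table[i - 1] * b) % modulus;

    int numbits = 0;                                // index of highest set bit, plus 1
    for (std::uint64_t t = n; t != 0; t >>= 1)
        ++numbits;
    if (numbits <= P)                               // exponent fits in a single window
        return static_cast<std::uint32_t>(table[n]);

    int shift = numbits - P;
    std::uint64_t result = table[n >> shift];       // top P bits of the exponent
    while (shift >= P) {
        // sliding window: while the next bit down is 0, a squaring alone handles it
        while (shift > P && ((n >> (shift - 1)) & 1u) == 0) {
            result = (result * result) % modulus;
            --shift;
        }
        for (int i = 0; i < P; ++i)                 // make room for the next P-bit window
            result = (result * result) % modulus;
        shift -= P;
        result = (result * table[(n >> shift) & MASK]) % modulus;
    }
    // finish the remaining low 'shift' bits (0 <= shift < P) as a final partial window
    for (int i = 0; i < shift; ++i)
        result = (result * result) % modulus;
    result = (result * table[n & ((std::uint64_t(1) << shift) - 1u)]) % modulus;
    return static_cast<std::uint32_t>(result);
}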
@@ -314,7 +315,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {
     int shift = numbits - NUMBITS_MASKBIG;

     HPBC_CLOCKWORK_ASSERT2(shift > 0);
-    size_t tmp = static_cast<size_t>(n >> shift);
+    size_t tmp = static_cast<size_t>(branchless_shift_right(n, shift));
     HPBC_CLOCKWORK_ASSERT2(tmp <= MASKBIG);
     size_t loindex = tmp & MASK;
     size_t hiindex = tmp >> TABLE_BITS;
@@ -323,15 +324,15 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {

     while (shift >= NUMBITS_MASKBIG) {
         if HURCHALLA_CPP17_CONSTEXPR (USE_SLIDING_WINDOW_OPTIMIZATION) {
-            while (shift > NUMBITS_MASKBIG && (static_cast<size_t>(n>>(shift-1)) & 1u) == 0) {
+            while (shift > NUMBITS_MASKBIG && (static_cast<size_t>(branchless_shift_right(n, shift-1)) & 1u) == 0) {
                 result = mf.square(result);
                 --shift;
             }
         }
         HPBC_CLOCKWORK_ASSERT2(shift >= NUMBITS_MASKBIG);

         shift -= NUMBITS_MASKBIG;
-        tmp = static_cast<size_t>(n >> shift);
+        tmp = static_cast<size_t>(branchless_shift_right(n, shift));
         loindex = tmp & MASK;
         hiindex = (tmp >> TABLE_BITS) & MASK;

@@ -388,7 +389,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {
         shift = numbits - NUMBITS_MASKBIG;
     }
     HPBC_CLOCKWORK_ASSERT2(shift >= 0);
-    size_t tmp = static_cast<size_t>(n >> shift);
+    size_t tmp = static_cast<size_t>(branchless_shift_right(n, shift));
     HPBC_CLOCKWORK_ASSERT2(tmp <= MASKBIG);

     size_t index1 = tmp & MASK;
@@ -457,15 +458,15 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {

     while (shift >= NUMBITS_MASKBIG) {
         if HURCHALLA_CPP17_CONSTEXPR (USE_SLIDING_WINDOW_OPTIMIZATION) {
-            while (shift > NUMBITS_MASKBIG && (static_cast<size_t>(n>>(shift-1)) & 1u) == 0) {
+            while (shift > NUMBITS_MASKBIG && (static_cast<size_t>(branchless_shift_right(n, shift-1)) & 1u) == 0) {
                 result = mf.square(result);
                 --shift;
             }
         }
         HPBC_CLOCKWORK_ASSERT2(shift >= NUMBITS_MASKBIG);

         shift -= NUMBITS_MASKBIG;
-        tmp = static_cast<size_t>(n >> shift);
+        tmp = static_cast<size_t>(branchless_shift_right(n, shift));

         index1 = tmp & MASK;
         index2 = (tmp >> TABLE_BITS) & MASK;
@@ -564,7 +565,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {
     }
     HPBC_CLOCKWORK_ASSERT2(shift >= 0);

-    size_t tmp = static_cast<size_t>(n >> shift);
+    size_t tmp = static_cast<size_t>(branchless_shift_right(n, shift));
     HPBC_CLOCKWORK_ASSERT2(tmp <= MASKBIG);
     V result = table[0][tmp & MASK];

@@ -596,15 +597,15 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {

     while (shift >= NUMBITS_MASKBIG) {
         if HURCHALLA_CPP17_CONSTEXPR (USE_SLIDING_WINDOW_OPTIMIZATION) {
-            while (shift > NUMBITS_MASKBIG && (static_cast<size_t>(n>>(shift-1)) & 1u) == 0) {
+            while (shift > NUMBITS_MASKBIG && (static_cast<size_t>(branchless_shift_right(n, shift-1)) & 1u) == 0) {
                 result = mf.square(result);
                 --shift;
             }
         }
         HPBC_CLOCKWORK_ASSERT2(shift >= NUMBITS_MASKBIG);

         shift -= NUMBITS_MASKBIG;
-        tmp = static_cast<size_t>(n >> shift);
+        tmp = static_cast<size_t>(branchless_shift_right(n, shift));
         V val1 = table[0][tmp & MASK];

         if HURCHALLA_CPP17_CONSTEXPR (USE_SQUARING_VALUE_OPTIMIZATION) {
@@ -755,9 +756,9 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {

     int shift = numbits - P;
     HURCHALLA_REQUEST_UNROLL_LOOP for (size_t j=0; j<ARRAY_SIZE; ++j) {
-        HPBC_CLOCKWORK_ASSERT(static_cast<U>(n[j] >> shift) <= MASK);
-        // We don't need to 'and' with MASK, because (n[j] >> shift) <= MASK.
-        size_t index = static_cast<size_t>(n[j] >> shift);
+        HPBC_CLOCKWORK_ASSERT(static_cast<U>(branchless_shift_right(n[j], shift)) <= MASK);
+        // We don't need to 'and' with MASK, because (branchless_shift_right(n[j], shift)) <= MASK.
+        size_t index = static_cast<size_t>(branchless_shift_right(n[j], shift));
         result[j] = table[index][j];
     }

@@ -784,7 +785,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {
         }

         HURCHALLA_REQUEST_UNROLL_LOOP for (size_t j=0; j<ARRAY_SIZE; ++j) {
-            size_t index = static_cast<size_t>(n[j] >> shift) & MASK;
+            size_t index = static_cast<size_t>(branchless_shift_right(n[j], shift)) & MASK;
             result[j] = mf[j].template multiply<LowuopsTag>(
                             result[j], table[index][j]);
         }
@@ -888,7 +889,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {
     HPBC_CLOCKWORK_ASSERT(numbits > P);

     int shift = numbits - P;
-    size_t index = static_cast<size_t>(n >> shift);
+    size_t index = static_cast<size_t>(branchless_shift_right(n, shift));
     HPBC_CLOCKWORK_ASSERT(index <= MASK);
     HURCHALLA_REQUEST_UNROLL_LOOP for (size_t j=0; j<ARRAY_SIZE; ++j) {
         // normally we'd use (index & MASK), but it's redundant with index <= MASK
@@ -898,7 +899,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {

     while (shift >= P) {
         if (USE_SLIDING_WINDOW_OPTIMIZATION) {
-            while (shift > P && (static_cast<size_t>(n >> (shift-1)) & 1u) == 0) {
+            while (shift > P && (static_cast<size_t>(branchless_shift_right(n, shift-1)) & 1u) == 0) {
                 HURCHALLA_REQUEST_UNROLL_LOOP for (size_t j=0; j<ARRAY_SIZE; ++j)
                     result[j] = mf.template square<LowuopsTag>(result[j]);
                 --shift;
             }
924925 }
925926
926927 shift -= P;
927- index = static_cast <size_t >(n >> shift) & MASK;
928+ index = static_cast <size_t >(branchless_shift_right (n, shift) ) & MASK;
928929 HURCHALLA_REQUEST_UNROLL_LOOP for (size_t j=0 ; j<ARRAY_SIZE; ++j) {
929930 result[j] = mf.template multiply <LowuopsTag>(result[j], table[index][j]);
930931 }
@@ -1009,9 +1010,9 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {

     int shift = numbits - P;
     HPBC_CLOCKWORK_ASSERT(shift >= 0);
-    HPBC_CLOCKWORK_ASSERT((n >> shift) <= MASK);
+    HPBC_CLOCKWORK_ASSERT((branchless_shift_right(n, shift)) <= MASK);
     // due to above assert, we don't need to 'and' with MASK
-    size_t index = static_cast<size_t>(n >> shift);
+    size_t index = static_cast<size_t>(branchless_shift_right(n, shift));

     // because the highest set bit of n is by definition a 1, we know
     HPBC_CLOCKWORK_ASSERT((index >> (P-1)) == 1u); // and thus
@@ -1027,7 +1028,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {

     while (shift >= P) {
         if (USE_SLIDING_WINDOW_OPTIMIZATION) {
-            while (shift > P && (static_cast<size_t>(n >> (shift-1)) & 1u) == 0) {
+            while (shift > P && (static_cast<size_t>(branchless_shift_right(n, shift-1)) & 1u) == 0) {
                 HURCHALLA_REQUEST_UNROLL_LOOP for (size_t j=0; j<ARRAY_SIZE; ++j)
                     result[j] = mf.template square<LowuopsTag>(result[j]);
                 --shift;
@@ -1055,7 +1056,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {
         }

         shift -= P;
-        index = static_cast<size_t>(n >> shift) & MASK;
+        index = static_cast<size_t>(branchless_shift_right(n, shift)) & MASK;

         HURCHALLA_REQUEST_UNROLL_LOOP for (size_t j=0; j<ARRAY_SIZE; ++j) {
             V tmp = (index % 2 == 0) ? table[index/2][j] : result[j];