You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_two_pow/experimental_montgomery_two_pow.h
+1 -1. Lines changed: 1 addition & 1 deletion
Original file line number
Diff line number
Diff line change
@@ -3572,7 +3572,7 @@ if HURCHALLA_CPP17_CONSTEXPR (CODE_SECTION == 0) {
Copy file name to clipboard. Expand all lines: montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_two_pow/testbench.sh
+2 -2. Lines changed: 2 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -466,8 +466,6 @@ optimization_level=$2
466
466
define_mont_type=-DDEF_MONT_TYPE=$3
467
467
define_uint_type=-DDEF_UINT_TYPE=$4
468
468
469
-
define_use_asm=$8
470
-
471
469
472
470
cpp_standard=c++17
473
471
@@ -479,7 +477,9 @@ cpp_standard=c++17
479
477
# SET repo_directory TO THE DIRECTORY WHERE YOU CLONED THE HURCHALLA GIT
Copy file name to clipboard. Expand all lines: montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/platform_specific/ImplRedc.h
+39 -40. Lines changed: 39 additions & 40 deletions
Original file line number
Diff line number
Diff line change
@@ -222,13 +222,13 @@ struct RedcIncomplete {
222
222
}
223
223
224
224
225
+
225
226
#if (HURCHALLA_COMPILER_HAS_UINT128_T())
226
-
// It's possible these __uint128_t versions should be better tested than they
227
-
// have been so far - I've used the existing REDC unit tests, but little more.
228
-
// The performance on m2 is excellent, so long as throughput is needed rather
229
-
// than low latency.
230
-
// The performance on x86 is unknown at the time of this writing - I haven't
231
-
// yet tried it on x86.
227
+
// The performance for these __uint128_t versions on m2 is excellent, so long
228
+
// as throughput is needed rather than low latency.
229
+
// Performance benefits on x64 are similar to ARM64 (m2) - these are much
230
+
// faster than the ordinary versions when using LowuopsTag (for throughput),
231
+
// and slower when using LowlatencyTag.
232
232
233
233
234
234
// Calculates the minuend and subtrahend of the REDC, such that the finalized
0 commit comments