Skip to content

Commit ea9f748

Browse files
committed
update benchmarking of montgomery 2^k-ary pow
1 parent 5c3b570 commit ea9f748

File tree

2 files changed

+186
-52
lines changed

2 files changed

+186
-52
lines changed

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_pow_2kary/testbench_2kary.sh

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,47 @@ cpp_standard=c++17
5757

5858
# You can use arguments $9 and ${10} and ${11} etc to define macros such as
5959
# -DHURCHALLA_ALLOW_INLINE_ASM_ALL
60-
# for debugging, defining the following macros may be useful
60+
# For debugging, defining the following macros may be useful
6161
# -DHURCHALLA_CLOCKWORK_ENABLE_ASSERTS -DHURCHALLA_UTIL_ENABLE_ASSERTS
6262

63+
# append_if_set ARRAY_NAME VALUE
# Appends VALUE as a single new element to the array variable whose name is
# ARRAY_NAME.  An empty (or missing) VALUE leaves the array untouched.
append_if_set() {
    local target_array="$1"
    local candidate="$2"
    # Nothing to append for an empty argument.
    [ -n "$candidate" ] || return 0
    # The destination array is passed by name, so the append is done via eval.
    # The escaped \$candidate is expanded only when eval runs, which keeps the
    # value intact as one element even if it contains whitespace.
    eval "$target_array+=(\"\$candidate\")"
}
70+
# Collect the optional extra compiler arguments -- positional parameter 9 and
# everything after it -- into the extra_args array.  Empty arguments are
# skipped by append_if_set.  Looping over "${@:9}" (instead of listing $9
# through ${30} one by one) means arguments past the 30th are no longer
# silently dropped.
extra_args=()
for extra_arg in "${@:9}"; do
    append_if_set extra_args "$extra_arg"
done
93+
6394

6495
# we could also use -g to get debug symbols (for lldb/gdb, and objdump)
6596

6697
$cppcompiler \
6798
$error_limit -$optimization_level \
6899
$define_mont_type $define_uint_type $define_test_type \
69-
$9 ${10} ${11} ${12} ${13} ${14} ${15} ${16} ${17} ${18} ${19} ${20} \
70-
${21} ${22} ${23} ${24} ${25} ${26} ${27} ${28} ${29} ${30} \
100+
"${extra_args[@]}" \
71101
-Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion $warn_nrvo \
72102
-std=$cpp_standard \
73103
-I${repo_directory}/modular_arithmetic/modular_arithmetic/include \

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_pow_2kary/testbench_montgomery_pow_2kary.cpp

Lines changed: 153 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <iterator>
2727
#include <random>
2828
#include <cstdlib>
29+
#include <type_traits>
2930

3031

3132
#if defined(HURCHALLA_CLOCKWORK_ENABLE_ASSERTS) || defined(HURCHALLA_UTIL_ENABLE_ASSERTS)
@@ -1287,68 +1288,140 @@ void bench_PA_6(std::vector<TimingPA>& vecTimingPA,
12871288
// typename U, typename ST>
12881289
//TimingPA bench_partial_array_pow(U min, U range, U& totalU, unsigned int max_modulus_bits_reduce, ST seed, unsigned int exponent_bits_reduce)
12891290

1291+
#if defined(USE_PA6_2_6) || defined(USE_PA6_2_7) || defined(USE_PA6_2_8) || defined(USE_PA6_2_9) || \
1292+
defined(USE_PA6_2_10) || defined(USE_PA6_2_11) || defined(USE_PA6_2_12) || defined(USE_PA6_2_13) || \
1293+
defined(USE_PA6_2_14) || defined(USE_PA6_2_15) || defined(USE_PA6_2_16) || \
1294+
defined(USE_PA6_3_6) || defined(USE_PA6_3_7) || defined(USE_PA6_3_8) || defined(USE_PA6_3_9) || \
1295+
defined(USE_PA6_3_10) || defined(USE_PA6_3_11) || defined(USE_PA6_3_12) || \
1296+
defined(USE_PA6_4_6) || defined(USE_PA6_4_7) || defined(USE_PA6_4_8) || defined(USE_PA6_4_9)
1297+
// do nothing
1298+
#else
1299+
# define USE_PA6_2_6
1300+
# define USE_PA6_2_7
1301+
# define USE_PA6_2_8
1302+
# define USE_PA6_2_9
1303+
# define USE_PA6_2_10
1304+
# define USE_PA6_2_11
1305+
# define USE_PA6_2_12
1306+
# define USE_PA6_2_13
1307+
# define USE_PA6_2_14
1308+
# define USE_PA6_2_15
1309+
# define USE_PA6_2_16
1310+
# define USE_PA6_3_6
1311+
# define USE_PA6_3_7
1312+
# define USE_PA6_3_8
1313+
# define USE_PA6_3_9
1314+
# define USE_PA6_3_10
1315+
# define USE_PA6_3_11
1316+
# define USE_PA6_3_12
1317+
# define USE_PA6_4_6
1318+
# define USE_PA6_4_7
1319+
# define USE_PA6_4_8
1320+
# define USE_PA6_4_9
1321+
#endif
1322+
1323+
#ifdef USE_PA6_2_7
12901324
vecTimingPA.push_back(bench_partial_array_pow
12911325
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
12921326
PTAG, 2, 7, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1327+
#endif
1328+
#ifdef USE_PA6_2_8
12931329
vecTimingPA.push_back(bench_partial_array_pow
12941330
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
12951331
PTAG, 2, 8, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1332+
#endif
1333+
#ifdef USE_PA6_2_9
12961334
vecTimingPA.push_back(bench_partial_array_pow
12971335
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
12981336
PTAG, 2, 9, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1337+
#endif
1338+
#ifdef USE_PA6_2_10
12991339
vecTimingPA.push_back(bench_partial_array_pow
13001340
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13011341
PTAG, 2, 10, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1342+
#endif
1343+
#ifdef USE_PA6_2_11
13021344
vecTimingPA.push_back(bench_partial_array_pow
13031345
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13041346
PTAG, 2, 11, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1347+
#endif
1348+
#ifdef USE_PA6_2_12
13051349
vecTimingPA.push_back(bench_partial_array_pow
13061350
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13071351
PTAG, 2, 12, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1352+
#endif
1353+
#ifdef USE_PA6_2_13
13081354
vecTimingPA.push_back(bench_partial_array_pow
13091355
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13101356
PTAG, 2, 13, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1357+
#endif
1358+
#ifdef USE_PA6_2_14
13111359
vecTimingPA.push_back(bench_partial_array_pow
13121360
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13131361
PTAG, 2, 14, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1362+
#endif
1363+
#ifdef USE_PA6_2_15
13141364
vecTimingPA.push_back(bench_partial_array_pow
13151365
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13161366
PTAG, 2, 15, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1367+
#endif
1368+
#ifdef USE_PA6_2_16
13171369
vecTimingPA.push_back(bench_partial_array_pow
13181370
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13191371
PTAG, 2, 16, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1372+
#endif
13201373

1374+
#ifdef USE_PA6_3_6
13211375
vecTimingPA.push_back(bench_partial_array_pow
13221376
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13231377
PTAG, 3, 6, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1378+
#endif
1379+
#ifdef USE_PA6_3_7
13241380
vecTimingPA.push_back(bench_partial_array_pow
13251381
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13261382
PTAG, 3, 7, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1383+
#endif
1384+
#ifdef USE_PA6_3_8
13271385
vecTimingPA.push_back(bench_partial_array_pow
13281386
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13291387
PTAG, 3, 8, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1388+
#endif
1389+
#ifdef USE_PA6_3_9
13301390
vecTimingPA.push_back(bench_partial_array_pow
13311391
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13321392
PTAG, 3, 9, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1393+
#endif
1394+
#ifdef USE_PA6_3_10
13331395
vecTimingPA.push_back(bench_partial_array_pow
13341396
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13351397
PTAG, 3, 10, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1398+
#endif
1399+
#ifdef USE_PA6_3_11
13361400
vecTimingPA.push_back(bench_partial_array_pow
13371401
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13381402
PTAG, 3, 11, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1403+
#endif
13391404

1405+
#ifdef USE_PA6_4_6
13401406
vecTimingPA.push_back(bench_partial_array_pow
13411407
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13421408
PTAG, 4, 6, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1409+
#endif
1410+
#ifdef USE_PA6_4_7
13431411
vecTimingPA.push_back(bench_partial_array_pow
13441412
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13451413
PTAG, 4, 7, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1414+
#endif
1415+
#ifdef USE_PA6_4_8
13461416
vecTimingPA.push_back(bench_partial_array_pow
13471417
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13481418
PTAG, 4, 8, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1419+
#endif
1420+
#ifdef USE_PA6_4_9
13491421
vecTimingPA.push_back(bench_partial_array_pow
13501422
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
13511423
PTAG, 4, 9, ARRAY_SIZE, MontType, USE_SQUARING_VALUE_OPTIMIZATION, USE_SLIDING_WINDOW_OPTIMIZATION>(static_cast<U>(maxU - range), range, dummy, mmbr, seed, ebr));
1424+
#endif
13521425
}
13531426

13541427

@@ -1560,25 +1633,28 @@ void bench_PA_PTAG(std::vector<TimingPA>& vecTimingPA,
15601633
// typename U, typename ST>
15611634
//void bench_PA_2a(...)
15621635

1636+
#ifndef SKIP_PA_2
15631637
bench_PA_2a<PTAG, ARRAY_SIZE, MontType, false, false>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
15641638
bench_PA_2a<PTAG, ARRAY_SIZE, MontType, false, true>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
1565-
1639+
#endif
1640+
#ifndef SKIP_PA_1
15661641
bench_PA_1a<PTAG, ARRAY_SIZE, MontType, false, false>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
15671642
bench_PA_1a<PTAG, ARRAY_SIZE, MontType, false, true>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
1568-
1643+
#endif
15691644
bench_PA_6b<PTAG, ARRAY_SIZE, MontType, false, false>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
15701645
bench_PA_6b<PTAG, ARRAY_SIZE, MontType, false, true>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
15711646

15721647

15731648
if constexpr (std::is_same<typename MontType::MontType::MontyTag,
15741649
::hurchalla::detail::TagMontyFullrange>::value) {
1575-
1650+
#ifndef SKIP_PA_2
15761651
bench_PA_2a<PTAG, ARRAY_SIZE, MontType, true, false>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
15771652
bench_PA_2a<PTAG, ARRAY_SIZE, MontType, true, true>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
1578-
1653+
#endif
1654+
#ifndef SKIP_PA_1
15791655
bench_PA_1a<PTAG, ARRAY_SIZE, MontType, true, false>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
15801656
bench_PA_1a<PTAG, ARRAY_SIZE, MontType, true, true>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
1581-
1657+
#endif
15821658
bench_PA_6b<PTAG, ARRAY_SIZE, MontType, true, false>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
15831659
bench_PA_6b<PTAG, ARRAY_SIZE, MontType, true, true>(vecTimingPA, maxU, range, dummy, mmbr, seed, ebr);
15841660
}
@@ -1601,6 +1677,70 @@ void bench_PA_all(std::vector<TimingPA>& vecTimingPA,
16011677

16021678

16031679

1680+
#ifdef PERF_MACRO
1681+
1682+
1683+
//#define XSTR(x) STR(x)
1684+
//#define STR(x) #x
1685+
//#pragma message "The value of PERF_MACRO is: " XSTR(PERF_MACRO)
1686+
1687+
//float invalid_assignment = PERF_MACRO;
1688+
1689+
1690+
template <int i>
1691+
constexpr bool bool_parse_perf_macro()
1692+
{
1693+
static_assert(i == 5 || i == 7 ||
1694+
(i >= 13 && i <= 18), "");
1695+
constexpr char parse_string[] = PERF_MACRO;
1696+
return (parse_string[i] == 't');
1697+
}
1698+
1699+
constexpr int table_bits_perf_macro()
1700+
{
1701+
constexpr char parse_string[] = PERF_MACRO;
1702+
return (parse_string[0] - '0');
1703+
}
1704+
constexpr int code_section_perf_macro()
1705+
{
1706+
constexpr char parse_string[] = PERF_MACRO;
1707+
int val = parse_string[2] - '0';
1708+
val = 10*val + parse_string[3] - '0';
1709+
return val;
1710+
}
1711+
constexpr int array_size_perf_macro()
1712+
{
1713+
constexpr char parse_string[] = PERF_MACRO;
1714+
int val = parse_string[9] - '0';
1715+
val = 10*val + parse_string[10] - '0';
1716+
return val;
1717+
}
1718+
constexpr bool is_low_uops_perf_macro()
1719+
{
1720+
constexpr char parse_string[] = PERF_MACRO;
1721+
return (parse_string[12] == 'u');
1722+
}
1723+
1724+
constexpr int TableBits = table_bits_perf_macro();
1725+
constexpr int CodeSection = code_section_perf_macro();
1726+
constexpr int ArraySize = array_size_perf_macro();
1727+
1728+
constexpr bool UseSlidingWindow = bool_parse_perf_macro<5>();
1729+
constexpr bool UseSquaringValue = bool_parse_perf_macro<7>();
1730+
1731+
constexpr bool UseLowUops = is_low_uops_perf_macro();
1732+
1733+
constexpr bool UseEarlyExitInInit = bool_parse_perf_macro<13>();
1734+
constexpr bool UnrollTablesizeInInit = bool_parse_perf_macro<14>();
1735+
constexpr bool UnrollArraySize = bool_parse_perf_macro<15>();
1736+
constexpr bool UnrollNumTablesInit = bool_parse_perf_macro<16>();
1737+
constexpr bool UnrollTableBits = bool_parse_perf_macro<17>();
1738+
constexpr bool UnrollNumTablesMainloop = bool_parse_perf_macro<18>();
1739+
1740+
#endif
1741+
1742+
1743+
16041744

16051745

16061746
int main(int argc, char** argv)
@@ -1708,61 +1848,25 @@ using namespace hurchalla;
17081848
for (size_t i=0; i<4; ++i) {
17091849
for (size_t j=0; j<timingPA[i].size(); ++j) {
17101850

1711-
#if 0
1712-
bench_PA_all<2, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
1713-
1714-
#elif 1
1715-
//3 07 t t 02 yxxxxxx
1716-
constexpr bool UseEarlyExitInInit = true;
1717-
constexpr bool UnrollTablesizeInInit = false;
1718-
constexpr bool UnrollArraySize = false;
1719-
constexpr bool UnrollNumTablesInit = false;
1720-
constexpr bool UnrollTableBits = false;
1721-
constexpr bool UnrollNumTablesMainloop = false;
1722-
timingPA[i][j].push_back(bench_partial_array_pow
1723-
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
1724-
LowlatencyTag, 3, 7, 2, MontType, true, false>(static_cast<U>(maxU - range), range, dummy, mmbr[i], seed, ebr[i]));
1851+
#if defined(PERF_MACRO)
17251852

1726-
#elif 1
1727-
// template <bool UseEarlyExitInInit,
1728-
// bool UnrollTablesizeInInit,
1729-
// bool UnrollArraySize,
1730-
// bool UnrollNumTablesInit,
1731-
// bool UnrollTableBits,
1732-
// bool UnrollNumTablesMainloop,
1733-
// class PTAG, size_t TABLE_BITS, size_t CODE_SECTION, size_t ARRAY_SIZE,
1734-
// class MontType, bool USE_SQUARING_VALUE_OPTIMIZATION, bool USE_SLIDING_WINDOW_OPTIMIZATION,
1735-
// typename U, typename ST>
1736-
//TimingPA bench_partial_array_pow(U min, U range, U& totalU, unsigned int max_modulus_bits_reduce, ST seed, unsigned int exponent_bits_reduce)
1737-
constexpr bool UseEarlyExitInInit = false;
1738-
constexpr bool UnrollTablesizeInInit = false;
1739-
constexpr bool UnrollArraySize = false;
1740-
constexpr bool UnrollNumTablesInit = false;
1741-
constexpr bool UnrollTableBits = false;
1742-
constexpr bool UnrollNumTablesMainloop = false;
1743-
timingPA[i][j].push_back(bench_partial_array_pow
1744-
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
1745-
LowuopsTag, 2, 12, 3, MontType, false, false>(static_cast<U>(maxU - range), range, dummy, mmbr[i], seed, ebr[i]));
1746-
timingPA[i][j].push_back(bench_partial_array_pow
1747-
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
1748-
LowuopsTag, 2, 15, 3, MontType, false, false>(static_cast<U>(maxU - range), range, dummy, mmbr[i], seed, ebr[i]));
1853+
using PERF_PTAG = std::conditional<UseLowUops, hurchalla::LowuopsTag, hurchalla::LowlatencyTag>::type;
17491854
timingPA[i][j].push_back(bench_partial_array_pow
17501855
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
1751-
LowuopsTag, 2, 12, 4, MontType, false, false>(static_cast<U>(maxU - range), range, dummy, mmbr[i], seed, ebr[i]));
1752-
timingPA[i][j].push_back(bench_partial_array_pow
1753-
<UseEarlyExitInInit, UnrollTablesizeInInit, UnrollArraySize, UnrollNumTablesInit, UnrollTableBits, UnrollNumTablesMainloop,
1754-
LowuopsTag, 2, 15, 4, MontType, false, false>(static_cast<U>(maxU - range), range, dummy, mmbr[i], seed, ebr[i]));
1856+
PERF_PTAG, TableBits, CodeSection, ArraySize, MontType, UseSquaringValue, UseSlidingWindow>(static_cast<U>(maxU - range), range, dummy, mmbr[i], seed, ebr[i]));
1857+
17551858
#else
1756-
bench_PA_all<2, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
1757-
bench_PA_all<3, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
1859+
// bench_PA_all<2, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
1860+
// bench_PA_all<3, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
17581861
bench_PA_all<4, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
1759-
bench_PA_all<5, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
1862+
/* bench_PA_all<5, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
17601863
bench_PA_all<6, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
17611864
bench_PA_all<7, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
17621865
bench_PA_all<8, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
17631866
bench_PA_all<10, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
17641867
bench_PA_all<12, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
17651868
bench_PA_all<14, MontType>(timingPA[i][j], maxU, range, dummy, mmbr[i], seed, ebr[i]);
1869+
*/
17661870
#endif
17671871

17681872
}

0 commit comments

Comments
 (0)