Skip to content

Commit 270c6ee

Browse files
committed
move the best (formerly) experimental two_pow code into the main line code
1 parent 2c42c1b commit 270c6ee

File tree

10 files changed

+2900
-708
lines changed

10 files changed

+2900
-708
lines changed

modular_arithmetic/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ include(FetchContent)
7575
FetchContent_Declare(
7676
hurchalla_util
7777
GIT_REPOSITORY https://github.com/hurchalla/util.git
78-
GIT_TAG aa71ce34e12392db20229979801048ff97e6b7da
78+
GIT_TAG 38272aeb2b19a8bce2f96c11bf6e0ecbd9eed25b
7979
)
8080
FetchContent_MakeAvailable(hurchalla_util)
8181

montgomery_arithmetic/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ include(FetchContent)
7979
FetchContent_Declare(
8080
hurchalla_util
8181
GIT_REPOSITORY https://github.com/hurchalla/util.git
82-
GIT_TAG aa71ce34e12392db20229979801048ff97e6b7da
82+
GIT_TAG 38272aeb2b19a8bce2f96c11bf6e0ecbd9eed25b
8383
)
8484
FetchContent_MakeAvailable(hurchalla_util)
8585

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_two_pow/experimental_montgomery_two_pow.h

Lines changed: 780 additions & 5 deletions
Large diffs are not rendered by default.

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_two_pow/testbench_montgomery_two_pow.cpp

Lines changed: 567 additions & 382 deletions
Large diffs are not rendered by default.

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/montgomery_two_pow/two_pow_perf_stats_M2.txt

Lines changed: 315 additions & 0 deletions
Large diffs are not rendered by default.

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/experimental/perf_winners.txt

Lines changed: 0 additions & 42 deletions
This file was deleted.

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/impl_montgomery_two_pow.h

Lines changed: 964 additions & 226 deletions
Large diffs are not rendered by default.

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/detail/platform_specific/montgomery_two_pow.h

Lines changed: 160 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -37,106 +37,233 @@ template <class MontyTag, class CompilerTag, class PowSizeTag>
3737
struct tagged_montgomery_two_pow {};
3838

3939

40-
// -- Generic tunings, measured with mac M2 with no asm enabled --
4140

42-
// Full Specialization: clang and big uint pow and MontgomeryFull.
41+
42+
// These specializations call impl_montgomery_two_pow, using the template
43+
// arguments that were found to perform best in benchmarks for a corresponding
44+
// configuration of compiler/uint_type/Mont_type
45+
46+
47+
// FYI, the template parameters of impl_montgomery_two_pow::call are as follows:
48+
// scalar call
49+
// template <class MF, typename U,
50+
// bool USE_SLIDING_WINDOW_OPTIMIZATION,
51+
// size_t TABLE_BITS, size_t CODE_SECTION,
52+
// bool USE_SQUARING_VALUE_OPTIMIZATION>
53+
//
54+
// array call
55+
// template <class MF, typename U,
56+
// size_t ARRAY_SIZE, size_t TABLE_BITS, size_t CODE_SECTION,
57+
// bool USE_SQUARING_VALUE_OPTIMIZATION>
58+
59+
60+
// -- the following best performance tunings were measured with mac M2 --
61+
62+
63+
// Partial Specialization: clang and big uint pow.
64+
// Intended for MontgomeryFull, but catches all non-specialized monty types
65+
template <class MontyTag> struct tagged_montgomery_two_pow
66+
<MontyTag, Tag_montgomery_two_pow_clang, Tag_montgomery_two_pow_big>
67+
{
68+
template <class MF, typename U> HURCHALLA_FORCE_INLINE
69+
static typename MF::MontgomeryValue call(const MF& mf, U n)
70+
{
71+
return impl_montgomery_two_pow::call<MF, U, false, 0, 34, true>(mf, n);
72+
}
73+
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
74+
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
75+
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
76+
{
77+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 30, false>(mf, n);
78+
}
79+
};
80+
// Full Specialization: clang and big uint pow and MontgomeryHalf.
4381
template <> struct tagged_montgomery_two_pow
44-
<TagMontyFullrange, Tag_montgomery_two_pow_clang, Tag_montgomery_two_pow_big>
82+
<TagMontyHalfrange, Tag_montgomery_two_pow_clang, Tag_montgomery_two_pow_big>
4583
{
4684
template <class MF, typename U> HURCHALLA_FORCE_INLINE
4785
static typename MF::MontgomeryValue call(const MF& mf, U n)
4886
{
49-
return impl_montgomery_two_pow::call<true, 0, 3, MF, U>(mf, n);
87+
return impl_montgomery_two_pow::call<MF, U, false, 0, 22, false>(mf, n);
5088
}
5189
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
5290
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
5391
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
5492
{
55-
return impl_montgomery_two_pow::call<0, 0, MF, U, ARRAY_SIZE>(mf, n);
93+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 31, false>(mf, n);
5694
}
5795
};
58-
// Partial specialization: clang and big uint pow.
59-
template <class MontyTag> struct tagged_montgomery_two_pow
60-
<MontyTag, Tag_montgomery_two_pow_clang, Tag_montgomery_two_pow_big>
96+
// Full Specialization: clang and big uint pow and MontgomeryQuarter.
97+
template <> struct tagged_montgomery_two_pow
98+
<TagMontyQuarterrange, Tag_montgomery_two_pow_clang, Tag_montgomery_two_pow_big>
6199
{
62100
template <class MF, typename U> HURCHALLA_FORCE_INLINE
63101
static typename MF::MontgomeryValue call(const MF& mf, U n)
64102
{
65-
return impl_montgomery_two_pow::call<true, 0, 3, MF, U>(mf, n);
103+
return impl_montgomery_two_pow::call<MF, U, false, 0, 22, false>(mf, n);
66104
}
67105
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
68106
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
69107
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
70108
{
71-
return impl_montgomery_two_pow::call<0, 2, MF, U, ARRAY_SIZE>(mf, n);
109+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 30, false>(mf, n);
72110
}
73111
};
74-
// Partial specialization: clang and small uint pow.
112+
113+
114+
// Partial specialization: gcc and big uint pow.
115+
// Intended for MontgomeryFull, but catches all non-specialized monty types
75116
template <class MontyTag> struct tagged_montgomery_two_pow
76-
<MontyTag, Tag_montgomery_two_pow_clang, Tag_montgomery_two_pow_small>
117+
<MontyTag, Tag_montgomery_two_pow_gcc, Tag_montgomery_two_pow_big>
118+
{
119+
template <class MF, typename U> HURCHALLA_FORCE_INLINE
120+
static typename MF::MontgomeryValue call(const MF& mf, U n)
121+
{
122+
return impl_montgomery_two_pow::call<MF, U, false, 0, 33, true>(mf, n);
123+
}
124+
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
125+
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
126+
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
127+
{
128+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 30, false>(mf, n);
129+
}
130+
};
131+
// Full Specialization: gcc and big uint pow and MontgomeryHalf.
132+
template <> struct tagged_montgomery_two_pow
133+
<TagMontyHalfrange, Tag_montgomery_two_pow_gcc, Tag_montgomery_two_pow_big>
134+
{
135+
template <class MF, typename U> HURCHALLA_FORCE_INLINE
136+
static typename MF::MontgomeryValue call(const MF& mf, U n)
137+
{
138+
return impl_montgomery_two_pow::call<MF, U, false, 0, 33, false>(mf, n);
139+
}
140+
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
141+
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
142+
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
143+
{
144+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 31, false>(mf, n);
145+
}
146+
};
147+
// Full Specialization: gcc and big uint pow and MontgomeryQuarter.
148+
template <> struct tagged_montgomery_two_pow
149+
<TagMontyQuarterrange, Tag_montgomery_two_pow_gcc, Tag_montgomery_two_pow_big>
77150
{
78151
template <class MF, typename U> HURCHALLA_FORCE_INLINE
79152
static typename MF::MontgomeryValue call(const MF& mf, U n)
80153
{
81-
return impl_montgomery_two_pow::call<true, 0, 1, MF, U>(mf, n);
154+
return impl_montgomery_two_pow::call<MF, U, false, 0, 22, false>(mf, n);
82155
}
83156
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
84157
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
85158
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
86159
{
87-
return impl_montgomery_two_pow::call<0, 0, MF, U, ARRAY_SIZE>(mf, n);
160+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 31, false>(mf, n);
88161
}
89162
};
90163

91164

92-
// Full specialization: gcc and big uint pow and MontgomeryQuarter.
165+
166+
// Partial Specialization: clang and small uint pow.
167+
// Intended for MontgomeryFull, but catches all non-specialized monty types
168+
template <class MontyTag> struct tagged_montgomery_two_pow
169+
<MontyTag, Tag_montgomery_two_pow_clang, Tag_montgomery_two_pow_small>
170+
{
171+
template <class MF, typename U> HURCHALLA_FORCE_INLINE
172+
static typename MF::MontgomeryValue call(const MF& mf, U n)
173+
{
174+
return impl_montgomery_two_pow::call<MF, U, false, 0, 33, true>(mf, n);
175+
}
176+
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
177+
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
178+
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
179+
{
180+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 31, false>(mf, n);
181+
}
182+
};
183+
// Full Specialization: clang and small uint pow and MontgomeryHalf.
93184
template <> struct tagged_montgomery_two_pow
94-
<TagMontyQuarterrange, Tag_montgomery_two_pow_gcc, Tag_montgomery_two_pow_big>
185+
<TagMontyHalfrange, Tag_montgomery_two_pow_clang, Tag_montgomery_two_pow_small>
95186
{
96187
template <class MF, typename U> HURCHALLA_FORCE_INLINE
97188
static typename MF::MontgomeryValue call(const MF& mf, U n)
98189
{
99-
return impl_montgomery_two_pow::call<true, 0, 1, MF, U>(mf, n);
190+
return impl_montgomery_two_pow::call<MF, U, false, 0, 24, false>(mf, n);
100191
}
101192
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
102193
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
103194
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
104195
{
105-
return impl_montgomery_two_pow::call<0, 0, MF, U, ARRAY_SIZE>(mf, n);
196+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 29, false>(mf, n);
106197
}
107198
};
108-
// Partial specialization: gcc and big uint pow.
109-
template <class MontyTag> struct tagged_montgomery_two_pow
110-
<MontyTag, Tag_montgomery_two_pow_gcc, Tag_montgomery_two_pow_big>
199+
// Full Specialization: clang and small uint pow and MontgomeryQuarter.
200+
template <> struct tagged_montgomery_two_pow
201+
<TagMontyQuarterrange, Tag_montgomery_two_pow_clang, Tag_montgomery_two_pow_small>
111202
{
112203
template <class MF, typename U> HURCHALLA_FORCE_INLINE
113204
static typename MF::MontgomeryValue call(const MF& mf, U n)
114205
{
115-
return impl_montgomery_two_pow::call<false, 0, 2, MF, U>(mf, n);
206+
return impl_montgomery_two_pow::call<MF, U, false, 0, 24, false>(mf, n);
116207
}
117208
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
118209
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
119210
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
120211
{
121-
return impl_montgomery_two_pow::call<0, 0, MF, U, ARRAY_SIZE>(mf, n);
212+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 31, false>(mf, n);
122213
}
123214
};
124-
// Partial specialization: gcc and small uint pow.
215+
216+
217+
// Partial Specialization: gcc and small uint pow.
218+
// Intended for MontgomeryFull, but catches all non-specialized monty types
125219
template <class MontyTag> struct tagged_montgomery_two_pow
126-
<MontyTag, Tag_montgomery_two_pow_gcc, Tag_montgomery_two_pow_small>
220+
<MontyTag, Tag_montgomery_two_pow_gcc, Tag_montgomery_two_pow_small>
127221
{
128222
template <class MF, typename U> HURCHALLA_FORCE_INLINE
129223
static typename MF::MontgomeryValue call(const MF& mf, U n)
130224
{
131-
return impl_montgomery_two_pow::call<true, 0, 3, MF, U>(mf, n);
225+
return impl_montgomery_two_pow::call<MF, U, false, 0, 23, true>(mf, n);
132226
}
133227
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
134228
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
135229
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
136230
{
137-
return impl_montgomery_two_pow::call<0, 0, MF, U, ARRAY_SIZE>(mf, n);
231+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 31, false>(mf, n);
138232
}
139233
};
234+
// Full Specialization: gcc and small uint pow and MontgomeryHalf.
235+
template <> struct tagged_montgomery_two_pow
236+
<TagMontyHalfrange, Tag_montgomery_two_pow_gcc, Tag_montgomery_two_pow_small>
237+
{
238+
template <class MF, typename U> HURCHALLA_FORCE_INLINE
239+
static typename MF::MontgomeryValue call(const MF& mf, U n)
240+
{
241+
return impl_montgomery_two_pow::call<MF, U, false, 0, 24, false>(mf, n);
242+
}
243+
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
244+
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
245+
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
246+
{
247+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 28, false>(mf, n);
248+
}
249+
};
250+
// Full Specialization: gcc and small uint pow and MontgomeryQuarter.
251+
template <> struct tagged_montgomery_two_pow
252+
<TagMontyQuarterrange, Tag_montgomery_two_pow_gcc, Tag_montgomery_two_pow_small>
253+
{
254+
template <class MF, typename U> HURCHALLA_FORCE_INLINE
255+
static typename MF::MontgomeryValue call(const MF& mf, U n)
256+
{
257+
return impl_montgomery_two_pow::call<MF, U, false, 0, 24, false>(mf, n);
258+
}
259+
template <class MF, typename U, std::size_t ARRAY_SIZE> HURCHALLA_FORCE_INLINE
260+
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
261+
call(const std::array<MF, ARRAY_SIZE>& mf, const std::array<U, ARRAY_SIZE>& n)
262+
{
263+
return impl_montgomery_two_pow::call<MF, U, ARRAY_SIZE, 0, 29, false>(mf, n);
264+
}
265+
};
266+
140267

141268

142269

@@ -145,6 +272,7 @@ struct montgomery_two_pow {
145272
// Calculate pow(2, n), modulo the modulus of mf, and return the result in
146273
// montgomeryform representation.
147274
template <class MF, typename T>
275+
HURCHALLA_FORCE_INLINE
148276
static typename MF::MontgomeryValue call(const MF& mf, T nt)
149277
{
150278
HPBC_CLOCKWORK_PRECONDITION(nt >= 0);
@@ -169,8 +297,8 @@ struct montgomery_two_pow {
169297

170298
// Helper function - delegated Array version of montgomery two pow
171299
template <class MF, typename U, std::size_t ARRAY_SIZE>
172-
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
173300
HURCHALLA_FORCE_INLINE
301+
static std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
174302
helper(const std::array<MF,ARRAY_SIZE>& mf, const std::array<U,ARRAY_SIZE>& n)
175303
{
176304
static_assert(hurchalla::ut_numeric_limits<U>::is_integer, "");
@@ -194,18 +322,18 @@ struct montgomery_two_pow {
194322

195323
// Array version of montgomery two pow, for unsigned T
196324
template <class MF, typename T, std::size_t ARRAY_SIZE>
197-
static
325+
HURCHALLA_FORCE_INLINE static
198326
typename std::enable_if<!(hurchalla::ut_numeric_limits<T>::is_signed),
199327
std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
200328
>::type
201329
call(const std::array<MF,ARRAY_SIZE>& mf, const std::array<T,ARRAY_SIZE>& nt)
202330
{
203-
return helper<MF, T, ARRAY_SIZE>(mf, nt);
331+
return helper(mf, nt);
204332
}
205333

206334
// Array version of montgomery two pow, for signed T
207335
template <class MF, typename T, std::size_t ARRAY_SIZE>
208-
static
336+
HURCHALLA_FORCE_INLINE static
209337
typename std::enable_if<(hurchalla::ut_numeric_limits<T>::is_signed),
210338
std::array<typename MF::MontgomeryValue, ARRAY_SIZE>
211339
>::type
@@ -217,7 +345,7 @@ struct montgomery_two_pow {
217345
HPBC_CLOCKWORK_PRECONDITION(nt[i] >= 0);
218346
n[i] = static_cast<U>(nt[i]);
219347
}
220-
return helper<MF, U, ARRAY_SIZE>(mf, n);
348+
return helper(mf, n);
221349
}
222350
};
223351

0 commit comments

Comments
 (0)