Skip to content

Commit ccdc256

Browse files
author
Jeffrey Hurchalla
committed
add lowlatency and lowuop template tags to modular_addition and modular_subtraction, and improve the experimental montgomery_pow_kary and montgomery_two_pow
1 parent 24e7e88 commit ccdc256

30 files changed

+1385
-404
lines changed

modular_arithmetic/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2022 Jeffrey Hurchalla.
1+
# Copyright (c) 2020-2025 Jeffrey Hurchalla.
22
# This Source Code Form is subject to the terms of the Mozilla Public
33
# License, v. 2.0. If a copy of the MPL was not distributed with this
44
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
@@ -47,6 +47,7 @@ target_sources(hurchalla_basic_modular_arithmetic INTERFACE
4747
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/hurchalla/modular_arithmetic/modular_subtraction.h>
4848
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/hurchalla/modular_arithmetic/detail/impl_modular_multiplicative_inverse.h>
4949
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/hurchalla/modular_arithmetic/detail/impl_modular_pow.h>
50+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/hurchalla/modular_arithmetic/detail/optimization_tag_structs.h>
5051
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/hurchalla/modular_arithmetic/detail/platform_specific/impl_absolute_value_difference.h>
5152
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/hurchalla/modular_arithmetic/detail/platform_specific/impl_modular_addition.h>
5253
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/hurchalla/modular_arithmetic/detail/platform_specific/impl_modular_multiplication.h>

modular_arithmetic/include/hurchalla/modular_arithmetic/absolute_value_difference.h

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2020-2022 Jeffrey Hurchalla.
1+
// Copyright (c) 2020-2025 Jeffrey Hurchalla.
22
/*
33
* This Source Code Form is subject to the terms of the Mozilla Public
44
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -11,20 +11,16 @@
1111

1212
#include "hurchalla/modular_arithmetic/detail/platform_specific/impl_absolute_value_difference.h"
1313
#include "hurchalla/util/traits/ut_numeric_limits.h"
14+
#include "hurchalla/util/compiler_macros.h"
1415
#include "hurchalla/util/programming_by_contract.h"
1516

1617
namespace hurchalla {
1718

1819

19-
// Performance note:
20-
// On some systems, this function may perform better when T is signed than
21-
// when it is unsigned. Specifically, when HURCHALLA_AVOID_CSELECT is defined
22-
// (see hurchalla/util/compiler_macros.h) a signed type can perform better; if
23-
// it is not defined you should expect no performance difference between signed
24-
// and unsigned.
20+
// This function returns absolute_value(a-b), calculated as if 'a' and 'b' are
21+
// infinite precision signed integers. It requires a>=0 and b>=0.
2522

26-
27-
template <typename T>
23+
template <typename T> HURCHALLA_FORCE_INLINE
2824
T absolute_value_difference(T a, T b)
2925
{
3026
static_assert(ut_numeric_limits<T>::is_integer, "");
@@ -39,6 +35,15 @@ T absolute_value_difference(T a, T b)
3935
}
4036

4137

38+
// Performance note for RISC-V (and other uncommon CPU architectures that do not
39+
// have an instruction for conditional move or conditional select):
40+
// On such architectures, this function may perform better when T is signed
41+
// than when it is unsigned. Specifically, when HURCHALLA_AVOID_CSELECT is
42+
// defined (see hurchalla/util/compiler_macros.h), a signed type may perform
43+
// better; if it is not defined, you should expect no performance difference
44+
// between signed and unsigned.
45+
46+
4247
} // end namespace
4348

4449
#endif

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/optimization_tag_structs.h renamed to modular_arithmetic/include/hurchalla/modular_arithmetic/detail/optimization_tag_structs.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
// Copyright (c) 2020-2022 Jeffrey Hurchalla.
1+
// Copyright (c) 2020-2025 Jeffrey Hurchalla.
22
/*
33
* This Source Code Form is subject to the terms of the Mozilla Public
44
* License, v. 2.0. If a copy of the MPL was not distributed with this
55
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
66
*/
77

8-
#ifndef HURCHALLA_MONTGOMERY_ARITHMETIC_OPTIMIZATION_TAG_STRUCTS_H_INCLUDED
9-
#define HURCHALLA_MONTGOMERY_ARITHMETIC_OPTIMIZATION_TAG_STRUCTS_H_INCLUDED
8+
#ifndef HURCHALLA_MODULAR_ARITHMETIC_OPTIMIZATION_TAG_STRUCTS_H_INCLUDED
9+
#define HURCHALLA_MODULAR_ARITHMETIC_OPTIMIZATION_TAG_STRUCTS_H_INCLUDED
1010

1111

1212
namespace hurchalla {

modular_arithmetic/include/hurchalla/modular_arithmetic/detail/platform_specific/impl_absolute_value_difference.h

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2020-2022 Jeffrey Hurchalla.
1+
// Copyright (c) 2020-2025 Jeffrey Hurchalla.
22
/*
33
* This Source Code Form is subject to the terms of the Mozilla Public
44
* License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -19,7 +19,9 @@
1919
namespace hurchalla { namespace detail {
2020

2121

22-
// note: uses a static member function to disallow ADL.
22+
// Fyi: the purpose of having structs with static member functions is to
23+
// disallow ADL and to make specializations simple and easy.
24+
2325
struct default_impl_absdiff_unsigned {
2426
template <typename T>
2527
HURCHALLA_FORCE_INLINE static T call(T a, T b)
@@ -63,7 +65,11 @@ struct impl_absolute_value_difference_unsigned<std::uint32_t> {
6365
__asm__ ("subl %[b], %[tmp] \n\t" /* tmp = a - b */
6466
"cmovbl %[diff], %[tmp] \n\t" /* tmp = (a < b) ? diff : tmp */
6567
: [tmp]"+&r"(tmp)
68+
# if defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=20197 */
6669
: [b]"r"(b), [diff]"r"(diff)
70+
# else
71+
: [b]"rm"(b), [diff]"rm"(diff)
72+
# endif
6773
: "cc");
6874
uint32_t result = tmp;
6975

@@ -84,7 +90,11 @@ struct impl_absolute_value_difference_unsigned<std::uint64_t> {
8490
__asm__ ("subq %[b], %[tmp] \n\t" /* tmp = a - b */
8591
"cmovbq %[diff], %[tmp] \n\t" /* tmp = (a < b) ? diff : tmp */
8692
: [tmp]"+&r"(tmp)
93+
# if defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=20197 */
8794
: [b]"r"(b), [diff]"r"(diff)
95+
# else
96+
: [b]"rm"(b), [diff]"rm"(diff)
97+
# endif
8898
: "cc");
8999
uint64_t result = tmp;
90100

@@ -94,6 +104,41 @@ struct impl_absolute_value_difference_unsigned<std::uint64_t> {
94104
}
95105
};
96106

107+
#ifdef HURCHALLA_ENABLE_INLINE_ASM_128_BIT
108+
template <> // specialization for 128-bit unsigned operands, implemented with inline asm on 64-bit halves
109+
struct impl_absolute_value_difference_unsigned<__uint128_t> {
110+
HURCHALLA_FORCE_INLINE
111+
static __uint128_t call(__uint128_t a, __uint128_t b) // returns the same value as default_impl_absdiff_unsigned::call(a, b) (checked by the postcondition below)
112+
{
113+
using std::uint64_t;
114+
__uint128_t diff = static_cast<__uint128_t>(b - a); // b - a in unsigned 128-bit arithmetic; selected as the result when a < b
115+
116+
uint64_t alo = static_cast<uint64_t>(a); // low 64 bits of a
117+
uint64_t ahi = static_cast<uint64_t>(a >> 64); // high 64 bits of a
118+
uint64_t difflo = static_cast<uint64_t>(diff); // low 64 bits of (b - a)
119+
uint64_t diffhi = static_cast<uint64_t>(diff >> 64); // high 64 bits of (b - a)
120+
uint64_t blo = static_cast<uint64_t>(b); // low 64 bits of b
121+
uint64_t bhi = static_cast<uint64_t>(b >> 64); // high 64 bits of b
122+
__asm__ ("subq %[blo], %[alo] \n\t" /* tmp = a - b */
123+
"sbbq %[bhi], %[ahi] \n\t" /* high halves: subtract with the borrow from the low halves */
124+
"cmovbq %[difflo], %[alo] \n\t" /* tmp = (a < b) ? diff : tmp */
125+
"cmovbq %[diffhi], %[ahi] \n\t" /* same selection, applied to the high half */
126+
: [alo]"+&r"(alo), [ahi]"+&r"(ahi) /* read-write register operands; they hold the result halves afterwards */
127+
# if defined(__clang__) /* https://bugs.llvm.org/show_bug.cgi?id=20197 */
128+
: [blo]"r"(blo), [bhi]"r"(bhi), [difflo]"r"(difflo), [diffhi]"r"(diffhi)
129+
# else
130+
: [blo]"rm"(blo), [bhi]"rm"(bhi), [difflo]"rm"(difflo), [diffhi]"rm"(diffhi)
131+
# endif
132+
: "cc");
133+
__uint128_t result = (static_cast<__uint128_t>(ahi) << 64) | alo; // reassemble the 128-bit result from its two halves
134+
135+
HPBC_POSTCONDITION2(result<=a || result<=b);
136+
HPBC_POSTCONDITION2(result == default_impl_absdiff_unsigned::call(a, b)); // asm result must agree with the generic implementation
137+
return result;
138+
}
139+
};
140+
#endif
141+
97142
#endif
98143

99144

0 commit comments

Comments
 (0)