Skip to content

Commit 37126f8

Browse files
committed
add first portion of x64 inline asm to 128bit REDC
1 parent 41bd907 commit 37126f8

File tree

1 file changed

+47
-5
lines changed
  • montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/platform_specific

1 file changed

+47
-5
lines changed

montgomery_arithmetic/include/hurchalla/montgomery_arithmetic/low_level_api/detail/platform_specific/ImplRedc.h

Lines changed: 47 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -262,11 +262,6 @@ struct RedcIncomplete {
262262
defined(HURCHALLA_ALLOW_INLINE_ASM_REDC)) && \
263263
defined(HURCHALLA_TARGET_ISA_ARM_64) && !defined(_MSC_VER)
264264

265-
TH u2 = static_cast<TH>(u_hi);
266-
TH u3 = static_cast<TH>(u_hi >> HALF_BITS);
267-
(void)u2; // avoid warning when a #if section doesn't use u2
268-
(void)u3; // avoid warning when a #if section doesn't use u3
269-
270265
# if 0
271266
// this #if section corresponds to the first #if section of C++ code
272267
// lower in this function.
@@ -277,6 +272,9 @@ struct RedcIncomplete {
277272
// the #else section) to make a potential negative value positive during
278273
// calculations (done by the instructions that conditionally set moz and then
279274
// add moz).
275+
276+
TH u2 = static_cast<TH>(u_hi);
277+
TH u3 = static_cast<TH>(u_hi >> HALF_BITS);
280278
TH m, tmp;
281279
__asm__ ("mul %[m], %[u0], %[invn0] \n\t"
282280
"umulh %[u0], %[m], %[n0] \n\t" /* u0 = mnA_1 */
@@ -344,6 +342,50 @@ struct RedcIncomplete {
344342
subtrahend = (static_cast<T>(tmp) << HALF_BITS) | u1;
345343
# endif
346344

345+
/*
346+
#elif (defined(HURCHALLA_ALLOW_INLINE_ASM_ALL) || \
347+
defined(HURCHALLA_ALLOW_INLINE_ASM_REDC)) && \
348+
defined(HURCHALLA_TARGET_ISA_X86_64) && !defined(_MSC_VER)
349+
*/
350+
#elif 0
351+
TH m = u0;
352+
TH rrax = n0;
353+
TH rrdx, tmp2;
354+
__asm__ ("imulq %[invn0], %[m] \n\t" /* mA = u0 * inv_n */
355+
"mulq %[m] \n\t" /* rdx:rax = mnA_10 = rax * mA (rax == n0); high-order bits of the product in rdx */
356+
"movq %%rdx, %[tmp2] \n\t" /* tmp2 = mnA_1 */
357+
"movq %[n1], %%rax \n\t"
358+
"mulq %[m] \n\t" /* rdx:rax = mnA_21 = n1 * mA */
359+
"xorl %k[m], %k[m] \n\t" /* m = 0 */
360+
"addq %%rax, %[tmp2] \n\t" /* tmp2 = mnA_1 = mnA_1_part2 + mnA_1 */
361+
"adcq %%rdx, %[m] \n\t" /* m = mnA_2 + carry */
362+
"subq %[tmp2], %[u1] \n\t" /* u1 = v1 = u1 - mnA_1 */
363+
"imulq %[u1], %[invn0] \n\t" /* invn0 = mB = v1 * invn0 */
364+
365+
"movq %[n0], %%rax \n\t"
366+
"mulq %[invn0] \n\t" /* rdx:rax = mnB_21 = n0 * mB */
367+
"movq %%rax, %[u1] \n\t" /* u1 = mnB_1 */
368+
"movq %%rdx, %[n0] \n\t" /* n0 = mnB_2 */
369+
370+
"movq %[n1], %%rax \n\t"
371+
"mulq %[invn0] \n\t" /* rdx:rax = mnB_32 = n1 * mB */
372+
"xorl %k[invn0], %k[invn0] \n\t" /* invn0 = 0 */
373+
"addq %%rax, %[n0] \n\t" /* n0 = mnB_2 = mnB_2_part2 + mnB_2 */
374+
"adcq %%rdx, %[invn0] \n\t" /* invn0 = mnB_3 = mnB_3 + carry */
375+
376+
"xorl %%eax, %%eax \n\t" /* rax = 0 */
377+
"addq %[u1], %[tmp2] \n\t" /* tmp2 = dummy = mnA_1 + mnB_1 */
378+
"adcq %[n0], %[m] \n\t" /* m = sum2 = mnB_2 + mnA_2 + carry */
379+
"adcq %%rax, %[invn0], hs \n\t" /* tmp = sum3 = mnB_3 += carry */
380+
: [m]"+&r"(m), [invn0]"+&r"(invn0),
381+
"+&a"(rrax), "=&d"(rrdx), [tmp2]"=&r"(tmp2), [n1]"+&r"(n1), [u1]"+&r"(u1),
382+
[n0]"+&r"(n0)
383+
:
384+
: "cc");
385+
minuend = u_hi;
386+
subtrahend = (static_cast<T>(tmp) << HALF_BITS) | u1;
387+
388+
347389
#else // not using inline-asm
348390

349391
TH mA = u0 * invn0;

0 commit comments

Comments
 (0)