@@ -549,7 +549,50 @@ struct RedcIncomplete {
549549
550550 T t_hi = (static_cast <T>(tmp) << HALF_BITS) | u1;
551551
552+ #elif (defined(HURCHALLA_ALLOW_INLINE_ASM_ALL) || \
553+ defined (HURCHALLA_ALLOW_INLINE_ASM_REDC)) && \
554+ defined (HURCHALLA_TARGET_ISA_X86_64) && !defined (_MSC_VER)
555+
556+ TH u2 = static_cast <TH>(u_hi); /* u_hi truncated to TH */
557+ TH u3 = static_cast <TH>(u_hi >> HALF_BITS); /* u_hi shifted down by HALF_BITS, truncated to TH */
558+
559+ TH tmp = u0; /* in/out scratch operand: enters as u0, then holds mA, then mnA_1 */
560+ TH rrax = n0; /* tied to rax ("a"): the first mulq multiplies rax (== n0) by mA */
561+ TH rrdx; /* tied to rdx ("d"): write-only output, receives the high-order bits of each mulq */
562+ __asm__ (" imulq %[invn0], %[tmp] \n\t " /* tmp = mA = u0 * invn0 */
563+ " mulq %[tmp] \n\t " /* rdx:rax = mnA_10 = rax * mA (rax == n0); high-order bits of the product in rdx */
564+ " movq %[tmp], %%rax \n\t " /* rax = mA */
565+ " movq %%rdx, %[tmp] \n\t " /* tmp = mnA_1 (the rdx half of n0 * mA) */
566+ " mulq %[n1] \n\t " /* rdx:rax = mnA_21 = n1 * mA */
567+ " addq %%rax, %[tmp] \n\t " /* tmp = mnA_1 += mnA_1_part2; the carry out is consumed by the adcq below */
568+
569+ " movq %[n0], %%rax \n\t " /* rax = n0_original, reloaded for the second round; placed between the addq and adcq (mov does not modify flags) */
570+
571+ " adcq $0, %%rdx \n\t " /* rdx = mnA_2 += carry from the addq above */
572+ " subq %[tmp], %[u1] \n\t " /* u1 = v1 = u1 - mnA_1 */
573+ " sbbq %%rdx, %[u2] \n\t " /* u2 = v2 = u2 - mnA_2 - borrow */
574+ " sbbq $0, %[u3] \n\t " /* u3 = v3 = u3 - borrow */
575+
576+ " imulq %[invn0], %[u1] \n\t " /* u1 = mB = v1 * invn0 */
577+
578+ " mulq %[u1] \n\t " /* rdx:rax = mnB_21 = n0_original * mB */
579+ " movq %[u1], %%rax \n\t " /* rax = mB */
580+ " movq %%rdx, %[u1] \n\t " /* u1 = mnB_2 */
581+ " mulq %[n1] \n\t " /* rdx:rax = mnB_32 = n1 * mB */
582+ " addq %%rax, %[u1] \n\t " /* u1 = mnB_2 += mnB_2_part2 */
583+ " adcq $0, %%rdx \n\t " /* rdx = mnB_3 += carry */
584+
585+ " subq %[u1], %[u2] \n\t " /* u2 = t2 = v2 - mnB_2 */
586+ " sbbq %%rdx, %[u3] \n\t " /* u3 = t3 = v3 - mnB_3 - borrow */
587+ : [invn0]" +&r" (invn0), " +&a" (rrax), " =&d" (rrdx), [tmp]" +&r" (tmp),
588+ [u1]" +&r" (u1), [u2]" +&r" (u2), [u3]" +&r" (u3) /* NOTE(review): invn0 is declared in/out ("+&r") but no instruction above writes it - confirm whether a plain input would do */
589+ : [n0]" r" (n0), [n1]" r" (n1)
590+ : " cc" );
591+ T t_hi = (static_cast <T>(u3) << HALF_BITS) | u2; /* t_hi = t3:t2, i.e. u3 in the bits above HALF_BITS and u2 below */
592+
552593#else
594+ // no inline-asm
595+
553596 TH mA = u0 * invn0;
554597
555598 T mnA_10 = static_cast <T>(mA ) * n0;
0 commit comments