@@ -28,6 +28,54 @@ namespace {
2828 c1 = 0 ; \
2929}
3030
31+ #if defined(__amd64__) || defined(__x86_64__)
32+
/** [c0,c1] = a * b
 *
 * Unsigned 64x64 -> 128 bit multiplication using the x86-64 MULQ instruction.
 *
 * c0  receives the low 64 bits of the product (taken from RAX).
 * c1  receives the high 64 bits of the product (taken from RDX).
 * a,b factors; each is converted to uint64_t before being handed to the asm.
 *
 * c0 and c1 must be 64-bit lvalues. The condition flags are clobbered.
 */
#define mul(c0,c1,a,b) { \
    /* MULQ has no immediate form, so b is limited to register/memory ("rm") instead of "g". */ \
    __asm__ ("mulq %3" : "=d"(c1), "=a"(c0) : "a"((uint64_t)(a)), "rm"((uint64_t)(b)) : "cc"); \
}
37+
/** [c0,c1,c2] += a * b
 *
 * Multiplies a by b (64x64 -> 128 bit, MULQ) and adds the product into the
 * three-limb accumulator [c0,c1,c2], where c0 is the least significant limb.
 *
 * c0,c1,c2  64-bit lvalues, updated in place; a carry out of c2 is discarded.
 * a,b       factors; each is converted to uint64_t before being handed to the asm.
 *
 * The condition flags are clobbered.
 */
#define muladd3(c0,c1,c2,a,b) { \
    /* Distinctive names so the temporaries cannot shadow a caller's argument expression. */ \
    uint64_t muladd3_lo, muladd3_hi; \
    /* MULQ has no immediate form, so b is limited to register/memory ("rm") instead of "g". */ \
    __asm__ ("mulq %3" : "=a"(muladd3_lo), "=d"(muladd3_hi) : "a"((uint64_t)(a)), "rm"((uint64_t)(b)) : "cc"); \
    /* Add low limb, then high limb plus carry, then ripple the last carry into c2. */ \
    __asm__ ("addq %3,%0; adcq %4,%1; adcq $0,%2" : "+r"(c0), "+r"(c1), "+r"(c2) : "a"(muladd3_lo), "d"(muladd3_hi) : "cc"); \
}
44+
/** [c0,c1,c2] += 2 * a * b
 *
 * Multiplies a by b once (64x64 -> 128 bit, MULQ) and adds the product into
 * the three-limb accumulator [c0,c1,c2] twice. c0 is the least significant limb.
 *
 * c0,c1,c2  64-bit lvalues, updated in place; a carry out of c2 is discarded.
 * a,b       factors; each is converted to uint64_t before being handed to the asm.
 *
 * The condition flags are clobbered.
 */
#define muldbladd3(c0,c1,c2,a,b) { \
    /* Distinctive names so the temporaries cannot shadow a caller's argument expression. */ \
    uint64_t muldbladd3_lo, muldbladd3_hi; \
    /* MULQ has no immediate form, so b is limited to register/memory ("rm") instead of "g". */ \
    __asm__ ("mulq %3" : "=a"(muldbladd3_lo), "=d"(muldbladd3_hi) : "a"((uint64_t)(a)), "rm"((uint64_t)(b)) : "cc"); \
    /* The same add-with-carry chain is issued twice to accumulate the product two times. */ \
    __asm__ ("addq %3,%0; adcq %4,%1; adcq $0,%2" : "+r"(c0), "+r"(c1), "+r"(c2) : "a"(muldbladd3_lo), "d"(muldbladd3_hi) : "cc"); \
    __asm__ ("addq %3,%0; adcq %4,%1; adcq $0,%2" : "+r"(c0), "+r"(c1), "+r"(c2) : "a"(muldbladd3_lo), "d"(muldbladd3_hi) : "cc"); \
}
52+
/** [c0,c1,c2] += n * [d0,d1,d2]. c0 is initially 0
 *
 * Multiplies the three-limb value [d0,d1,d2] by the constant n and adds the
 * low three limbs of the result into [c0,c1,c2]. c0/d0 are the least
 * significant limbs.
 *
 * c0,c1,c2  64-bit lvalues, updated in place; anything above c2 is discarded.
 * d0,d1,d2  limbs of the multiplicand.
 * n         multiplier; it is used as an immediate operand ("i" constraint),
 *           so it must be a compile-time constant that IMULQ can encode.
 *
 * The condition flags are clobbered.
 */
#define mulnadd3(c0,c1,c2,d0,d1,d2,n) { \
    uint64_t tl1, th1, tl2, th2, tl3; \
    /* [tl1,th1] = d0 * n, added into [c0,c1,c2] with the carry rippled up to c2. */ \
    __asm__ ("mulq %3" : "=a"(tl1), "=d"(th1) : "a"(d0), "r"((Num3072::limb_type)(n)) : "cc"); \
    __asm__ ("addq %3,%0; adcq %4,%1; adcq $0,%2" : "+r"(c0), "+r"(c1), "+r"(c2) : "g"(tl1), "g"(th1) : "cc"); \
    /* [tl2,th2] = d1 * n, added one limb higher, into [c1,c2]. */ \
    __asm__ ("mulq %3" : "=a"(tl2), "=d"(th2) : "a"(d1), "r"((Num3072::limb_type)(n)) : "cc"); \
    __asm__ ("addq %2,%0; adcq %3,%1" : "+r"(c1), "+r"(c2) : "g"(tl2), "g"(th2) : "cc"); \
    /* Only the low 64 bits of d2 * n land in c2. The IMULQ source must be register/memory, hence "rm". */ \
    __asm__ ("imulq %2,%1,%0" : "=r"(tl3) : "rm"(d2), "i"(n) : "cc"); \
    __asm__ ("addq %1,%0" : "+r"(c2) : "g"(tl3) : "cc"); \
}
63+
/** [c0,c1] *= n
 *
 * Multiplies the two-limb value [c0,c1] (c0 least significant) by the
 * constant n in place, keeping the low two limbs of the result.
 *
 * c0,c1  64-bit lvalues, updated in place; overflow past c1 is discarded.
 * n      multiplier; it is used as an immediate operand ("i" constraint),
 *        so it must be a compile-time constant that IMULQ can encode.
 *
 * The condition flags are clobbered.
 */
#define muln2(c0,c1,n) { \
    uint64_t muln2_hi; \
    /* c0 = low half of c0 * n; the high half is kept to be carried into c1. */ \
    __asm__ ("mulq %2" : "+a"(c0), "=d"(muln2_hi) : "r"((Num3072::limb_type)(n)) : "cc"); \
    /* c1 = low 64 bits of c1 * n (explicit q suffix, matching the other instructions). */ \
    __asm__ ("imulq %1,%0,%0" : "+r"(c1) : "i"(n) : "cc"); \
    /* Fold in the high half of c0 * n. */ \
    __asm__ ("addq %1,%0" : "+r"(c1) : "g"(muln2_hi) : "cc"); \
}
71+
/** [c0,c1] += a
 *
 * Adds the single limb a into the two-limb accumulator [c0,c1], where c0 is
 * the least significant limb, propagating the carry from c0 into c1.
 *
 * c0,c1  64-bit lvalues, updated in place; a carry out of c1 is discarded.
 * a      addend; converted to uint64_t so the operand width always matches
 *        the explicit 64-bit (q-suffixed) instructions.
 *
 * The condition flags are clobbered.
 */
#define add2(c0,c1,a) { \
    __asm__ ("addq %2,%0; adcq $0,%1" : "+r"(c0), "+r"(c1) : "r"((uint64_t)(a)) : "cc"); \
}
76+
77+ #else
78+
3179/* * [c0,c1] = a * b */
3280#define mul (c0,c1,a,b ) { \
3381 Num3072::double_limb_type t = (Num3072::double_limb_type)a * b; \
@@ -95,6 +143,8 @@ namespace {
95143 c1 += (c0 < (a)) ? 1 : 0 ; \
96144}
97145
146+ #endif
147+
98148bool IsOverflow (const Num3072* d)
99149{
100150 for (int i = 1 ; i < Num3072::LIMBS - 1 ; ++i) {
0 commit comments