@@ -94,11 +94,13 @@ see https://www.gnu.org/licenses/. */
94
94
95
95
#define gmp_clz(count, x) do { \
96
96
mp_limb_t __clz_x = (x); \
97
- unsigned __clz_c; \
98
- for (__clz_c = 0; \
99
- (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \
100
- __clz_c += 8) \
101
- __clz_x <<= 8; \
97
+ unsigned __clz_c = 0; \
98
+ int LOCAL_SHIFT_BITS = 8; \
99
+ if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \
100
+ for (; \
101
+ (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \
102
+ __clz_c += 8) \
103
+ { __clz_x <<= LOCAL_SHIFT_BITS; } \
102
104
for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \
103
105
__clz_x <<= 1; \
104
106
(count) = __clz_c; \
@@ -143,27 +145,27 @@ see https://www.gnu.org/licenses/. */
143
145
w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \
144
146
} \
145
147
else { \
146
- mp_limb_t __x0, __x1, __x2, __x3; \
147
- unsigned __ul, __vl, __uh, __vh; \
148
- mp_limb_t __u = (u), __v = (v); \
148
+ mp_limb_t __x0, __x1, __x2, __x3; \
149
+ unsigned __ul, __vl, __uh, __vh; \
150
+ mp_limb_t __u = (u), __v = (v); \
149
151
\
150
- __ul = __u & GMP_LLIMB_MASK; \
151
- __uh = __u >> (GMP_LIMB_BITS / 2); \
152
- __vl = __v & GMP_LLIMB_MASK; \
153
- __vh = __v >> (GMP_LIMB_BITS / 2); \
152
+ __ul = __u & GMP_LLIMB_MASK; \
153
+ __uh = __u >> (GMP_LIMB_BITS / 2); \
154
+ __vl = __v & GMP_LLIMB_MASK; \
155
+ __vh = __v >> (GMP_LIMB_BITS / 2); \
154
156
\
155
- __x0 = (mp_limb_t) __ul * __vl; \
156
- __x1 = (mp_limb_t) __ul * __vh; \
157
- __x2 = (mp_limb_t) __uh * __vl; \
158
- __x3 = (mp_limb_t) __uh * __vh; \
157
+ __x0 = (mp_limb_t) __ul * __vl; \
158
+ __x1 = (mp_limb_t) __ul * __vh; \
159
+ __x2 = (mp_limb_t) __uh * __vl; \
160
+ __x3 = (mp_limb_t) __uh * __vh; \
159
161
\
160
- __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \
161
- __x1 += __x2 ; /* but this indeed can */ \
162
- if (__x1 < __x2 ) /* did we get it? */ \
163
- __x3 += GMP_HLIMB_BIT ; /* yes, add it in the proper pos. */ \
162
+ __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \
163
+ __x1 += __x2 ; /* but this indeed can */ \
164
+ if (__x1 < __x2 ) /* did we get it? */ \
165
+ __x3 += GMP_HLIMB_BIT ; /* yes, add it in the proper pos. */ \
164
166
\
165
- (w1 ) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2 )); \
166
- (w0 ) = (__x1 << (GMP_LIMB_BITS / 2 )) + (__x0 & GMP_LLIMB_MASK ); \
167
+ (w1 ) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2 )); \
168
+ (w0 ) = (__x1 << (GMP_LIMB_BITS / 2 )) + (__x0 & GMP_LLIMB_MASK ); \
167
169
} \
168
170
} while (0 )
169
171
@@ -768,91 +770,81 @@ mpn_neg (mp_ptr rp, mp_srcptr up, mp_size_t n)
768
770
mp_limb_t
769
771
mpn_invert_3by2 (mp_limb_t u1 , mp_limb_t u0 )
770
772
{
771
- int GMP_LIMB_BITS_MUL_3 = GMP_LIMB_BITS * 3 ;
772
- if (sizeof (unsigned ) * CHAR_BIT > GMP_LIMB_BITS * 3 )
773
- {
774
- return (((unsigned ) 1 << GMP_LIMB_BITS_MUL_3 ) - 1 ) /
775
- (((unsigned ) u1 << GMP_LIMB_BITS_MUL_3 / 3 ) + u0 );
776
- }
777
- else if (GMP_ULONG_BITS > GMP_LIMB_BITS * 3 )
778
- {
779
- return (((unsigned long ) 1 << GMP_LIMB_BITS_MUL_3 ) - 1 ) /
780
- (((unsigned long ) u1 << GMP_LIMB_BITS_MUL_3 / 3 ) + u0 );
781
- }
782
- else {
783
- mp_limb_t r , p , m , ql ;
784
- unsigned ul , uh , qh ;
773
+ mp_limb_t r , m ;
785
774
786
- assert (u1 >= GMP_LIMB_HIGHBIT );
775
+ {
776
+ mp_limb_t p , ql ;
777
+ unsigned ul , uh , qh ;
787
778
788
- /* For notation, let b denote the half-limb base, so that B = b^2.
789
- Split u1 = b uh + ul. */
790
- ul = u1 & GMP_LLIMB_MASK ;
791
- uh = u1 >> (GMP_LIMB_BITS / 2 );
779
+ /* For notation, let b denote the half-limb base, so that B = b^2.
780
+ Split u1 = b uh + ul. */
781
+ ul = u1 & GMP_LLIMB_MASK ;
782
+ uh = u1 >> (GMP_LIMB_BITS / 2 );
792
783
793
- /* Approximation of the high half of quotient. Differs from the 2/1
794
- inverse of the half limb uh, since we have already subtracted
795
- u0. */
796
- qh = ~ u1 / uh ;
784
+ /* Approximation of the high half of quotient. Differs from the 2/1
785
+ inverse of the half limb uh, since we have already subtracted
786
+ u0. */
787
+ qh = ( u1 ^ GMP_LIMB_MAX ) / uh ;
797
788
798
- /* Adjust to get a half-limb 3/2 inverse, i.e., we want
789
+ /* Adjust to get a half-limb 3/2 inverse, i.e., we want
799
790
800
- qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
801
- = floor( (b (~u) + b-1) / u),
791
+ qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
792
+ = floor( (b (~u) + b-1) / u),
802
793
803
- and the remainder
794
+ and the remainder
804
795
805
- r = b (~u) + b-1 - qh (b uh + ul)
796
+ r = b (~u) + b-1 - qh (b uh + ul)
806
797
= b (~u - qh uh) + b-1 - qh ul
807
798
808
- Subtraction of qh ul may underflow, which implies adjustments.
809
- But by normalization, 2 u >= B > qh ul, so we need to adjust by
810
- at most 2.
811
- */
799
+ Subtraction of qh ul may underflow, which implies adjustments.
800
+ But by normalization, 2 u >= B > qh ul, so we need to adjust by
801
+ at most 2.
802
+ */
812
803
813
- r = ((~u1 - (mp_limb_t ) qh * uh ) << (GMP_LIMB_BITS / 2 )) | GMP_LLIMB_MASK ;
804
+ r = ((~u1 - (mp_limb_t ) qh * uh ) << (GMP_LIMB_BITS / 2 )) | GMP_LLIMB_MASK ;
814
805
815
- p = (mp_limb_t ) qh * ul ;
816
- /* Adjustment steps taken from udiv_qrnnd_c */
817
- if (r < p )
818
- {
819
- qh -- ;
820
- r += u1 ;
821
- if (r >= u1 ) /* i.e. we didn't get carry when adding to r */
822
- if (r < p )
823
- {
824
- qh -- ;
825
- r += u1 ;
826
- }
827
- }
828
- r -= p ;
806
+ p = (mp_limb_t ) qh * ul ;
807
+ /* Adjustment steps taken from udiv_qrnnd_c */
808
+ if (r < p )
809
+ {
810
+ qh -- ;
811
+ r += u1 ;
812
+ if (r >= u1 ) /* i.e. we didn't get carry when adding to r */
813
+ if (r < p )
814
+ {
815
+ qh -- ;
816
+ r += u1 ;
817
+ }
818
+ }
819
+ r -= p ;
829
820
830
- /* Low half of the quotient is
821
+ /* Low half of the quotient is
831
822
832
823
ql = floor ( (b r + b-1) / u1).
833
824
834
- This is a 3/2 division (on half-limbs), for which qh is a
835
- suitable inverse. */
825
+ This is a 3/2 division (on half-limbs), for which qh is a
826
+ suitable inverse. */
836
827
837
- p = (r >> (GMP_LIMB_BITS / 2 )) * qh + r ;
838
- /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
839
- work, it is essential that ql is a full mp_limb_t. */
840
- ql = (p >> (GMP_LIMB_BITS / 2 )) + 1 ;
828
+ p = (r >> (GMP_LIMB_BITS / 2 )) * qh + r ;
829
+ /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
830
+ work, it is essential that ql is a full mp_limb_t. */
831
+ ql = (p >> (GMP_LIMB_BITS / 2 )) + 1 ;
841
832
842
- /* By the 3/2 trick, we don't need the high half limb. */
843
- r = (r << (GMP_LIMB_BITS / 2 )) + GMP_LLIMB_MASK - ql * u1 ;
833
+ /* By the 3/2 trick, we don't need the high half limb. */
834
+ r = (r << (GMP_LIMB_BITS / 2 )) + GMP_LLIMB_MASK - ql * u1 ;
844
835
845
- if (r >= (p << (GMP_LIMB_BITS / 2 )))
846
- {
847
- ql -- ;
848
- r += u1 ;
849
- }
850
- m = ((mp_limb_t ) qh << (GMP_LIMB_BITS / 2 )) + ql ;
851
- if (r >= u1 )
852
- {
853
- m ++ ;
854
- r -= u1 ;
855
- }
836
+ if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2 ))))
837
+ {
838
+ ql -- ;
839
+ r += u1 ;
840
+ }
841
+ m = ((mp_limb_t ) qh << (GMP_LIMB_BITS / 2 )) + ql ;
842
+ if (r >= u1 )
843
+ {
844
+ m ++ ;
845
+ r -= u1 ;
846
+ }
847
+ }
856
848
857
849
/* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a
858
850
3/2 inverse. */
@@ -881,7 +873,6 @@ mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
881
873
}
882
874
883
875
return m ;
884
- }
885
876
}
886
877
887
878
struct gmp_div_inverse
@@ -3332,7 +3323,7 @@ mpz_bin_uiui (mpz_t r, unsigned long n, unsigned long k)
3332
3323
mpz_fac_ui (t , k );
3333
3324
3334
3325
for (; k > 0 ; -- k )
3335
- mpz_mul_ui (r , r , n -- );
3326
+ mpz_mul_ui (r , r , n -- );
3336
3327
3337
3328
mpz_divexact (r , r , t );
3338
3329
mpz_clear (t );
@@ -3427,7 +3418,7 @@ gmp_lucas_mod (mpz_t V, mpz_t Qk, long Q,
3427
3418
gmp_lucas_step_k_2k (V , Qk , n );
3428
3419
3429
3420
/* A step k->k+1 is performed if the bit in $n$ is 1 */
3430
- /* mpz_tstbit(n,bs) or the bit is 0 in $n$ but */
3421
+ /* mpz_tstbit(n,bs) or the bit is 0 in $n$ but */
3431
3422
/* should be 1 in $n+1$ (bs == b0) */
3432
3423
if (b0 == bs || mpz_tstbit (n , bs ))
3433
3424
{
@@ -3990,13 +3981,18 @@ gmp_popcount_limb (mp_limb_t x)
3990
3981
unsigned c ;
3991
3982
3992
3983
/* Do 16 bits at a time, to avoid limb-sized constants. */
3993
- for (c = 0 ; x > 0 ; x >>= 16 )
3984
+ int LOCAL_SHIFT_BITS = 16 ;
3985
+ for (c = 0 ; x > 0 ;)
3994
3986
{
3995
3987
unsigned w = x - ((x >> 1 ) & 0x5555 );
3996
3988
w = ((w >> 2 ) & 0x3333 ) + (w & 0x3333 );
3997
3989
w = (w >> 4 ) + w ;
3998
3990
w = ((w >> 8 ) & 0x000f ) + (w & 0x000f );
3999
3991
c += w ;
3992
+ if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS )
3993
+ x >>= LOCAL_SHIFT_BITS ;
3994
+ else
3995
+ x = 0 ;
4000
3996
}
4001
3997
return c ;
4002
3998
}
@@ -4492,7 +4488,7 @@ mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
4492
4488
ptrdiff_t word_step ;
4493
4489
/* The current (partial) limb. */
4494
4490
mp_limb_t limb ;
4495
- /* The number of bytes left to do in this limb. */
4491
+ /* The number of bytes left to do in this limb. */
4496
4492
size_t bytes ;
4497
4493
/* The index where the limb was read. */
4498
4494
mp_size_t i ;
@@ -4503,10 +4499,15 @@ mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
4503
4499
limb = u -> _mp_d [un - 1 ];
4504
4500
assert (limb != 0 );
4505
4501
4506
- k = 0 ;
4507
- do {
4508
- k ++ ; limb >>= CHAR_BIT ;
4509
- } while (limb != 0 );
4502
+ k = (GMP_LIMB_BITS <= CHAR_BIT );
4503
+ if (!k )
4504
+ {
4505
+ do {
4506
+ int LOCAL_CHAR_BIT = CHAR_BIT ;
4507
+ k ++ ; limb >>= LOCAL_CHAR_BIT ;
4508
+ } while (limb != 0 );
4509
+ }
4510
+ /* else limb = 0; */
4510
4511
4511
4512
count = (k + (un - 1 ) * sizeof (mp_limb_t ) + size - 1 ) / size ;
4512
4513
@@ -4535,17 +4536,28 @@ mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
4535
4536
for (bytes = 0 , i = 0 , k = 0 ; k < count ; k ++ , p += word_step )
4536
4537
{
4537
4538
size_t j ;
4538
- for (j = 0 ; j < size ; j ++ , p -= (ptrdiff_t ) endian )
4539
+ for (j = 0 ; j < size ; ++ j , p -= (ptrdiff_t ) endian )
4539
4540
{
4540
- if (bytes == 0 )
4541
+ if (sizeof ( mp_limb_t ) == 1 )
4541
4542
{
4542
4543
if (i < un )
4543
- limb = u -> _mp_d [i ++ ];
4544
- bytes = sizeof (mp_limb_t );
4544
+ * p = u -> _mp_d [i ++ ];
4545
+ else
4546
+ * p = 0 ;
4547
+ }
4548
+ else
4549
+ {
4550
+ int LOCAL_CHAR_BIT = CHAR_BIT ;
4551
+ if (bytes == 0 )
4552
+ {
4553
+ if (i < un )
4554
+ limb = u -> _mp_d [i ++ ];
4555
+ bytes = sizeof (mp_limb_t );
4556
+ }
4557
+ * p = limb ;
4558
+ limb >>= LOCAL_CHAR_BIT ;
4559
+ bytes -- ;
4545
4560
}
4546
- * p = limb ;
4547
- limb >>= CHAR_BIT ;
4548
- bytes -- ;
4549
4561
}
4550
4562
}
4551
4563
assert (i == un );
0 commit comments