@@ -17,7 +17,6 @@ const Endian = std.builtin.Endian;
17
17
const Signedness = std .builtin .Signedness ;
18
18
const native_endian = builtin .cpu .arch .endian ();
19
19
20
-
21
20
/// Returns the number of limbs needed to store `scalar`, which must be a
22
21
/// primitive integer value.
23
22
/// Note: A comptime-known upper bound of this value that may be used
@@ -210,7 +209,7 @@ pub const Mutable = struct {
210
209
for (self .limbs [0.. self .len ]) | limb | {
211
210
std .debug .print ("{x} " , .{limb });
212
211
}
213
- std .debug .print ("capacity={} positive={}\n " , .{ self .limbs .len , self .positive });
212
+ std .debug .print ("len={} capacity={} positive={}\n " , .{ self . len , self .limbs .len , self .positive });
214
213
}
215
214
216
215
/// Clones an Mutable and returns a new Mutable with the same value. The new Mutable is a deep copy and
@@ -1104,8 +1103,8 @@ pub const Mutable = struct {
1104
1103
/// r = a << shift
///
/// r and a may alias (llshl asserts r.ptr >= a.ptr when they overlap).
///
/// Asserts there is enough memory to fit the result. The upper bound Limb count is
/// `a.limbs.len + (shift / @bitSizeOf(Limb)) + 1`.
pub fn shiftLeft(r: *Mutable, a: Const, shift: usize) void {
    // llshl returns exactly how many limbs it wrote, so normalize on that
    // instead of recomputing an upper bound (`a.limbs.len + shift/limb_bits + 1`)
    // here, which could read past the limbs actually produced.
    const new_len = llshl(r.limbs, a.limbs, shift);
    r.normalize(new_len);
    r.positive = a.positive;
}
1111
1110
@@ -1173,16 +1172,16 @@ pub const Mutable = struct {
1173
1172
1174
1173
// This shift should not be able to overflow, so invoke llshl and normalize manually
1175
1174
// to avoid the extra required limb.
1176
- llshl (r .limbs , a .limbs , shift );
1177
- r .normalize (a . limbs . len + ( shift / limb_bits ) );
1175
+ const new_len = llshl (r .limbs , a .limbs , shift );
1176
+ r .normalize (new_len );
1178
1177
r .positive = a .positive ;
1179
1178
}
1180
1179
1181
1180
/// r = a >> shift
1182
1181
/// r and a may alias.
1183
1182
///
1184
1183
/// Asserts there is enough memory to fit the result. The upper bound Limb count is
1185
- /// `a.limbs.len - (shift / (@sizeOf (Limb) * 8 ))`.
1184
+ /// `a.limbs.len - (shift / (@bitSizeOf (Limb)))`.
1186
1185
pub fn shiftRight (r : * Mutable , a : Const , shift : usize ) void {
1187
1186
const full_limbs_shifted_out = shift / limb_bits ;
1188
1187
const remaining_bits_shifted_out = shift % limb_bits ;
@@ -1210,9 +1209,9 @@ pub const Mutable = struct {
1210
1209
break :nonzero a .limbs [full_limbs_shifted_out ] << not_covered != 0 ;
1211
1210
};
1212
1211
1213
- llshr (r .limbs , a .limbs , shift );
1212
+ const new_len = llshr (r .limbs , a .limbs , shift );
1214
1213
1215
- r .len = a . limbs . len - full_limbs_shifted_out ;
1214
+ r .len = new_len ;
1216
1215
r .positive = a .positive ;
1217
1216
if (nonzero_negative_shiftout ) r .addScalar (r .toConst (), -1 );
1218
1217
r .normalize (r .len );
@@ -1971,7 +1970,7 @@ pub const Const = struct {
1971
1970
for (self .limbs [0.. self .limbs .len ]) | limb | {
1972
1971
std .debug .print ("{x} " , .{limb });
1973
1972
}
1974
- std .debug .print ("positive={}\n " , .{self .positive });
1973
+ std .debug .print ("len={} positive={}\n " , .{ self .len , self . positive });
1975
1974
}
1976
1975
1977
1976
pub fn abs (self : Const ) Const {
@@ -2673,7 +2672,7 @@ pub const Managed = struct {
2673
2672
for (self .limbs [0.. self .len ()]) | limb | {
2674
2673
std .debug .print ("{x} " , .{limb });
2675
2674
}
2676
- std .debug .print ("capacity={} positive={}\n " , .{ self .limbs .len , self .isPositive () });
2675
+ std .debug .print ("len={} capacity={} positive={}\n " , .{ self . len (), self .limbs .len , self .isPositive () });
2677
2676
}
2678
2677
2679
2678
/// Negate the sign.
@@ -3711,68 +3710,114 @@ fn lldiv0p5(quo: []Limb, rem: *Limb, a: []const Limb, b: HalfLimb) void {
3711
3710
}
3712
3711
}
3713
3712
3714
/// Performs r = a << shift and returns the number of limbs affected.
///
/// If a and r overlap, then r.ptr >= a.ptr is asserted; the shift is written
/// back-to-front (most significant limb first), so a destination at or above
/// the source is never read after it has been clobbered.
/// r must have the capacity to store a << shift.
fn llshl(r: []Limb, a: []const Limb, shift: usize) usize {
    std.debug.assert(a.len >= 1);
    if (slicesOverlap(a, r))
        std.debug.assert(@intFromPtr(r.ptr) >= @intFromPtr(a.ptr));

    if (shift == 0) {
        // Plain copy. copyBackwards is required because r may overlap a
        // with r.ptr >= a.ptr.
        if (a.ptr != r.ptr)
            std.mem.copyBackwards(Limb, r[0..a.len], a);
        return a.len;
    }

    if (shift >= limb_bits) {
        // Whole-limb part of the shift: perform the sub-limb shift into the
        // upper portion of r, then zero-fill the vacated low limbs.
        const limb_shift = shift / limb_bits;
        const affected = llshl(r[limb_shift..], a, shift % limb_bits);
        @memset(r[0..limb_shift], 0);
        return limb_shift + affected;
    }

    // shift is guaranteed to be in [1, limb_bits), so both of these are
    // nonzero and the opposite shift never reaches limb_bits.
    const bit_shift: Log2Limb = @truncate(shift);
    const opposite_bit_shift: Log2Limb = @truncate(limb_bits - bit_shift);

    // We only need the extra limb if the shift of the last element overflows.
    // This is useful for the implementation of `shiftLeftSat`.
    const overflows = a[a.len - 1] >> opposite_bit_shift != 0;
    if (overflows) {
        std.debug.assert(r.len >= a.len + 1);
        // r is asserted to be large enough above.
        r[a.len] = a[a.len - 1] >> opposite_bit_shift;
    } else {
        std.debug.assert(r.len >= a.len);
    }

    // Back-to-front: each result limb combines the high bits of a[i - 1]
    // with the low bits of a[i]; a[i] is read before r[i - 1] is written,
    // keeping aliased inputs (r.ptr >= a.ptr) intact.
    var i: usize = a.len;
    while (i > 1) {
        i -= 1;
        r[i] = (a[i - 1] >> opposite_bit_shift) | (a[i] << bit_shift);
    }
    r[0] = a[0] << bit_shift;

    return a.len + @intFromBool(overflows);
}
/// Performs r = a >> shift and returns the number of limbs affected.
///
/// If a and r overlap, then r.ptr <= a.ptr is asserted; the shift is written
/// front-to-back (least significant limb first), so a destination at or below
/// the source is never read after it has been clobbered.
/// r must have the capacity to store a >> shift.
///
/// See tests below for examples of behaviour.
fn llshr(r: []Limb, a: []const Limb, shift: usize) usize {
    if (slicesOverlap(a, r))
        std.debug.assert(@intFromPtr(r.ptr) <= @intFromPtr(a.ptr));

    if (a.len == 0) return 0;

    if (shift == 0) {
        std.debug.assert(r.len >= a.len);
        // Plain copy. copyForwards is required because r may overlap a
        // with r.ptr <= a.ptr.
        if (a.ptr != r.ptr)
            std.mem.copyForwards(Limb, r[0..a.len], a);
        return a.len;
    }

    if (shift >= limb_bits) {
        const limb_shift = shift / limb_bits;
        if (limb_shift >= a.len) {
            // Everything is shifted out; the result is a single zero limb.
            r[0] = 0;
            return 1;
        }
        // Drop the fully-shifted-out limbs, then handle the remaining
        // sub-limb shift. The recursed-on slice is never empty here.
        return llshr(r, a[limb_shift..], shift % limb_bits);
    }

    // shift is guaranteed to be in [1, limb_bits).
    const bit_shift: Log2Limb = @truncate(shift);
    const opposite_bit_shift: Log2Limb = @truncate(limb_bits - bit_shift);

    // Special case, where there is a risk to set r to zero length: a
    // single-limb input always yields one result limb, even if it is now 0.
    // (An `a.len == 0` check here would be dead code: that case already
    // returned 0 above, and the recursive call never passes an empty slice.)
    if (a.len == 1) {
        r[0] = a[0] >> bit_shift;
        return 1;
    }

    // Whether the most significant limb becomes 0 after the shift, in which
    // case the result is one limb shorter than the input.
    const shrink = a[a.len - 1] >> bit_shift == 0;
    // r must hold exactly the result length: a.len - 1 when the top limb
    // shrinks away, a.len otherwise. (The previous `!shrink` polarity
    // under-asserted capacity in the non-shrinking case, where the loop
    // plus the tail store write indices 0..a.len - 1 inclusive.)
    std.debug.assert(r.len >= a.len - @intFromBool(shrink));

    // Front-to-back: each result limb combines the high bits of a[i] with
    // the low bits of a[i + 1]; safe for aliased inputs with r.ptr <= a.ptr.
    var i: usize = 0;
    while (i < a.len - 1) : (i += 1) {
        r[i] = (a[i] >> bit_shift) | (a[i + 1] << opposite_bit_shift);
    }

    if (!shrink)
        r[i] = a[i] >> bit_shift;

    return a.len - @intFromBool(shrink);
}
3772
3818
3773
3819
// r = ~r
3774
3820
fn llnot (r : []Limb ) void {
3775
-
3776
3821
for (r ) | * elem | {
3777
3822
elem .* = ~ elem .* ;
3778
3823
}
@@ -4107,7 +4152,7 @@ fn llsquareBasecase(r: []Limb, x: []const Limb) void {
4107
4152
}
4108
4153
4109
4154
// Each product appears twice, multiply by 2
4110
- llshl (r , r [0 .. 2 * x_norm .len ], 1 );
4155
+ _ = llshl (r , r [0 .. 2 * x_norm .len ], 1 );
4111
4156
4112
4157
for (x_norm , 0.. ) | v , i | {
4113
4158
// Compute and add the squares
0 commit comments