@@ -3275,6 +3275,8 @@ const AccOp = enum {
3275
3275
fn llmulacc (comptime op : AccOp , opt_allocator : ? Allocator , r : []Limb , a : []const Limb , b : []const Limb ) void {
3276
3276
assert (r .len >= a .len );
3277
3277
assert (r .len >= b .len );
3278
+ assert (! slicesOverlap (r , a ));
3279
+ assert (! slicesOverlap (r , b ));
3278
3280
3279
3281
// Order greatest first.
3280
3282
var x = a ;
@@ -3313,6 +3315,8 @@ fn llmulaccKaratsuba(
3313
3315
) error {OutOfMemory }! void {
3314
3316
assert (r .len >= a .len );
3315
3317
assert (a .len >= b .len );
3318
+ assert (! slicesOverlap (r , a ));
3319
+ assert (! slicesOverlap (r , b ));
3316
3320
3317
3321
// Classical karatsuba algorithm:
3318
3322
// a = a1 * B + a0
@@ -4129,6 +4133,7 @@ fn llsignedxor(r: []Limb, a: []const Limb, a_positive: bool, b: []const Limb, b_
4129
4133
fn llsquareBasecase (r : []Limb , x : []const Limb ) void {
4130
4134
const x_norm = x ;
4131
4135
assert (r .len >= 2 * x_norm .len + 1 );
4136
+ assert (! slicesOverlap (r , x ));
4132
4137
4133
4138
// Compute the square of a N-limb bigint with only (N^2 + N)/2
4134
4139
// multiplications by exploiting the symmetry of the coefficients around the
@@ -4224,6 +4229,318 @@ fn fixedIntFromSignedDoubleLimb(A: SignedDoubleLimb, storage: []Limb) Mutable {
4224
4229
};
4225
4230
}
4226
4231
4232
/// Reports whether the limb ranges covered by `a` and `b` share at least one limb.
///
/// An empty slice covers no memory, so it never overlaps anything, regardless
/// of where its pointer happens to point.
fn slicesOverlap(a: []const Limb, b: []const Limb) bool {
    // Without this guard an empty slice whose pointer lies strictly inside the
    // other range would be reported as overlapping.
    if (a.len == 0 or b.len == 0) return false;

    const a_start = @intFromPtr(a.ptr);
    const a_end = @intFromPtr(a.ptr + a.len);
    const b_start = @intFromPtr(b.ptr);
    const b_end = @intFromPtr(b.ptr + b.len);

    // Two non-empty ranges are disjoint exactly when one of them ends at or
    // before the start of the other: a_end <= b_start or b_end <= a_start.
    return a_end > b_start and b_end > a_start;
}
4236
+
4227
4237
// References int_test.zig from an unnamed test block so that the file is
// analyzed together with this one when tests are built.
test {
    _ = @import("int_test.zig");
}
4240
+
4241
+ const testing_allocator = std .testing .allocator ;
4242
// Shifting left by a multiple of the limb width must move the input up by
// that many limbs unchanged, zero every limb below it, and leave the rest of
// the destination untouched.
test "llshl shift by whole number of limb" {
    const sentinel = std.math.maxInt(Limb);

    // Arbitrary bit patterns; `@truncate` cuts the oversized literals down to
    // whatever width `Limb` has.
    const low: Limb = @truncate(0xCCCCCCCCCCCCCCCCCCCCCCC);
    const high: Limb = @truncate(0x22222222222222222222222);
    const input = [2]Limb{ low, high };

    var out: [10]Limb = undefined;
    for (0..9) |whole_limbs| {
        // Start every round from a destination filled with the sentinel so
        // that stray writes are visible.
        @memset(&out, sentinel);
        const len = llshl(&out, &input, whole_limbs * @bitSizeOf(Limb));
        const end = whole_limbs + 2;

        try std.testing.expectEqual(end, len);
        try std.testing.expectEqualSlices(Limb, &input, out[whole_limbs..end]);
        for (out[0..whole_limbs]) |limb| try std.testing.expectEqual(0, limb);
        for (out[end..]) |limb| try std.testing.expectEqual(sentinel, limb);
    }
}
4264
+
4265
// Table-driven check of `llshl`; the expected values are written for 64-bit
// limbs, so the test is skipped for any other limb width.
test llshl {
    if (limb_bits != 64) return error.SkipZigTest;

    // Only the most significant bit of a limb set (1 << 63).
    const top_bit = 0x8000000000000000;
    // Every bit of a limb set.
    const all_ones: Limb = 0xFFFFFFFFFFFFFFFF;

    // Each row is `{ shift in bits, expected limbs, input limbs }`.
    // zig fmt: off
    const table: Cases = &.{
        // shift by 0
        .{ 0,  &.{0},                                &.{0} },
        .{ 0,  &.{1},                                &.{1} },
        .{ 0,  &.{125484842448},                     &.{125484842448} },
        .{ 0,  &.{0xdeadbeef},                       &.{0xdeadbeef} },
        .{ 0,  &.{all_ones},                         &.{all_ones} },
        .{ 0,  &.{top_bit},                          &.{top_bit} },
        .{ 0,  &.{ 0, 1 },                           &.{ 0, 1 } },
        .{ 0,  &.{ 1, 2 },                           &.{ 1, 2 } },
        .{ 0,  &.{ top_bit, 1 },                     &.{ top_bit, 1 } },
        // shift by 1
        .{ 1,  &.{0},                                &.{0} },
        .{ 1,  &.{2},                                &.{1} },
        .{ 1,  &.{250969684896},                     &.{125484842448} },
        .{ 1,  &.{0x1bd5b7dde},                      &.{0xdeadbeef} },
        .{ 1,  &.{ 0xfffffffffffffffe, 1 },          &.{all_ones} },
        .{ 1,  &.{ 0, 1 },                           &.{top_bit} },
        .{ 1,  &.{ 0, 2 },                           &.{ 0, 1 } },
        .{ 1,  &.{ 2, 4 },                           &.{ 1, 2 } },
        .{ 1,  &.{ 0, 3 },                           &.{ top_bit, 1 } },
        // shift by 5
        .{ 5,  &.{32},                               &.{1} },
        .{ 5,  &.{4015514958336},                    &.{125484842448} },
        .{ 5,  &.{0x1bd5b7dde0},                     &.{0xdeadbeef} },
        .{ 5,  &.{ 0xffffffffffffffe0, 0x1f },       &.{all_ones} },
        .{ 5,  &.{ 0, 16 },                          &.{top_bit} },
        .{ 5,  &.{ 0, 32 },                          &.{ 0, 1 } },
        .{ 5,  &.{ 32, 64 },                         &.{ 1, 2 } },
        .{ 5,  &.{ 0, 48 },                          &.{ top_bit, 1 } },
        // shift by 64
        .{ 64, &.{ 0, 1 },                           &.{1} },
        .{ 64, &.{ 0, 125484842448 },                &.{125484842448} },
        .{ 64, &.{ 0, 0xdeadbeef },                  &.{0xdeadbeef} },
        .{ 64, &.{ 0, all_ones },                    &.{all_ones} },
        .{ 64, &.{ 0, top_bit },                     &.{top_bit} },
        .{ 64, &.{ 0, 0, 1 },                        &.{ 0, 1 } },
        .{ 64, &.{ 0, 1, 2 },                        &.{ 1, 2 } },
        .{ 64, &.{ 0, top_bit, 1 },                  &.{ top_bit, 1 } },
        // shift by 35
        .{ 35, &.{0x800000000},                      &.{1} },
        .{ 35, &.{ 13534986488655118336, 233 },      &.{125484842448} },
        .{ 35, &.{ 0xf56df77800000000, 6 },          &.{0xdeadbeef} },
        .{ 35, &.{ 0xfffffff800000000, 0x7ffffffff }, &.{all_ones} },
        .{ 35, &.{ 0, 17179869184 },                 &.{top_bit} },
        .{ 35, &.{ 0, 0x800000000 },                 &.{ 0, 1 } },
        .{ 35, &.{ 0x800000000, 0x1000000000 },      &.{ 1, 2 } },
        .{ 35, &.{ 0, 0xc00000000 },                 &.{ top_bit, 1 } },
        // shift by 70
        .{ 70, &.{ 0, 64 },                          &.{1} },
        .{ 70, &.{ 0, 8031029916672 },               &.{125484842448} },
        .{ 70, &.{ 0, 0x37ab6fbbc0 },                &.{0xdeadbeef} },
        .{ 70, &.{ 0, 0xffffffffffffffc0, 63 },      &.{all_ones} },
        .{ 70, &.{ 0, 0, 32 },                       &.{top_bit} },
        .{ 70, &.{ 0, 0, 64 },                       &.{ 0, 1 } },
        .{ 70, &.{ 0, 64, 128 },                     &.{ 1, 2 } },
        .{ 70, &.{ 0, 0, 0x60 },                     &.{ top_bit, 1 } },
    };
    // zig fmt: on

    // Run the table once with a separate destination and once with the
    // input stored inside the destination buffer.
    try test_shift_cases(llshl, table);
    try test_shift_cases_aliasing(llshl, table, -1);
}
4330
+
4331
// Left-shifting all-zero inputs: every row expects an all-zero result.
test "llshl shift 0" {
    const n = @bitSizeOf(Limb);
    // The first group uses bit shifts of up to 20 and expects the result to
    // keep the input's limb count; presumably that is why limbs of 20 bits or
    // fewer are skipped.
    if (n <= 20) return error.SkipZigTest;

    // Each row is `{ shift in bits, expected limbs, input limbs }`.
    // `cases` is annotated as `Cases`, like the other table-driven shift
    // tests, so the literal is type-checked where it is written.
    // zig fmt: off
    const cases: Cases = &.{
        // shifts smaller than one limb
        .{ 0,  &.{0},      &.{0} },
        .{ 1,  &.{0},      &.{0} },
        .{ 5,  &.{0},      &.{0} },
        .{ 13, &.{0},      &.{0} },
        .{ 20, &.{0},      &.{0} },
        .{ 0,  &.{ 0, 0 }, &.{ 0, 0 } },
        .{ 2,  &.{ 0, 0 }, &.{ 0, 0 } },
        .{ 7,  &.{ 0, 0 }, &.{ 0, 0 } },
        .{ 11, &.{ 0, 0 }, &.{ 0, 0 } },
        .{ 19, &.{ 0, 0 }, &.{ 0, 0 } },

        // shifts by whole limbs
        .{ 0,     &.{0},                  &.{0} },
        .{ n,     &.{ 0, 0 },             &.{0} },
        .{ 2 * n, &.{ 0, 0, 0 },          &.{0} },
        .{ 3 * n, &.{ 0, 0, 0, 0 },       &.{0} },
        .{ 4 * n, &.{ 0, 0, 0, 0, 0 },    &.{0} },
        .{ 0,     &.{ 0, 0 },             &.{ 0, 0 } },
        .{ n,     &.{ 0, 0, 0 },          &.{ 0, 0 } },
        .{ 2 * n, &.{ 0, 0, 0, 0 },       &.{ 0, 0 } },
        .{ 3 * n, &.{ 0, 0, 0, 0, 0 },    &.{ 0, 0 } },
        .{ 4 * n, &.{ 0, 0, 0, 0, 0, 0 }, &.{ 0, 0 } },
    };
    // zig fmt: on

    try test_shift_cases(llshl, cases);
    try test_shift_cases_aliasing(llshl, cases, -1);
}
4364
+
4365
// Right-shifting all-zero (or empty) inputs: every row expects an all-zero
// (or empty) result.
test "llshr shift 0" {
    const n = @bitSizeOf(Limb);

    // Each row is `{ shift in bits, expected limbs, input limbs }`.
    // `cases` is annotated as `Cases`, like the other table-driven shift
    // tests, so the literal is type-checked where it is written.
    // zig fmt: off
    const cases: Cases = &.{
        // small shifts
        .{ 0,  &.{0},      &.{0} },
        .{ 1,  &.{0},      &.{0} },
        .{ 5,  &.{0},      &.{0} },
        .{ 13, &.{0},      &.{0} },
        .{ 20, &.{0},      &.{0} },
        .{ 0,  &.{ 0, 0 }, &.{ 0, 0 } },
        .{ 2,  &.{0},      &.{ 0, 0 } },
        .{ 7,  &.{0},      &.{ 0, 0 } },
        .{ 11, &.{0},      &.{ 0, 0 } },
        .{ 19, &.{0},      &.{ 0, 0 } },

        // shifts by whole limbs
        .{ n,     &.{0}, &.{0} },
        .{ 2 * n, &.{0}, &.{0} },
        .{ 3 * n, &.{0}, &.{0} },
        .{ 4 * n, &.{0}, &.{0} },
        .{ n,     &.{0}, &.{ 0, 0 } },
        .{ 2 * n, &.{0}, &.{ 0, 0 } },
        .{ 3 * n, &.{0}, &.{ 0, 0 } },
        .{ 4 * n, &.{0}, &.{ 0, 0 } },

        // empty input yields empty output
        .{ 1,  &.{}, &.{} },
        .{ 2,  &.{}, &.{} },
        .{ 64, &.{}, &.{} },
    };
    // zig fmt: on

    try test_shift_cases(llshr, cases);
    try test_shift_cases_aliasing(llshr, cases, 1);
}
4399
+
4400
// Right shifts in which every row expects the single-limb result `{0}`.
test "llshr to 0" {
    const n = @bitSizeOf(Limb);
    // Only run for 32-bit and 64-bit limbs.
    if (n != 64 and n != 32) return error.SkipZigTest;

    // Each row is `{ shift in bits, expected limbs, input limbs }`.
    // `cases` is annotated as `Cases`, like the other table-driven shift
    // tests, so the literal is type-checked where it is written.
    // zig fmt: off
    const cases: Cases = &.{
        .{ 1,   &.{0}, &.{0} },
        .{ 1,   &.{0}, &.{1} },
        .{ 5,   &.{0}, &.{1} },
        .{ 65,  &.{0}, &.{ 0, 1 } },
        .{ 193, &.{0}, &.{ 0, 0, std.math.maxInt(Limb) } },
        .{ 193, &.{0}, &.{ std.math.maxInt(Limb), 1, std.math.maxInt(Limb) } },
        .{ 193, &.{0}, &.{ 0xdeadbeef, 0xabcdefab, 0x1234 } },
    };
    // zig fmt: on

    try test_shift_cases(llshr, cases);
    try test_shift_cases_aliasing(llshr, cases, 1);
}
4419
+
4420
// Table-driven check of `llshr` on single-limb inputs; the expected values
// are written for 64-bit limbs, so other limb widths are skipped.
test "llshr single" {
    if (limb_bits != 64) return error.SkipZigTest;

    // Only the most significant bit of a limb set (1 << 63).
    const top_bit = 0x8000000000000000;
    // Every bit of a limb set.
    const all_ones: Limb = 0xFFFFFFFFFFFFFFFF;

    // Each row is `{ shift in bits, expected limbs, input limbs }`.
    // zig fmt: off
    const table: Cases = &.{
        // shift by 0
        .{ 0, &.{0},                  &.{0} },
        .{ 0, &.{1},                  &.{1} },
        .{ 0, &.{125484842448},       &.{125484842448} },
        .{ 0, &.{0xdeadbeef},         &.{0xdeadbeef} },
        .{ 0, &.{all_ones},           &.{all_ones} },
        .{ 0, &.{top_bit},            &.{top_bit} },
        // shift by 1
        .{ 1, &.{0},                  &.{0} },
        .{ 1, &.{1},                  &.{2} },
        .{ 1, &.{62742421224},        &.{125484842448} },
        .{ 1, &.{62742421223},        &.{125484842447} },
        .{ 1, &.{0x6f56df77},         &.{0xdeadbeef} },
        .{ 1, &.{0x7fffffffffffffff}, &.{all_ones} },
        .{ 1, &.{0x4000000000000000}, &.{top_bit} },
        // shift by 8
        .{ 8, &.{1},                  &.{256} },
        .{ 8, &.{490175165},          &.{125484842448} },
        .{ 8, &.{0xdeadbe},           &.{0xdeadbeef} },
        .{ 8, &.{0xffffffffffffff},   &.{all_ones} },
        .{ 8, &.{0x80000000000000},   &.{top_bit} },
    };
    // zig fmt: on

    // Run the table once with a separate destination and once with the
    // input stored inside the destination buffer.
    try test_shift_cases(llshr, table);
    try test_shift_cases_aliasing(llshr, table, 1);
}
4453
+
4454
// Table-driven check of `llshr` on two-limb inputs; the expected values are
// written for 64-bit limbs, so other limb widths are skipped.
test llshr {
    if (limb_bits != 64) return error.SkipZigTest;

    // Only the most significant bit of a limb set (1 << 63).
    const top_bit = 0x8000000000000000;
    // Every bit of a limb set.
    const all_ones: Limb = 0xFFFFFFFFFFFFFFFF;

    // Each row is `{ shift in bits, expected limbs, input limbs }`.
    // zig fmt: off
    const table: Cases = &.{
        // shift by 0
        .{ 0,  &.{ 0, 0 },                         &.{ 0, 0 } },
        .{ 0,  &.{ 0, 1 },                         &.{ 0, 1 } },
        .{ 0,  &.{ 15, 1 },                        &.{ 15, 1 } },
        .{ 0,  &.{ 987656565, 123456789456 },      &.{ 987656565, 123456789456 } },
        .{ 0,  &.{ 0xfeebdaed, 0xdeadbeef },       &.{ 0xfeebdaed, 0xdeadbeef } },
        .{ 0,  &.{ 1, all_ones },                  &.{ 1, all_ones } },
        .{ 0,  &.{ 0, top_bit },                   &.{ 0, top_bit } },
        // shift by 1
        .{ 1,  &.{0},                              &.{ 0, 0 } },
        .{ 1,  &.{top_bit},                        &.{ 0, 1 } },
        .{ 1,  &.{0x8000000000000007},             &.{ 15, 1 } },
        .{ 1,  &.{ 493828282, 61728394728 },       &.{ 987656565, 123456789456 } },
        .{ 1,  &.{ 0x800000007f75ed76, 0x6f56df77 }, &.{ 0xfeebdaed, 0xdeadbeef } },
        .{ 1,  &.{ top_bit, 0x7fffffffffffffff },  &.{ 1, all_ones } },
        .{ 1,  &.{ 0, 0x4000000000000000 },        &.{ 0, top_bit } },
        // shift by 64
        .{ 64, &.{0},                              &.{ 0, 0 } },
        .{ 64, &.{1},                              &.{ 0, 1 } },
        .{ 64, &.{1},                              &.{ 15, 1 } },
        .{ 64, &.{123456789456},                   &.{ 987656565, 123456789456 } },
        .{ 64, &.{0xdeadbeef},                     &.{ 0xfeebdaed, 0xdeadbeef } },
        .{ 64, &.{all_ones},                       &.{ 1, all_ones } },
        .{ 64, &.{top_bit},                        &.{ 0, top_bit } },
        // shift by 72
        .{ 72, &.{0},                              &.{ 0, 0 } },
        .{ 72, &.{0},                              &.{ 0, 1 } },
        .{ 72, &.{0},                              &.{ 15, 1 } },
        .{ 72, &.{482253083},                      &.{ 987656565, 123456789456 } },
        .{ 72, &.{0xdeadbe},                       &.{ 0xfeebdaed, 0xdeadbeef } },
        .{ 72, &.{0xffffffffffffff},               &.{ 1, all_ones } },
        .{ 72, &.{0x80000000000000},               &.{ 0, top_bit } },
    };
    // zig fmt: on

    // Run the table once with a separate destination and once with the
    // input stored inside the destination buffer.
    try test_shift_cases(llshr, table);
    try test_shift_cases_aliasing(llshr, table, 1);
}
4497
+
4498
/// A list of shift test vectors, each `{ shift in bits, expected limbs, input limbs }`.
const Cases = []const struct { usize, []const Limb, []const Limb };

/// Runs `func` on every case, writing into a scratch buffer that is separate
/// from the input.
///
/// The value returned by `func` is taken as the number of result limbs: it
/// must equal the expected length, the first that many limbs of the buffer
/// must equal the expected limbs, and every limb after them must still hold
/// the fill value written before the call.
fn test_shift_cases(func: fn ([]Limb, []const Limb, usize) usize, cases: Cases) !void {
    const sentinel = std.math.maxInt(Limb);
    var out: [20]Limb = undefined;

    for (cases) |case| {
        const shift, const expected, const data = case;

        // The expected result has to fit into the scratch buffer.
        std.debug.assert(expected.len <= 20);

        // Refill the buffer so writes past the result can be detected.
        @memset(&out, sentinel);
        const len = func(&out, data, shift);

        try std.testing.expectEqual(expected.len, len);
        try std.testing.expectEqualSlices(Limb, expected, out[0..len]);
        for (out[len..]) |limb| try std.testing.expectEqual(sentinel, limb);
    }
}
4519
+
4520
/// Runs `func` on every case with the input stored inside the same buffer
/// that receives the result.
///
/// The destination always starts at limb 20 of a 60-limb buffer. The input is
/// copied to limb `20 + shift_direction * k` for each `k` in `0..10`, so with
/// `shift_direction == -1` it starts at or before the destination and with
/// `shift_direction == 1` at or after it. Only the returned length and the
/// result limbs are checked.
fn test_shift_cases_aliasing(func: fn ([]Limb, []const Limb, usize) usize, cases: Cases, shift_direction: isize) !void {
    const dest_start = 20;
    const sentinel = std.math.maxInt(Limb);
    var buffer: [60]Limb = undefined;

    assert(shift_direction == 1 or shift_direction == -1);

    for (0..10) |limb_offset| {
        // Where the input goes relative to the destination for this round.
        const signed_offset: isize = @intCast(limb_offset);
        const src_start: usize = @intCast(dest_start + shift_direction * signed_offset);

        for (cases) |case| {
            const shift, const expected, const data = case;

            std.debug.assert(expected.len <= 20);

            // Reset the whole buffer, then place the input at its offset.
            @memset(&buffer, sentinel);
            const src = buffer[src_start..][0..data.len];
            @memcpy(src, data);

            const len = func(buffer[dest_start..], src, shift);

            try std.testing.expectEqual(expected.len, len);
            try std.testing.expectEqualSlices(Limb, expected, buffer[dest_start .. dest_start + len]);
        }
    }
}
0 commit comments