Skip to content

Commit 5c18a16

Browse files
committed
feat: impl convolutionI64
1 parent cae54ec commit 5c18a16

File tree

1 file changed

+57
-4
lines changed

1 file changed

+57
-4
lines changed

src/convolution.zig

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,63 @@ pub fn convolutionModint(comptime mod: u32, allocator: Allocator, a: []const Mod
8080

8181
// TODO: add doc comment
8282
pub fn convolutionI64(allocator: Allocator, a: []const i64, b: []const i64) ![]i64 {
83-
_ = allocator; // autofix
84-
_ = a; // autofix
85-
_ = b; // autofix
86-
// TODO: implement
83+
const n = a.len;
84+
const m = b.len;
85+
if (n == 0 or m == 0) {
86+
return allocator.alloc(i64, 0);
87+
}
88+
89+
const m1: u64 = 754_974_721; // 2^24 * 45 + 1
90+
const m2: u64 = 167_772_161; // 2^25 * 5 + 1
91+
const m3: u64 = 469_762_049; // 2^26 * 7 + 1
92+
const m2m3: u64 = m2 * m3;
93+
const m1m3: u64 = m1 * m3;
94+
const m1m2: u64 = m1 * m2;
95+
const m1m2m3: u64 = m1m2 *% m3;
96+
97+
const inv1 = internal.invGcd(m2m3, m1).@"1";
98+
const inv2 = internal.invGcd(m1m3, m2).@"1";
99+
const inv3 = internal.invGcd(m1m2, m3).@"1";
100+
101+
const c1 = try convolution(m1, i64, allocator, a, b);
102+
defer allocator.free(c1);
103+
const c2 = try convolution(m2, i64, allocator, a, b);
104+
defer allocator.free(c2);
105+
const c3 = try convolution(m3, i64, allocator, a, b);
106+
defer allocator.free(c3);
107+
108+
var c = try allocator.alloc(i64, n + m - 1);
109+
for (0..c.len) |i| {
110+
var x: u64 = 0;
111+
x +%= @as(u64, @bitCast(@rem((c1[i] *% inv1), m1))) *% m2m3;
112+
x +%= @as(u64, @bitCast(@rem((c2[i] *% inv2), m2))) *% m1m3;
113+
x +%= @as(u64, @bitCast(@rem((c3[i] *% inv3), m3))) *% m1m2;
114+
// B = 2^63, -B <= x, r(real value) < B
115+
// (x, x - M, x - 2M, or x - 3M) = r (mod 2B)
116+
// r = c1[i] (mod MOD1)
117+
// focus on MOD1
118+
// r = x, x - M', x - 2M', x - 3M' (M' = M % 2^64) (mod 2B)
119+
// r = x,
120+
// x - M' + (0 or 2B),
121+
// x - 2M' + (0, 2B or 4B),
122+
// x - 3M' + (0, 2B, 4B or 6B) (without mod!)
123+
// (r - x) = 0, (0)
124+
// - M' + (0 or 2B), (1)
125+
// -2M' + (0 or 2B or 4B), (2)
126+
// -3M' + (0 or 2B or 4B or 6B) (3) (mod MOD1)
127+
// we checked that
128+
// ((1) mod MOD1) mod 5 = 2
129+
// ((2) mod MOD1) mod 5 = 3
130+
// ((3) mod MOD1) mod 5 = 4
131+
var diff: i64 = c1[i] - @as(i64, @intCast(@mod(x, m1)));
132+
if (diff < 0) {
133+
diff += m1;
134+
}
135+
const offset = [5]u64{ 0, 0, m1m2m3, 2 * m1m2m3, 3 * m1m2m3 };
136+
x -%= offset[@intCast(@mod(diff, 5))];
137+
c[i] = @bitCast(x);
138+
}
139+
return c;
87140
}
88141

89142
// internal

0 commit comments

Comments
 (0)