Add fft::mul_truncate, remove mul_slow, and store poly_t coefficients in std::deque

adamant-pwn · adamant-pwn · commit 434b9befd0b7 · 2024-06-29T15:22:55.000+02:00
diff --git a/cp-algo/math/fft.hpp b/cp-algo/math/fft.hpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include <complex>
 #include <cassert>
+#include <ranges>
 #include <vector>
 #include <bit>
 
@@ -33,16 +34,10 @@ namespace cp_algo::math::fft {
     struct cvector {
         static constexpr size_t pre_roots = 1 << 17;
         std::vector<vftype> x, y;
-        cvector() {}
         cvector(size_t n) {
-            resize(n);
-        }
-        void resize(size_t n) {
-            n = std::bit_ceil(std::max<size_t>(n, flen));
-            if(size() != n) {
-                x.resize(n / flen);
-                y.resize(n / flen);
-            }
+            n = std::max(flen, std::bit_ceil(n));
+            x.resize(n / flen);
+            y.resize(n / flen);
         }
         template<class pt = point>
         void set(size_t k, pt t) {
@@ -162,23 +157,6 @@ namespace cp_algo::math::fft {
         return res;
     }();
 
-    template<typename base>
-    void mul_slow(std::vector<base> &a, const std::vector<base> &b) {
-        if(a.empty() || b.empty()) {
-            a.clear();
-        } else {
-            int n = a.size();
-            int m = b.size();
-            a.resize(n + m - 1);
-            for(int k = n + m - 2; k >= 0; k--) {
-                a[k] *= b[0];
-                for(int j = std::max(k - n + 1, 1); j < std::min(k + 1, m); j++) {
-                    a[k] += a[k - j] * b[j];
-                }
-            }
-        }
-    }
-
     template<typename base>
     struct dft {
         cvector A;
@@ -219,7 +197,7 @@ namespace cp_algo::math::fft {
         int split;
         cvector A, B;
         
-        dft(std::vector<base> const& a, size_t n): A(n), B(n) {
+        dft(auto const& a, size_t n): A(n), B(n) {
             split = std::sqrt(base::mod());
             cvector::exec_on_roots(2 * n, size(a), [&](size_t i, point rt) {
                 size_t ti = std::min(i, i - n);
@@ -233,7 +211,7 @@ namespace cp_algo::math::fft {
             }
         }
 
-        void mul(auto &&C, auto &&D, auto &res) {
+        void mul(auto &&C, auto &&D, auto &res, size_t k) {
             assert(A.size() == C.size());
             size_t n = A.size();
             if(!n) {
@@ -249,9 +227,8 @@ namespace cp_algo::math::fft {
             A.ifft();
             B.ifft();
             C.ifft();
-            res.resize(2 * n);
             auto splitsplit = (base(split) * split).rem();
-            cvector::exec_on_roots(2 * n, n, [&](size_t i, point rt) {
+            cvector::exec_on_roots(2 * n, std::min(n, k), [&](size_t i, point rt) {
                 rt = conj(rt);
                 auto Ai = A.get(i) * rt;
                 auto Bi = B.get(i) * rt;
@@ -260,18 +237,21 @@ namespace cp_algo::math::fft {
                 int64_t A1 = llround(real(Ci));
                 int64_t A2 = llround(real(Bi));
                 res[i] = A0 + A1 * split + A2 * splitsplit;
+                if(n + i >= k) {
+                    return;
+                }
                 int64_t B0 = llround(imag(Ai));
                 int64_t B1 = llround(imag(Ci));
                 int64_t B2 = llround(imag(Bi));
                 res[n + i] = B0 + B1 * split + B2 * splitsplit;
             });
         }
-        void mul(auto &&B, auto& res) {
-            mul(B.A, B.B, res);
+        void mul(auto &&B, auto& res, size_t k) {
+            mul(B.A, B.B, res, k);
         }
         std::vector<base> operator *= (auto &&B) {
-            std::vector<base> res;
-            mul(B.A, B.B, res);
+            std::vector<base> res(2 * A.size());
+            mul(B.A, B.B, res, size(res));
             return res;
         }
 
@@ -288,30 +268,24 @@ namespace cp_algo::math::fft {
         }
         return std::max(flen, std::bit_ceil(as + bs - 1) / 2);
     }
-    
-    template<typename base>
-    void mul(std::vector<base> &a, std::vector<base> const& b) {
-        if(std::min(a.size(), b.size()) < magic) {
-            mul_slow(a, b);
-            return;
+    void mul_truncate(auto &a, auto const& b, size_t k) {
+        using base = std::decay_t<decltype(a[0])>;
+        auto n = std::max(flen, std::bit_ceil(k) / 2);
+        auto A = dft<base>(std::views::take(a, k), n);
+        if(size(a) < k) {
+            a.resize(k);
         }
-        auto n = com_size(a.size(), b.size());
-        auto A = dft<base>(a, n);
         if(a == b) {
-            A.mul(dft<base>(A), a);
+            A.mul(dft<base>(A), a, k);
         } else {
-            A.mul(dft<base>(b, n), a);
+            A.mul(dft<base>(std::views::take(b, k), n), a, k);
         }
     }
-    template<typename base>
-    void circular_mul(std::vector<base> &a, std::vector<base> const& b) {
-        auto n = std::max(flen, std::bit_ceil(max(a.size(), b.size())) / 2);
-        auto A = dft<base>(a, n);
-        if(a == b) {
-            A.mul(dft<base>(A), a);
-        } else {
-            A.mul(dft<base>(b, n), a);
-        }
+    // Full (non-cyclic) product of a and b, written back into a, which ends up
+    // with size(a) + size(b) - 1 entries.
+    // An empty operand yields an empty result. This is handled up front because
+    // size(a) + size(b) - 1 is unsigned: it wraps around when both operands are
+    // empty, and when only b is empty it would leave the last entry of a stale.
+    void mul(auto &a, auto const& b) {
+        if(a.empty() || b.empty()) {
+            a.clear();
+            return;
+        }
+        mul_truncate(a, b, size(a) + size(b) - 1);
+    }
+    void circular_mul(auto &a, auto const& b) {
+        mul_truncate(a, b, std::max(size(a), size(b)));
     }
 }
 #endif // CP_ALGO_MATH_FFT_HPP
diff --git a/cp-algo/math/poly.hpp b/cp-algo/math/poly.hpp
@@ -12,18 +12,20 @@
 #include <optional>
 #include <utility>
 #include <vector>
+#include <deque>
 #include <list>
 namespace cp_algo::math {
     template<typename T>
     struct poly_t {
         using base = T;
-        std::vector<T> a;
+        std::deque<T> a;
         
         void normalize() {poly::impl::normalize(*this);}
         
         poly_t(){}
         poly_t(T a0): a{a0} {normalize();}
-        poly_t(std::vector<T> const& t): a(t) {normalize();}
+        poly_t(std::vector<T> const& t): a(begin(t), end(t)) {normalize();}
+        poly_t(std::deque<T> const& t): a(t) {normalize();}
         
         poly_t operator -() const {return poly::impl::neg(*this);}
         poly_t& operator += (poly_t const& t) {return poly::impl::add(*this, t);}
@@ -563,10 +565,10 @@ namespace cp_algo::math {
                 
                 int N = fft::com_size((n + 1) / 2, (n + 1) / 2);
                 
-                auto Q0f = fft::dft(Q0.a, N);
-                auto Q1f = fft::dft(Q1.a, N);
-                auto P0f = fft::dft(P0.a, N);
-                auto P1f = fft::dft(P1.a, N);
+                auto Q0f = fft::dft<T>(Q0.a, N);
+                auto Q1f = fft::dft<T>(Q1.a, N);
+                auto P0f = fft::dft<T>(P0.a, N);
+                auto P1f = fft::dft<T>(P1.a, N);
                 
                 if(k % 2) {
                     P = poly_t(Q0f * P1f) + poly_t(Q1f * P0f);
diff --git a/cp-algo/math/poly/impl/div.hpp b/cp-algo/math/poly/impl/div.hpp
@@ -100,11 +100,11 @@ namespace cp_algo::math::poly::impl {
         
         int N = fft::com_size((n + 1) / 2, (n + 1) / 2);
         
-        auto q0f = fft::dft(q0.a, N);
-        auto q1f = fft::dft(q1.a, N);
+        auto q0f = fft::dft<typename poly::base>(q0.a, N);
+        auto q1f = fft::dft<typename poly::base>(q1.a, N);
 
         // Q(x)*Q(-x) = Q0(x^2)^2 - x^2 Q1(x^2)^2
-        auto qqf = fft::dft(inv(
+        auto qqf = fft::dft<typename poly::base>(inv(
             poly(q0f * q0f) - poly(q1f * q1f).mul_xk(1)
         , (n + 1) / 2).a, N);
         
diff --git a/verify/poly/inv.test.cpp b/verify/poly/inv.test.cpp
@@ -1,5 +1,6 @@
 // @brief Inv of Power Series
 #define PROBLEM "https://judge.yosupo.jp/problem/inv_of_formal_power_series"
+#pragma GCC optimize("Ofast,unroll-loops")
 #include "cp-algo/math/poly.hpp"
 #include <bits/stdc++.h>