3
3
* Date: 2019-01-09
4
4
* License: CC0
5
5
* Source: http://neerc.ifmo.ru/trains/toulouse/2017/fft2.pdf (do read, it's excellent)
6
- Papers about accuracy: http://www.daemonology.net/papers/fft.pdf, http://www.cs.berkeley.edu/~fateman/papers/fftvsothers.pdf
7
- For integers rounding works if $(|a| + |b|)\max(a, b) < \mathtt{\sim} 10^9$, or in theory maybe $10^6$.
8
- * Description: fft(a, ...) computes $\hat f(k) = \sum_x a[x] \exp(2\pi i \cdot k x / N)$ for all $k$. Useful for convolution:
6
+ Accuracy bound from http://www.daemonology.net/papers/fft.pdf
7
+ * Description: fft(a) computes $\hat f(k) = \sum_x a[x] \exp(2\pi i \cdot k x / N)$ for all $k$. Useful for convolution:
9
8
\texttt{conv(a, b) = c}, where $c[x] = \sum a[i]b[x-i]$.
10
9
For convolution of complex numbers or more than two vectors: FFT, multiply
11
10
pointwise, divide by n, reverse(start+1, end), FFT back.
12
- For integers, consider using a number-theoretic transform instead, to avoid rounding issues.
13
- * Time: O(N \log N) with $N = |A|+|B|-1$ ($\tilde 1s$ for $N=2^{22}$)
11
+ Rounding is safe if $(\sum a_i^2 + \sum b_i^2)\log_2{N} < 9\cdot10^{14}$
12
+ (in practice $10^{16}$; higher for random inputs).
13
+ Otherwise, use long doubles/NTT/FFTMod.
14
+ * Time: O(N \log N) with $N = |A|+|B|$ ($\tilde 1s$ for $N=2^{22}$)
14
15
* Status: somewhat tested
15
16
*/
16
17
#pragma once
17
18
18
19
typedef complex < double > C ;
19
20
typedef vector < double > vd ;
20
-
21
- void fft (vector < C > & a , vector < C > & rt , vi & rev , int n ) {
21
+ void fft (vector < C > & a ) {
22
+ int n = sz (a ), L = 31 - __builtin_clz (n );
23
+ static vector < complex < long double >> R (2 , 1 );
24
+ static vector < C > rt (2 , 1 ); // (^ 10% faster if double)
25
+ for (static int k = 2 ; k < n ; k *= 2 ) {
26
+ R .resize (n ); rt .resize (n );
27
+ auto x = polar (1.0L , M_PIl / k ); // M_PI, lower-case L
28
+ rep (i ,k ,2 * k ) rt [i ] = R [i ] = i & 1 ? R [i /2 ] * x : R [i /2 ];
29
+ }
30
+ vi rev (n );
31
+ rep (i ,0 ,n ) rev [i ] = (rev [i / 2 ] | (i & 1 ) << L ) / 2 ;
22
32
rep (i ,0 ,n ) if (i < rev [i ]) swap (a [i ], a [rev [i ]]);
23
33
for (int k = 1 ; k < n ; k *= 2 )
24
34
for (int i = 0 ; i < n ; i += 2 * k ) rep (j ,0 ,k ) {
@@ -27,25 +37,19 @@ void fft(vector<C> &a, vector<C> &rt, vi& rev, int n) {
27
37
C z (x [0 ]* y [0 ] - x [1 ]* y [1 ], x [0 ]* y [1 ] + x [1 ]* y [0 ]); /// exclude-line
28
38
a [i + j + k ] = a [i + j ] - z ;
29
39
a [i + j ] += z ;
30
- }
40
+ }
31
41
}
32
-
33
42
vd conv (const vd & a , const vd & b ) {
34
43
if (a .empty () || b .empty ()) return {};
35
44
vd res (sz (a ) + sz (b ) - 1 );
36
45
int L = 32 - __builtin_clz (sz (res )), n = 1 << L ;
37
- vector < C > in (n ), out (n ), rt (n , 1 ); vi rev (n );
38
- rep (i ,0 ,n ) rev [i ] = (rev [i /2 ] | (i & 1 ) << L ) / 2 ;
39
- for (int k = 2 ; k < n ; k *= 2 ) {
40
- C z [] = {1 , polar (1.0 , M_PI / k )};
41
- rep (i ,k ,2 * k ) rt [i ] = rt [i /2 ] * z [i & 1 ];
42
- }
46
+ vector < C > in (n ), out (n );
43
47
copy (all (a ), begin (in ));
44
48
rep (i ,0 ,sz (b )) in [i ].imag (b [i ]);
45
- fft (in , rt , rev , n );
49
+ fft (in );
46
50
trav (x , in ) x *= x ;
47
51
rep (i ,0 ,n ) out [i ] = in [- i & (n - 1 )] - conj (in [i ]);
48
- fft (out , rt , rev , n );
49
- rep (i ,0 ,sz (res )) res [i ] = imag (out [i ]) / (4 * n );
52
+ fft (out );
53
+ rep (i ,0 ,sz (res )) res [i ] = imag (out [i ]) / (4 * n );
50
54
return res ;
51
55
}
0 commit comments