|
62 | 62 |
|
63 | 63 | // MODULES // |
64 | 64 |
|
65 | | -var sincos = require( '@stdlib/math/base/special/sincos' ); |
66 | | -var TWO_PI = require( '@stdlib/constants/float64/two-pi' ); |
67 | | -var chRef = require( './ch_ref.js' ); |
68 | | -var ccRef = require( './cc_ref.js' ); |
| 65 | +var sin = require( '@stdlib/math/base/special/sin' ); |
| 66 | +var cos = require( '@stdlib/math/base/special/cos' ); |
| 67 | +var PI = require( '@stdlib/constants/float64/pi' ); |
69 | 68 |
|
70 | 69 |
|
71 | 70 | // VARIABLES // |
72 | 71 |
|
73 | | -var sc = sincos( ( 2 * TWO_PI ) / 3 ); |
74 | | -var taur = sc[ 1 ]; // -0.5 |
75 | | -var taui = -sc[ 0 ]; // 0.866025403784439 |
| 72 | +var TAUR = cos( ( 2.0 * PI ) / 3.0 ); |
| 73 | +var TAUI = sin( ( 2.0 * PI ) / 3.0 ); |
76 | 74 |
|
77 | 75 |
|
78 | | -// MAIN // |
| 76 | +// FUNCTIONS // |
79 | 77 |
|
80 | 78 | /** |
81 | | -* Performs the forward FFT of length 3 for real-valued sequences. |
| 79 | +* Resolves an index into the input array. |
| 80 | +* |
| 81 | +* ## Notes |
| 82 | +* |
| 83 | +* In a forward real FFT, the previous stage writes its results as two "rows" per sub-sequence. |
| 84 | +* |
| 85 | +* Thus, when reading from an input array stored in linear memory, we can reinterpret the array as a three-dimensional logical view containing `L` independent sub-sequences having two "rows" (`even + odd` and `even - odd`, respectively) and where each "row" is arranged as `M*L` contiguous elements corresponding to interleaved real and imaginary components. |
| 86 | +* |
| 87 | +* Accordingly, the following is a logical view of an input array (zero-based indexing) which contains `L = 3` transforms and in which each sub-sequence has length `M = 4`: |
| 88 | +* |
| 89 | +* ```text |
| 90 | +* │ k = 0 k = 1 k = 2 |
| 91 | +* │ ──────────────────────────────────────────────────────────────────────────→ k |
| 92 | +* j = 0 (even+odd) │ cc(0,0,0) ... cc(3,0,0) cc(0,1,0) ... cc(3,1,0) cc(0,2,0) ... cc(3,2,0) |
| 93 | +* │ |
| 94 | +* j = 1 (even-odd) │ cc(0,0,1) ... cc(3,0,1) cc(0,1,1) ... cc(3,1,1) cc(0,2,1) ... cc(3,2,1) |
| 95 | +* └───────────────────────────────────────────────────────────────────────────→ i |
| 96 | +* ↑ ↑ ↑ ↑ ↑ ↑ |
| 97 | +* i = 0 M-1 0 M-1 0 M-1 |
| 98 | +* ``` |
| 99 | +* |
| 100 | +* In the above, |
| 101 | +* |
| 102 | +* - `i` is the fastest varying index, which walks within one short sub-sequence corresponding to either the `even + odd` or `even - odd` row of the previous stage. |
| 103 | +* - `j` selects between the `even + odd` and `even - odd` row of the previous stage. |
| 104 | +* - `k` specifies the index of one of the `L` independent transforms we are processing. |
| 105 | +* |
| 106 | +* In linear memory, the three-dimensional logical view is arranged as follows: |
| 107 | +* |
| 108 | +* ```text |
| 109 | +* | cc(0,0,0)...cc(3,0,0) ... cc(0,2,0)...cc(3,2,0) | cc(0,0,1)...cc(3,0,1) ... cc(0,2,1)...cc(3,2,1) | |
| 110 | +* ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ |
| 111 | +* 0 M-1 LM LM-1 (L+1)M (L+1)M-1 (2L-1)M 2LM-1 |
| 112 | +* ``` |
82 | 113 | * |
83 | 114 | * @private |
84 | | -* @param {number} ido - number of real values for each transform |
85 | | -* @param {number} l1 - length of the input sequences |
86 | | -* @param {Float64Array} cc - input array containing sequences to be transformed |
87 | | -* @param {number} ccOffset - offset for the input array |
88 | | -* @param {Float64Array} ch - output array containing transformed sequences |
89 | | -* @param {number} chOffset - offset for the output array |
90 | | -* @param {Float64Array} wa1 - first array of twiddle factors |
91 | | -* @param {number} wa1Offset - offset for the first twiddle factors array |
92 | | -* @param {Float64Array} wa2 - second array of twiddle factors |
93 | | -* @param {number} wa2Offset - offset for the second twiddle factors array |
94 | | -* @returns {void} |
| 115 | +* @param {NonNegativeInteger} i - index of an element within a sub-sequence |
| 116 | +* @param {NonNegativeInteger} k - index of the sub-sequence being transformed |
| 117 | +* @param {NonNegativeInteger} j - input row |
| 118 | +* @param {NonNegativeInteger} L - number of sub-sequences |
| 119 | +* @param {NonNegativeInteger} M - sub-sequence length |
| 120 | +* @param {integer} stride - stride length of the input array |
| 121 | +* @param {NonNegativeInteger} offset - index specifying the first indexed element in the output array |
| 122 | +* @returns {NonNegativeInteger} computed index |
| 123 | +* |
| 124 | +* @example |
| 125 | +* var stride = 1; |
| 126 | +* var offset = 0; |
| 127 | +* |
| 128 | +* var M = 4; // sub-sequence length |
| 129 | +* var L = 3; // number of sub-sequences |
| 130 | +* |
| 131 | +* var idx = iptr( 0, 0, 0, L, M, stride, offset ); |
| 132 | +* // returns 0 |
| 133 | +* |
| 134 | +* idx = iptr( 1, 0, 0, L, M, stride, offset ); |
| 135 | +* // returns 1 |
| 136 | +* |
| 137 | +* idx = iptr( M-1, 0, 0, L, M, stride, offset ); |
| 138 | +* // returns 3 |
| 139 | +* |
| 140 | +* idx = iptr( 0, 1, 0, L, M, stride, offset ); |
| 141 | +* // returns 4 |
| 142 | +* |
| 143 | +* // ... |
| 144 | +* |
| 145 | +* idx = iptr( M-1, L-1, 1, L, M, stride, offset ); |
| 146 | +* // returns 23 |
95 | 147 | */ |
96 | | -function radf3( ido, l1, cc, ccOffset, ch, chOffset, wa1, wa1Offset, wa2, wa2Offset ) { |
97 | | - var idp2; |
98 | | - var tr3; |
99 | | - var tr2; |
100 | | - var ti3; |
101 | | - var ti2; |
102 | | - var dr3; |
103 | | - var dr2; |
104 | | - var di3; |
105 | | - var di2; |
106 | | - var cr2; |
107 | | - var ci2; |
108 | | - var ic; |
109 | | - var i; |
110 | | - var k; |
111 | | - |
112 | | - // Parameter adjustments... |
113 | | - chOffset -= 1 + ( ido << 2 ); |
114 | | - ccOffset -= 1 + ( ido * ( 1 + l1 ) ); |
115 | | - wa1Offset -= 1; |
116 | | - wa2Offset -= 1; |
117 | | - |
118 | | - // Function body: |
119 | | - for ( k = 1; k <= l1; k++ ) { |
120 | | - cr2 = cc[ ccRef( 1, k, 2, l1, ido ) + ccOffset ] + cc[ ccRef( 1, k, 3, l1, ido ) + ccOffset ]; |
121 | | - ch[ chRef( 1, 1, k, 3, ido ) + chOffset ] = cc[ ccRef( 1, k, 1, l1, ido ) + ccOffset ] + cr2; |
122 | | - ch[ chRef( 1, 3, k, 3, ido ) + chOffset ] = taui * ( cc[ ccRef( 1, k, 3, l1, ido ) + ccOffset ] - cc[ ccRef( 1, k, 2, l1, ido ) + ccOffset ] ); |
123 | | - ch[ chRef( ido, 2, k, 3, ido ) + chOffset ] = cc[ ccRef( 1, k, 1, l1, ido ) + ccOffset ] + ( taur * cr2 ); |
124 | | - } |
125 | | - if ( ido === 1 ) { |
126 | | - return; |
127 | | - } |
128 | | - idp2 = ido + 2; |
129 | | - for ( k = 1; k <= l1; k++ ) { |
130 | | - for ( i = 3; i <= ido; i += 2 ) { |
131 | | - ic = idp2 - i; |
132 | | - dr2 = ( wa1[ i - 2 + wa1Offset ] * cc[ ccRef( i - 1, k, 2, l1, ido ) + ccOffset ]) + ( wa1[ i - 1 + wa1Offset ] * cc[ ccRef( i, k, 2, l1, ido ) + ccOffset ] ); |
133 | | - di2 = ( wa1[ i - 2 + wa1Offset ] * cc[ ccRef( i, k, 2, l1, ido ) + ccOffset ]) - ( wa1[ i - 1 + wa1Offset ] * cc[ ccRef( i - 1, k, 2, l1, ido ) + ccOffset ] ); |
134 | | - dr3 = ( wa2[ i - 2 + wa2Offset ] * cc[ ccRef( i - 1, k, 3, l1, ido ) + ccOffset ]) + ( wa2[ i - 1 + wa2Offset ] * cc[ ccRef( i, k, 3, l1, ido ) + ccOffset ] ); |
135 | | - di3 = ( wa2[ i - 2 + wa2Offset ] * cc[ ccRef( i, k, 3, l1, ido ) + ccOffset ]) - ( wa2[ i - 1 + wa2Offset ] * cc[ ccRef( i - 1, k, 3, l1, ido ) + ccOffset ] ); |
136 | | - cr2 = dr2 + dr3; |
137 | | - ci2 = di2 + di3; |
138 | | - ch[ chRef( i - 1, 1, k, 3, ido ) + chOffset ] = cc[ ccRef( i - 1, k, 1, l1, ido ) + ccOffset ] + cr2; |
139 | | - ch[ chRef( i, 1, k, 3, ido ) + chOffset ] = cc[ ccRef( i, k, 1, l1, ido ) + ccOffset ] + ci2; |
140 | | - tr2 = cc[ ccRef( i - 1, k, 1, l1, ido ) + ccOffset ] + ( taur * cr2 ); |
141 | | - ti2 = cc[ ccRef( i, k, 1, l1, ido ) + ccOffset ] + ( taur * ci2 ); |
142 | | - tr3 = taui * ( di2 - di3 ); |
143 | | - ti3 = taui * ( dr3 - dr2 ); |
144 | | - ch[ chRef( i - 1, 3, k, 3, ido ) + chOffset ] = tr2 + tr3; |
145 | | - ch[ chRef( ic - 1, 2, k, 3, ido ) + chOffset ] = tr2 - tr3; |
146 | | - ch[ chRef( i, 3, k, 3, ido ) + chOffset ] = ti2 + ti3; |
147 | | - ch[ chRef( ic, 2, k, 3, ido ) + chOffset ] = ti3 - ti2; |
148 | | - } |
149 | | - } |
| 148 | +function iptr( i, k, j, L, M, stride, offset ) { |
| 149 | + var n = i + ( ( k+(j*L) ) * M ); |
| 150 | + return ( n*stride ) + offset; |
150 | 151 | } |
151 | 152 |
|
| 153 | +/** |
| 154 | +* Resolves an index into the output array. |
| 155 | +* |
| 156 | +* ## Notes |
| 157 | +* |
| 158 | +* When writing to an output array stored in linear memory, we can reinterpret the array as a three-dimensional logical view containing `L` independent sub-sequences having three "columns" corresponding to the three components of a radix-3 stage (with real and imaginary parts interleaved along each sub-sequence) and where each "column" has `M` elements. |
| 159 | +* |
| 160 | +* Accordingly, the following is a logical view of an output array (zero-based indexing) which contains `L = 3` transforms and in which each "column" sub-sequence has length `M = 4`: |
| 161 | +* |
| 162 | +* ```text |
| 163 | +* j = 0 (component 0) j = 1 (component 1) j = 2 (component 2) |
| 164 | +* k = 0 ─┬───────────────────────────────────────┬───────────────────────────────────────┬───────────────────────────────────────┐ |
| 165 | +* │ out(0,0,0) out(1,0,0) ... out(3,0,0) │ out(0,1,0) out(1,1,0) ... out(3,1,0) │ out(0,2,0) out(1,2,0) ... out(3,2,0) │ |
| 166 | +* └───────────────────────────────────────┴───────────────────────────────────────┴───────────────────────────────────────┤ |
| 167 | +* k = 1 ─┬───────────────────────────────────────┬───────────────────────────────────────┬───────────────────────────────────────┤ |
| 168 | +* │ out(0,0,1) out(1,0,1) ... out(3,0,1) │ out(0,1,1) out(1,1,1) ... out(3,1,1) │ out(0,2,1) out(1,2,1) ... out(3,2,1) │ |
| 169 | +* └───────────────────────────────────────┴───────────────────────────────────────┴───────────────────────────────────────┤ |
| 170 | +* k = 2 ─┬───────────────────────────────────────┬───────────────────────────────────────┬───────────────────────────────────────┤ |
| 171 | +* │ out(0,0,2) out(1,0,2) ... out(3,0,2) │ out(0,1,2) out(1,1,2) ... out(3,1,2) │ out(0,2,2) out(1,2,2) ... out(3,2,2) │ |
| 172 | +* └───────────────────────────────────────┴───────────────────────────────────────┴───────────────────────────────────────┘ |
| 173 | +* ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ |
| 174 | +* i = 0 1 M-1 0 1 M-1 0 1 M-1 |
| 175 | +* ``` |
| 176 | +* |
| 177 | +* In the above, |
| 178 | +* |
| 179 | +* - `i` is the fastest varying index, which walks within one short "column" sub-sequence. |
| 180 | +* - `j` selects which of the three components we are in (0, 1, or 2). |
| 181 | +* - `k` specifies the index of one of the `L` independent transforms we are processing. |
| 182 | +* |
| 183 | +* In linear memory, the three-dimensional logical view is arranged as follows: |
| 184 | +* |
| 185 | +* ```text |
| 186 | +* | out(0,0,0)...out(3,0,0) | out(0,1,0)...out(3,1,0) | out(0,2,0)...out(3,2,0) | out(0,0,1)...out(3,0,1) | ... | out(0,2,2)...out(3,2,2) | |
| 187 | +* ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ ↑ |
| 188 | +* 0 M-1 M 2M-1 2M 3M-1 3M 4M-1 (3L-1)M 3LM-1 |
| 189 | +* ``` |
| 190 | +* |
| 191 | +* As may be observed, when resolving an index in the output array, the `j` and `k` dimensions are swapped relative index resolution in the input array. This stems from `radf3` being only one stage in a multi-stage driver which alternates between using `cc` and `out` as workspace buffers. After each stage, the next stage reads what the previous stage wrote. |
| 192 | +* |
| 193 | +* Each stage expects a transpose, and, in order to avoid explicit transposition between the stages, we swap the last two logical dimensions while still maintaining cache locality within the inner loop logical dimension, as indexed by `i`. |
| 194 | +* |
| 195 | +* @private |
| 196 | +* @param {NonNegativeInteger} i - index of an element within a sub-sequence |
| 197 | +* @param {NonNegativeInteger} j - index specifying which of the three complex components we are in (0, 1, or 2) |
| 198 | +* @param {NonNegativeInteger} k - index of the sub-sequence being transformed |
| 199 | +* @param {NonNegativeInteger} M - sub-sequence length |
| 200 | +* @param {integer} stride - stride length of the output array |
| 201 | +* @param {NonNegativeInteger} offset - index specifying the first indexed element in the output array |
| 202 | +* @returns {NonNegativeInteger} computed index |
| 203 | +* |
| 204 | +* @example |
| 205 | +* var stride = 1; |
| 206 | +* var offset = 0; |
| 207 | +* |
| 208 | +* var M = 4; // sub-sequence length |
| 209 | +* var L = 3; // number of sub-sequences |
| 210 | +* |
| 211 | +* var idx = optr( 0, 0, 0, M, stride, offset ); |
| 212 | +* // returns 0 |
| 213 | +* |
| 214 | +* idx = optr( 1, 0, 0, M, stride, offset ); |
| 215 | +* // returns 1 |
| 216 | +* |
| 217 | +* idx = optr( M-1, 0, 0, M, stride, offset ); |
| 218 | +* // returns 3 |
| 219 | +* |
| 220 | +* idx = optr( 0, 1, 0, M, stride, offset ); |
| 221 | +* // returns 4 |
| 222 | +* |
| 223 | +* // ... |
| 224 | +* |
| 225 | +* idx = optr( M-1, 2, L-1, M, stride, offset ); |
| 226 | +* // returns 35 |
| 227 | +*/ |
| 228 | +function optr( i, j, k, M, stride, offset ) { |
| 229 | + var n = i + ( ( j+(k*3) ) * M ); |
| 230 | + return ( n*stride ) + offset; |
| 231 | +} |
152 | 232 |
|
153 | | -// EXPORTS // |
154 | | - |
155 | | -module.exports = radf3; |
| 233 | +// Check diff with radf2 to see JSDoc changes |
0 commit comments