FFT span optimization

vitalsong · vitalsong · commit 62dfa6f85d0a · 2025-04-24T13:13:28.000+03:00
diff --git a/lib/fft/cmplx-ifft.h b/lib/fft/cmplx-ifft.h
@@ -12,26 +12,32 @@ class CmplxIfftPlan : public IfftPlanC
     }
 
     arr_cmplx solve(span_t<cmplx_t> x) const final {
-        const real_t m = real_t(1) / x.size();
-        arr_cmplx y(x);
-        y *= m;
-        _inplace_conj(y);
-        y = fft_->solve(y);
-        _inplace_conj(y);
-        return y;
+        arr_cmplx r(x);
+        this->solve(x, r);
+        return r;
+    }
+
+    void solve(span_t<cmplx_t> x, mut_span_t<cmplx_t> r) const final {
+        const int n = fft_->size();
+        DSPLIB_ASSERT(x.size() == n, "array size error");
+        DSPLIB_ASSERT(x.size() == r.size(), "array size error");
+        arr_cmplx t(n);
+        const real_t m = real_t(1) / n;
+        for (int i = 0; i < n; ++i) {
+            t[i].re = x[i].re * m;
+            t[i].im = -(x[i].im * m);
+        }
+        fft_->solve(t, r);
+        for (int i = 0; i < n; ++i) {
+            r[i].im = -r[i].im;
+        }
     }
 
     int size() const noexcept final {
         return fft_->size();
     }
 
 private:
-    static void _inplace_conj(arr_cmplx& x) {
-        for (auto& v : x) {
-            v.im = -v.im;
-        }
-    }
-
     std::shared_ptr<FftPlanC> fft_;
 };
 
diff --git a/lib/fft/fact-fft.cpp b/lib/fft/fact-fft.cpp
@@ -59,12 +59,12 @@ class PlanTree
         return _n;
     }
 
-    [[nodiscard]] PlanTree* q_plan() const noexcept {
+    [[nodiscard]] const PlanTree* q_plan() const noexcept {
         assert(has_next());
         return _q;
     }
 
-    [[nodiscard]] PlanTree* p_plan() const noexcept {
+    [[nodiscard]] const PlanTree* p_plan() const noexcept {
         assert(has_next());
         return _p;
     }
@@ -102,8 +102,8 @@ class PlanTree
     }
 
     const int _n;
-    PlanTree* _p{nullptr};
-    PlanTree* _q{nullptr};
+    const PlanTree* _p{nullptr};
+    const PlanTree* _q{nullptr};
     std::shared_ptr<FftPlanC> _solver;
 };
 
@@ -175,19 +175,26 @@ void _facfft(const PlanTree* plan, cmplx_t* restrict x, cmplx_t* restrict mem, c
 //-----------------------------------------------------------------------------------------------------------------------------
 FactorFFTPlan::FactorFFTPlan(int n)
   : _n{n}
-  , _px(n) {
+  , _twiddle{expj(-2 * pi * arange(n) / n)}   //TODO: only part of the table is needed
+{
     DSPLIB_ASSERT(!isprime(n), "fft size must not be a prime number");
-    _twiddle = expj(-2 * pi * arange(n) / n);   //TODO: only part of the table is needed
     _plan = std::make_shared<PlanTree>(n);
 }
 
 [[nodiscard]] arr_cmplx FactorFFTPlan::solve(span_t<cmplx_t> x) const {
-    DSPLIB_ASSERT(x.size() == _n, "input vector size is not equal fft size");
-    arr_cmplx r(x);   //TODO: remove copy
-    _facfft(_plan.get(), r.data(), _px.data(), _twiddle.data(), _n);
+    arr_cmplx r(_n);
+    this->solve(x, r);
     return r;
 }
 
+void FactorFFTPlan::solve(span_t<cmplx_t> x, mut_span_t<cmplx_t> r) const {
+    DSPLIB_ASSERT(x.size() == _n, "input array size is not equal fft size");
+    DSPLIB_ASSERT(x.size() == r.size(), "output array size error");
+    arr_cmplx tmp(_n);
+    r = x;   //TODO: remove copy
+    _facfft(_plan.get(), r.data(), tmp.data(), _twiddle.data(), _n);
+}
+
 [[nodiscard]] int FactorFFTPlan::size() const noexcept {
     return _n;
 }
diff --git a/lib/fft/fact-fft.h b/lib/fft/fact-fft.h
@@ -2,7 +2,6 @@
 
 #include <dsplib/math.h>
 #include <dsplib/fft.h>
-#include <dsplib/math.h>
 
 #include <memory>
 
@@ -15,14 +14,14 @@ class FactorFFTPlan : public FftPlanC
 {
 public:
     explicit FactorFFTPlan(int n);
-    ~FactorFFTPlan() = default;
+    ~FactorFFTPlan() override = default;
     [[nodiscard]] arr_cmplx solve(span_t<cmplx_t> x) const final;
+    void solve(span_t<cmplx_t> x, mut_span_t<cmplx_t> r) const final;
     [[nodiscard]] int size() const noexcept final;
 
 private:
-    int _n;
-    arr_cmplx _twiddle;
-    mutable arr_cmplx _px;   ///< tmp matrix for transpose
+    const int _n;
+    const arr_cmplx _twiddle;
     std::shared_ptr<PlanTree> _plan;
 };
 
diff --git a/lib/fft/pow2-fft.cpp b/lib/fft/pow2-fft.cpp
@@ -65,10 +65,10 @@ void _bitreverse(const cmplx_t* restrict x, cmplx_t* restrict y, const int32_t*
 
 Pow2FftPlan::Pow2FftPlan(int n)
   : n_{n}
-  , l_{nextpow2(n_)} {
+  , l_{nextpow2(n_)}
+  , bitrev_{_gen_bitrev_table(n)}
+  , coeffs_{_gen_coeffs_table(n)} {
     DSPLIB_ASSERT(ispow2(n), "FFT size must be power of 2");
-    bitrev_ = _gen_bitrev_table(n);
-    coeffs_ = _gen_coeffs_table(n);
 }
 
 void Pow2FftPlan::solve(span_t<cmplx_t> x, mut_span_t<cmplx_t> r) const {
diff --git a/lib/fft/pow2-fft.h b/lib/fft/pow2-fft.h
@@ -19,8 +19,8 @@ class Pow2FftPlan : public FftPlanC
 
     const int n_;
     const int l_;
-    std::vector<int32_t> bitrev_;
-    std::vector<cmplx_t> coeffs_;
+    const std::vector<int32_t> bitrev_;
+    const std::vector<cmplx_t> coeffs_;
 };
 
 }   // namespace dsplib
diff --git a/lib/fft/small-fft.h b/lib/fft/small-fft.h
@@ -1,7 +1,5 @@
 #pragma once
 
-#include "dsplib/array.h"
-#include "dsplib/types.h"
 #include <dsplib/fft.h>
 #include <dsplib/assert.h>
 
@@ -22,30 +20,35 @@ class SmallFftC : public FftPlanC
     ~SmallFftC() override {
     }
 
-    [[nodiscard]] arr_cmplx solve(span_t<cmplx_t> x) const final {
+    void solve(span_t<cmplx_t> x, mut_span_t<cmplx_t> r) const final {
         DSPLIB_ASSERT(x.size() == n_, "input size error");
-        arr_cmplx y(x.size());
+        DSPLIB_ASSERT(x.size() == r.size(), "input size error");
         switch (n_) {
         case 1:
-            y[0] = x[0];
+            r[0] = x[0];
             break;
         case 2:
-            _fft_n2(x.data(), y.data());
+            _fft_n2(x.data(), r.data());
             break;
         case 3:
-            _fft_n3(x.data(), y.data());
+            _fft_n3(x.data(), r.data());
             break;
         case 4:
-            _fft_n4(x.data(), y.data());
+            _fft_n4(x.data(), r.data());
             break;
         case 8:
-            _fft_n8(x.data(), y.data());
+            _fft_n8(x.data(), r.data());
             break;
         default:
             DSPLIB_THROW("size not supported");
             break;
         }
-        return y;
+    }
+
+    [[nodiscard]] arr_cmplx solve(span_t<cmplx_t> x) const final {
+        arr_cmplx r(x.size());
+        this->solve(x, r);
+        return r;
     }
 
     [[nodiscard]] int size() const noexcept final {
@@ -137,30 +140,36 @@ class SmallFftR : public FftPlanR
     ~SmallFftR() override {
     }
 
-    [[nodiscard]] arr_cmplx solve(span_t<real_t> x) const final {
+    void solve(span_t<real_t> x, mut_span_t<cmplx_t> r) const final {
         DSPLIB_ASSERT(x.size() == n_, "input size error");
-        arr_cmplx y(x.size());
+        DSPLIB_ASSERT(x.size() == r.size(), "input size error");
         switch (n_) {
         case 1:
-            y[0] = x[0];
+            r[0].re = x[0];
+            r[0].im = 0;
             break;
         case 2:
-            _fft_n2(x.data(), y.data());
+            _fft_n2(x.data(), r.data());
             break;
         case 3:
-            _fft_n3(x.data(), y.data());
+            _fft_n3(x.data(), r.data());
             break;
         case 4:
-            _fft_n4(x.data(), y.data());
+            _fft_n4(x.data(), r.data());
             break;
         case 8:
-            _fft_n8(x.data(), y.data());
+            _fft_n8(x.data(), r.data());
             break;
         default:
             DSPLIB_THROW("size not supported");
             break;
         }
-        return y;
+    }
+
+    [[nodiscard]] arr_cmplx solve(span_t<real_t> x) const final {
+        arr_cmplx r(x.size());
+        this->solve(x, r);
+        return r;
     }
 
     [[nodiscard]] int size() const noexcept final {
diff --git a/lib/subband.cpp b/lib/subband.cpp
@@ -182,11 +182,12 @@ class ChannelizerImpl : public DFTFilterBank
             const auto* restrict buf = buf_[decim_ * k];
             const auto* restrict flt = fview_[k];
             for (int m = 0; m < nbands_; m++) {
-                pout[nbands_ - m - 1] += flt[m] * buf[m];
+                pout[m] += flt[m] * buf[m];
             }
         }
 
-        return fft_->solve(pout);
+        //TODO: remove conj
+        return conj(fft_->solve(pout));
     }
 
 private:
@@ -212,6 +213,8 @@ class ChannelSynthesizerImpl : public DFTFilterBank
 
         auto xx = ifft_->solve(x);
         xx *= nbands_;
+
+        //TODO: fft and flip?
         buf_.push(xx, true);
 
         // calculate outputs of polyphase filters

Original file line number	Diff line number	Diff line change
`@@ -182,11 +182,12 @@ class ChannelizerImpl : public DFTFilterBank`
`182`	`182`	`const auto* restrict buf = buf_[decim_ * k];`
`183`	`183`	`const auto* restrict flt = fview_[k];`
`184`	`184`	`for (int m = 0; m < nbands_; m++) {`
`185`		`- pout[nbands_ - m - 1] += flt[m] * buf[m];`
	`185`	`+ pout[m] += flt[m] * buf[m];`
`186`	`186`	`}`
`187`	`187`	`}`
`188`	`188`
`189`		`- return fft_->solve(pout);`
	`189`	`+ //TODO: remove conj`
	`190`	`+ return conj(fft_->solve(pout));`
`190`	`191`	`}`
`191`	`192`
`192`	`193`	`private:`
`@@ -212,6 +213,8 @@ class ChannelSynthesizerImpl : public DFTFilterBank`
`212`	`213`
`213`	`214`	`auto xx = ifft_->solve(x);`
`214`	`215`	`xx *= nbands_;`
	`216`	`+`
	`217`	`+ //TODO: fft and flip?`
`215`	`218`	`buf_.push(xx, true);`
`216`	`219`
`217`	`220`	`// calculate outputs of polyphase filters`