don't cache FFT of size 3, update SmallFft class (#75)

vitalsong · web-flow · commit 7c360c631a81 · 2024-12-18T14:48:25.000+03:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.10)
-project(dsplib LANGUAGES CXX VERSION 0.54.6)
+project(dsplib LANGUAGES CXX VERSION 0.54.7)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
diff --git a/lib/fft/factory.cpp b/lib/fft/factory.cpp
@@ -46,8 +46,8 @@ std::shared_ptr<BaseFftPlanR> _get_rfft_plan(int n) {
 //-------------------------------------------------------------------------------------------------
 std::shared_ptr<BaseFftPlanC> create_fft_plan(int n) {
     //dont cache small fft plans
-    if ((n == 1) || (n == 2) || (n == 4) || (n == 8)) {
-        return std::make_shared<SmallFftPow2C>(n);
+    if (SmallFftC::is_supported(n)) {
+        return std::make_shared<SmallFftC>(n);
     }
 
     //TODO: use weak_ptr cache to prevent duplication
@@ -61,8 +61,8 @@ std::shared_ptr<BaseFftPlanC> create_fft_plan(int n) {
 }
 
 std::shared_ptr<BaseFftPlanR> create_rfft_plan(int n) {
-    if ((n == 1) || (n == 2) || (n == 4) || (n == 8)) {
-        return std::make_shared<SmallFftPow2R>(n);
+    if (SmallFftR::is_supported(n)) {
+        return std::make_shared<SmallFftR>(n);
     }
 
     thread_local LRUCache<int, std::shared_ptr<BaseFftPlanR>> cache{FFT_CACHE_SIZE};
diff --git a/lib/fft/primes-fft.h b/lib/fft/primes-fft.h
@@ -20,7 +20,7 @@ class PrimesFftC : public BaseFftPlanC
     explicit PrimesFftC(int n)
       : n_{n} {
         DSPLIB_ASSERT(isprime(n_), "`n` must be a prime number");
-        DSPLIB_ASSERT(n_ >= 3, "`n` must be greater than or equal to 3");
+        DSPLIB_ASSERT(n_ >= 5, "`n` must be greater than or equal to 5");
         if (n > MAX_DFT_SIZE) {
             const cmplx_t w = expj(-2 * pi / n);
             czt_ = std::make_shared<CztPlan>(n, n, w);
@@ -45,31 +45,10 @@ class PrimesFftC : public BaseFftPlanC
     }
 
 private:
-    static void _dft_n3(const cmplx_t* restrict x, cmplx_t* restrict y) noexcept {
-        constexpr real_t c = -0.5;
-        constexpr real_t d = 0.866025403784439;
-
-        y[0].re = x[0].re + x[1].re + x[2].re;
-        y[0].im = x[0].im + x[1].im + x[2].im;
-
-        const real_t re1_c = x[1].re * c;
-        const real_t im1_d = x[1].im * d;
-        const real_t re2_c = x[2].re * c;
-        const real_t im2_d = x[2].im * d;
-        y[1].re = x[0].re + (re1_c + im1_d) + (re2_c - im2_d);
-        y[2].re = x[0].re + (re1_c - im1_d) + (re2_c + im2_d);
-
-        const real_t re1_d = x[1].re * d;
-        const real_t im1_c = x[1].im * c;
-        const real_t re2_d = x[2].re * d;
-        const real_t im2_c = x[2].im * c;
-        y[1].im = x[0].im + (-re1_d + im1_c) + (re2_d + im2_c);
-        y[2].im = x[0].im + (re1_d + im1_c) + (-re2_d + im2_c);
-    }
-
     //TODO: add dft5, dft7
 
-    static void _dft_slow(const cmplx_t* restrict x, cmplx_t* restrict y, uint32_t n, const cmplx_t* restrict tw) noexcept {
+    static void _dft_slow(const cmplx_t* restrict x, cmplx_t* restrict y, uint32_t n,
+                          const cmplx_t* restrict tw) noexcept {
         DSPLIB_ASSUME(n <= MAX_DFT_SIZE);
         std::memset(reinterpret_cast<real_t*>(y), 0, n * sizeof(cmplx_t));
 
@@ -90,11 +69,6 @@ class PrimesFftC : public BaseFftPlanC
     void _dft(const cmplx_t* restrict x, cmplx_t* restrict y, int n) const {
         assert(n == n_);
 
-        if (n == 3) {
-            _dft_n3(x, y);
-            return;
-        }
-
         if (n <= MAX_DFT_SIZE) {
             assert(!w_.empty());
             _dft_slow(x, y, n, w_.data());
diff --git a/lib/fft/small-fft.h b/lib/fft/small-fft.h
@@ -9,17 +9,17 @@ namespace dsplib {
 
 //-------------------------------------------------------------------------------------------------------------
 //FFT implementation for small sizes
-class SmallFftPow2C : public BaseFftPlanC
+class SmallFftC : public BaseFftPlanC
 {
 public:
-    friend class SmallFftPow2R;
+    friend class SmallFftR;
 
-    explicit SmallFftPow2C(int n)
+    explicit SmallFftC(int n)
       : n_{n} {
-        DSPLIB_ASSERT((n >= 1) && (n <= 8), "only small power-of-two sizes are supported: 1, 2, 4, 8");
+        DSPLIB_ASSERT(is_supported(n), "only small sizes are supported: 1, 2, 3, 4, 8");
     }
 
-    ~SmallFftPow2C() override {
+    ~SmallFftC() override {
     }
 
     void solve(const cmplx_t* x, cmplx_t* y, int n) const final {
@@ -31,6 +31,9 @@ class SmallFftPow2C : public BaseFftPlanC
         case 2:
             _fft_n2(x, y);
             break;
+        case 3:
+            _fft_n3(x, y);
+            break;
         case 4:
             _fft_n4(x, y);
             break;
@@ -53,6 +56,10 @@ class SmallFftPow2C : public BaseFftPlanC
         return n_;
     }
 
+    static bool is_supported(int n) noexcept {
+        return (n == 1 || n == 2 || n == 3 || n == 4 || n == 8);
+    }
+
 private:
     static void _fft_n2(const cmplx_t* restrict x, cmplx_t* restrict y) noexcept {
         y[0].re = x[0].re + x[1].re;
@@ -61,6 +68,28 @@ class SmallFftPow2C : public BaseFftPlanC
         y[1].im = x[0].im - x[1].im;
     }
 
+    static void _fft_n3(const cmplx_t* restrict x, cmplx_t* restrict y) noexcept {
+        constexpr real_t c = -0.5;
+        constexpr real_t d = 0.866025403784439;
+
+        y[0].re = x[0].re + x[1].re + x[2].re;
+        y[0].im = x[0].im + x[1].im + x[2].im;
+
+        const real_t re1_c = x[1].re * c;
+        const real_t im1_d = x[1].im * d;
+        const real_t re2_c = x[2].re * c;
+        const real_t im2_d = x[2].im * d;
+        y[1].re = x[0].re + (re1_c + im1_d) + (re2_c - im2_d);
+        y[2].re = x[0].re + (re1_c - im1_d) + (re2_c + im2_d);
+
+        const real_t re1_d = x[1].re * d;
+        const real_t im1_c = x[1].im * c;
+        const real_t re2_d = x[2].re * d;
+        const real_t im2_c = x[2].im * c;
+        y[1].im = x[0].im + (-re1_d + im1_c) + (re2_d + im2_c);
+        y[2].im = x[0].im + (re1_d + im1_c) + (-re2_d + im2_c);
+    }
+
     static void _fft_n4(const cmplx_t* restrict x, cmplx_t* restrict y) noexcept {
         y[0].re = x[0].re + x[1].re + x[2].re + x[3].re;
         y[0].im = x[0].im + x[1].im + x[2].im + x[3].im;
@@ -101,15 +130,15 @@ class SmallFftPow2C : public BaseFftPlanC
 };
 
 //-------------------------------------------------------------------------------------------------------------
-class SmallFftPow2R : public BaseFftPlanR
+class SmallFftR : public BaseFftPlanR
 {
 public:
-    explicit SmallFftPow2R(int n)
+    explicit SmallFftR(int n)
       : n_{n} {
-        DSPLIB_ASSERT((n >= 1) && (n <= 8), "only small power-of-two sizes are supported: 1, 2, 4, 8");
+        DSPLIB_ASSERT(is_supported(n), "only small sizes are supported: 1, 2, 3, 4, 8");
     }
 
-    ~SmallFftPow2R() override {
+    ~SmallFftR() override {
     }
 
     void solve(const real_t* x, cmplx_t* y, int n) const final {
@@ -121,6 +150,9 @@ class SmallFftPow2R : public BaseFftPlanR
         case 2:
             _fft_n2(x, y);
             break;
+        case 3:
+            _fft_n3(x, y);
+            break;
         case 4:
             _fft_n4(x, y);
             break;
@@ -143,6 +175,10 @@ class SmallFftPow2R : public BaseFftPlanR
         return n_;
     }
 
+    static bool is_supported(int n) noexcept {
+        return (n == 1 || n == 2 || n == 3 || n == 4 || n == 8);
+    }
+
 private:
     static void _fft_n2(const real_t* restrict x, cmplx_t* restrict y) noexcept {
         y[0].re = x[0] + x[1];
@@ -151,6 +187,24 @@ class SmallFftPow2R : public BaseFftPlanR
         y[1].im = 0;
     }
 
+    static void _fft_n3(const real_t* restrict x, cmplx_t* restrict y) noexcept {
+        constexpr real_t c = -0.5;
+        constexpr real_t d = 0.866025403784439;
+
+        y[0].re = x[0] + x[1] + x[2];
+        y[0].im = 0;
+
+        const real_t re1_c = x[1] * c;
+        const real_t re2_c = x[2] * c;
+        y[1].re = x[0] + re1_c + re2_c;
+        y[2].re = y[1].re;
+
+        const real_t re1_d = x[1] * d;
+        const real_t re2_d = x[2] * d;
+        y[1].im = (-re1_d) + (re2_d);
+        y[2].im = -y[1].im;
+    }
+
     static void _fft_n4(const real_t* restrict x, cmplx_t* restrict y) noexcept {
         y[0].re = x[0] + x[1] + x[2] + x[3];
         y[0].im = 0;
@@ -169,7 +223,7 @@ class SmallFftPow2R : public BaseFftPlanR
         p1[1] = x[1] + x[5];
         p1[2] = x[2] + x[6];
         p1[3] = x[3] + x[7];
-        SmallFftPow2R::_fft_n4(p1, r1);
+        SmallFftR::_fft_n4(p1, r1);
 
         cmplx_t p2[4];
         cmplx_t r2[4];
@@ -178,7 +232,7 @@ class SmallFftPow2R : public BaseFftPlanR
         p2[2].re = 0;
         p2[2].im = x[6] - x[2];
         p2[3] = (x[3] - x[7]) * cmplx_t{-0.707106781186548, -0.707106781186548};
-        SmallFftPow2C::_fft_n4(p2, r2);
+        SmallFftC::_fft_n4(p2, r2);
 
         for (int i = 0; i < 4; ++i) {
             *y++ = r1[i];
diff --git a/tests/fft_test.cpp b/tests/fft_test.cpp
@@ -250,8 +250,8 @@ TEST(FFT, SmallFft) {
     using namespace std::complex_literals;
 
     {
-        SmallFftPow2R plan_r(1);
-        SmallFftPow2C plan_c(1);
+        SmallFftR plan_r(1);
+        SmallFftC plan_c(1);
         arr_real x = {10};
         arr_cmplx ref = {10};
         auto y1 = plan_r.solve(x);
@@ -262,8 +262,8 @@ TEST(FFT, SmallFft) {
         ASSERT_EQ_ARR_CMPLX(y3, ref);
     }
     {
-        SmallFftPow2R plan_r(2);
-        SmallFftPow2C plan_c(2);
+        SmallFftR plan_r(2);
+        SmallFftC plan_c(2);
         arr_real x = {1, 2};
         arr_cmplx ref = {3, -1};
         auto y1 = plan_r.solve(x);
@@ -274,8 +274,31 @@ TEST(FFT, SmallFft) {
         ASSERT_EQ_ARR_CMPLX(y3, ref);
     }
     {
-        SmallFftPow2R plan_r(4);
-        SmallFftPow2C plan_c(4);
+        SmallFftR plan_r(3);
+        SmallFftC plan_c(3);
+        arr_real x = {1, 2, 3};
+        arr_cmplx ref = {6.00000000000000 + 0.00000000000000i, -1.50000000000000 + 0.866025403784439i,
+                         -1.50000000000000 - 0.866025403784439i};
+        auto y1 = plan_r.solve(x);
+        auto y2 = plan_c.solve(complex(x));
+        auto y3 = fft(x);
+        ASSERT_EQ_ARR_CMPLX(y1, ref);
+        ASSERT_EQ_ARR_CMPLX(y2, ref);
+        ASSERT_EQ_ARR_CMPLX(y3, ref);
+    }
+    {
+        SmallFftC plan(3);
+        arr_cmplx x = {1 + 1i, 2 + 2i, 3 - 3i};
+        arr_cmplx ref = {6.00000000000000 + 0.00000000000000i, 2.83012701892219 + 2.36602540378444i,
+                         -5.83012701892219 + 0.633974596215561i};
+        auto y1 = plan.solve(x);
+        auto y2 = fft(x);
+        ASSERT_EQ_ARR_CMPLX(y1, ref);
+        ASSERT_EQ_ARR_CMPLX(y2, ref);
+    }
+    {
+        SmallFftR plan_r(4);
+        SmallFftC plan_c(4);
         arr_real x = {1, 2, 3, 4};
         arr_cmplx ref = {10.0000000000000 + 0.00000000000000i, -2.00000000000000 + 2.00000000000000i,
                          -2.00000000000000 + 0.00000000000000i, -2.00000000000000 - 2.00000000000000i};
@@ -287,8 +310,8 @@ TEST(FFT, SmallFft) {
         ASSERT_EQ_ARR_CMPLX(y3, ref);
     }
     {
-        SmallFftPow2R plan_r(8);
-        SmallFftPow2C plan_c(8);
+        SmallFftR plan_r(8);
+        SmallFftC plan_c(8);
         arr_real x = {1, 2, 3, 4, 5, 6, 7, 8};
         arr_cmplx ref = {36.0000000000000 + 0.00000000000000i,  -4.00000000000000 + 9.65685424949238i,
                          -4.00000000000000 + 4.00000000000000i, -4.00000000000000 + 1.65685424949238i,

Original file line number	Diff line number	Diff line change
`@@ -46,8 +46,8 @@ std::shared_ptr<BaseFftPlanR> _get_rfft_plan(int n) {`
`46`	`46`	`//-------------------------------------------------------------------------------------------------`
`47`	`47`	`std::shared_ptr<BaseFftPlanC> create_fft_plan(int n) {`
`48`	`48`	`//dont cache small fft plans`
`49`		`- if ((n == 1) || (n == 2) || (n == 4) || (n == 8)) {`
`50`		`- return std::make_shared<SmallFftPow2C>(n);`
	`49`	`+ if (SmallFftC::is_supported(n)) {`
	`50`	`+ return std::make_shared<SmallFftC>(n);`
`51`	`51`	`}`
`52`	`52`
`53`	`53`	`//TODO: use weak_ptr cache to prevent duplication`
`@@ -61,8 +61,8 @@ std::shared_ptr<BaseFftPlanC> create_fft_plan(int n) {`
`61`	`61`	`}`
`62`	`62`
`63`	`63`	`std::shared_ptr<BaseFftPlanR> create_rfft_plan(int n) {`
`64`		`- if ((n == 1) || (n == 2) || (n == 4) || (n == 8)) {`
`65`		`- return std::make_shared<SmallFftPow2R>(n);`
	`64`	`+ if (SmallFftR::is_supported(n)) {`
	`65`	`+ return std::make_shared<SmallFftR>(n);`
`66`	`66`	`}`
`67`	`67`
`68`	`68`	`thread_local LRUCache<int, std::shared_ptr<BaseFftPlanR>> cache{FFT_CACHE_SIZE};`