AMReX-Codes · WeiqunZhang · Mar 25, 2026 · Mar 24, 2026 · Mar 25, 2026 · Mar 25, 2026
diff --git a/Src/FFT/AMReX_FFT_Helper.H b/Src/FFT/AMReX_FFT_Helper.H
@@ -74,7 +74,13 @@ struct Info
     //! batch size.
     bool twod_mode = false;
 
-    //! We might have a special twod_mode: nx or ny == 1 && nz > 1.
+    //! - 2D builds: oned_mode=true means we FFT in x only and treat y as the
+    //!   batch size.
+    //! - 3D builds with twod_mode=false: same behavior as the 2D case, but the
+    //!   z-dimension also participates in the batch.
+    //! - 3D builds with twod_mode=true: setting oned_mode=true further signals
+    //!   that exactly one of {nx, ny} is 1, so only the non-degenerate
+    //!   direction needs FFT work while the other stays batched.
     bool oned_mode = false;
 
     //! Batched FFT size. Only support in R2C, not R2X.

diff --git a/Src/FFT/AMReX_FFT_R2C.H b/Src/FFT/AMReX_FFT_R2C.H
@@ -545,7 +545,6 @@ R2C<T,D,C>::R2C (Box const& domain, Info const& info)
             m_slab_decomp = true;
         }
     }
-
 #endif
 
     auto const ncomp = m_info.batch_size;
@@ -566,7 +565,8 @@ R2C<T,D,C>::R2C (Box const& domain, Info const& info)
         m_cx.define(cbax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
     }
 
-    m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) && (! m_info.twod_mode);
+    m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) &&
+                    (! m_info.twod_mode) && (! m_info.oned_mode);
 
     if (!m_do_alld_fft) // do a series of 1d or 2d ffts
     {
@@ -590,8 +590,9 @@ R2C<T,D,C>::R2C (Box const& domain, Info const& info)
 #endif
 
 #if (AMREX_SPACEDIM == 3)
-        if (m_real_domain.length(1) > 1 &&
-            (! m_info.twod_mode && m_real_domain.length(2) > 1))
+        if (!m_info.oned_mode && !m_info.twod_mode &&
+            m_real_domain.length(1) > 1 &&
+            m_real_domain.length(2) > 1)
         {
             auto cbaz = amrex::decompose(m_spectral_domain_z, nprocs,
                                          {false,true,true}, true);
@@ -1249,10 +1250,16 @@ template <typename T, Direction D, bool C>
 T R2C<T,D,C>::scalingFactor () const
 {
 #if (AMREX_SPACEDIM == 3)
-    if (m_info.twod_mode) {
+    if (m_info.oned_mode && !m_info.twod_mode) {
+        return T(1)/T(Long(m_real_domain.length(0)));
+    } else if (m_info.twod_mode) {
         return T(1)/T(Long(m_real_domain.length(0)) *
                       Long(m_real_domain.length(1)));
     } else
+#elif (AMREX_SPACEDIM == 2)
+    if (m_info.oned_mode) {
+        return T(1)/T(Long(m_real_domain.length(0)));
+    } else
 #endif
     {
         return T(1)/T(m_real_domain.numPts());

diff --git a/Tests/FFT/Batch/main.cpp b/Tests/FFT/Batch/main.cpp
@@ -162,6 +162,328 @@ int main (int argc, char* argv[])
 #endif
             AMREX_ALWAYS_ASSERT(error < eps);
         }
+
+#if (AMREX_SPACEDIM >= 2)
+#if (AMREX_SPACEDIM == 2)
+        constexpr const char* oned_mode_dim_tag = "2D";
+#else
+        constexpr const char* oned_mode_dim_tag = "3D";
+#endif
+        {
+            MultiFab oned_mf(ba, dm, 1, 0);
+            auto const& oa = oned_mf.arrays();
+            int nx = geom.Domain().length(0);
+            Real two_pi = 2._rt * Math::pi<Real>();
+            ParallelFor(oned_mf, IntVect(0), 1,
+                        [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n)
+            {
+                Real base = 0._rt;
+                Real cos_amp = 0._rt;
+#if (AMREX_SPACEDIM == 2)
+                amrex::ignore_unused(k);
+                base = (1._rt + Real(j)) * 0.25_rt;
+                cos_amp = 0.05_rt * Real(j+1);
+#else
+                base = 0.2_rt + 0.05_rt*Real(j+1) + 0.02_rt*Real(k+1);
+                cos_amp = 0.01_rt * Real(j+1) * Real(k+1);
+#endif
+                Real theta = two_pi * Real(i) / Real(nx);
+                oa[b](i,j,k,n) = base + cos_amp * std::cos(theta);
+            });
+
+            FFT::Info info{};
+            info.setOneDMode(true);
+            FFT::R2C<Real,FFT::Direction::both> r2c(geom.Domain(), info);
+            auto const& [cba, cdm] = r2c.getSpectralDataLayout();
+            cMultiFab spec(cba, cdm, 1, 0);
+            r2c.forward(oned_mf, spec);
+
+            MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0);
+            auto const& ca = spec.const_arrays();
+            auto const& ea = err.arrays();
+            ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
+            {
+                Real base = 0._rt;
+                Real cos_amp = 0._rt;
+#if (AMREX_SPACEDIM == 2)
+                amrex::ignore_unused(k);
+                base = (1._rt + Real(j)) * 0.25_rt;
+                cos_amp = 0.05_rt * Real(j+1);
+#else
+                base = 0.2_rt + 0.05_rt*Real(j+1) + 0.02_rt*Real(k+1);
+                cos_amp = 0.01_rt * Real(j+1) * Real(k+1);
+#endif
+                Real expected_real = 0._rt;
+                if (i == 0) {
+                    expected_real = base * Real(nx);
+                } else if (i == 1) {
+                    expected_real = cos_amp * Real(nx) * 0.5_rt;
+                }
+                auto c = ca[b](i,j,k,0);
+                ea[b](i,j,k) = amrex::norm(c - GpuComplex<Real>(expected_real, 0._rt));
+            });
+
+#ifdef AMREX_USE_FLOAT
+            Real tol = 5.e-6_rt;
+#else
+            Real tol = 1.e-14_rt;
+#endif
+            Real err_norm = err.norminf();
+            amrex::Print() << "  Expected to be close to zero (" << oned_mode_dim_tag
+                           << " R2C one-d-mode): " << err_norm << "\n";
+            AMREX_ALWAYS_ASSERT(err_norm < tol);
+
+            MultiFab recon(ba, dm, 1, 0);
+            r2c.backward(spec, recon);
+            MultiFab back_err(ba, dm, 1, 0);
+            auto const back_scaling = r2c.scalingFactor();
+            auto const& orig_a = oned_mf.const_arrays();
+            auto const& recon_a = recon.const_arrays();
+            auto const& back_a = back_err.arrays();
+            ParallelFor(back_err, IntVect(0), 1,
+                        [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n)
+            {
+                Real diff = orig_a[b](i,j,k,n) - recon_a[b](i,j,k,n) * back_scaling;
+                back_a[b](i,j,k,n) = amrex::Math::abs(diff);
+            });
+            Real back_norm = back_err.norminf();
+            amrex::Print() << "  Expected to be close to zero (" << oned_mode_dim_tag
+                           << " R2C one-d-mode backward): " << back_norm << "\n";
+            AMREX_ALWAYS_ASSERT(back_norm < tol);
+        }
+
+        {
+            cMultiFab cin(ba, dm, 1, 0);
+            auto const& cia = cin.arrays();
+            int nx = geom.Domain().length(0);
+            Real two_pi = 2._rt * Math::pi<Real>();
+            ParallelFor(cin, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
+            {
+                Real theta = two_pi * Real(i) / Real(nx);
+                GpuComplex<Real> base(0._rt, 0._rt);
+                GpuComplex<Real> cos_amp(0._rt, 0._rt);
+#if (AMREX_SPACEDIM == 2)
+                amrex::ignore_unused(k);
+                Real mag = 0.01_rt * Real(j+1);
+                base = GpuComplex<Real>(mag, -0.125_rt*mag);
+                cos_amp = GpuComplex<Real>(0.2_rt*mag, 0.05_rt*mag);
+#else
+                Real mag = 0.01_rt * Real(j+1) + 0.005_rt * Real(k+1);
+                base = GpuComplex<Real>(mag, -0.1_rt*mag);
+                cos_amp = GpuComplex<Real>(0.15_rt*Real(j+1), 0.05_rt*Real(k+1));
+#endif
+                cia[b](i,j,k) = base + cos_amp * std::cos(theta);
+            });
+
+            FFT::Info info{};
+            info.setOneDMode(true);
+            FFT::C2C<Real,FFT::Direction::both> c2c(geom.Domain(), info);
+            auto const& [cba, cdm] = c2c.getSpectralDataLayout();
+            cMultiFab spec(cba, cdm, 1, 0);
+            c2c.forward(cin, spec);
+
+            MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0);
+            auto const& ca = spec.const_arrays();
+            auto const& ea = err.arrays();
+            ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
+            {
+                GpuComplex<Real> base(0._rt, 0._rt);
+                GpuComplex<Real> cos_amp(0._rt, 0._rt);
+#if (AMREX_SPACEDIM == 2)
+                amrex::ignore_unused(k);
+                Real mag = 0.01_rt * Real(j+1);
+                base = GpuComplex<Real>(mag, -0.125_rt*mag);
+                cos_amp = GpuComplex<Real>(0.2_rt*mag, 0.05_rt*mag);
+#else
+                Real mag = 0.01_rt * Real(j+1) + 0.005_rt * Real(k+1);
+                base = GpuComplex<Real>(mag, -0.1_rt*mag);
+                cos_amp = GpuComplex<Real>(0.15_rt*Real(j+1), 0.05_rt*Real(k+1));
+#endif
+                GpuComplex<Real> expected(0._rt, 0._rt);
+                if (i == 0) {
+                    expected = base * Real(nx);
+                } else if (i == 1 || i == nx-1) {
+                    expected = cos_amp * (Real(nx) * 0.5_rt);
+                }
+                auto c = ca[b](i,j,k,0);
+                ea[b](i,j,k) = amrex::norm(c - expected);
+            });
+
+#ifdef AMREX_USE_FLOAT
+            Real tol = 5.e-6_rt;
+#else
+            Real tol = 1.e-14_rt;
+#endif
+            Real err_norm = err.norminf();
+            amrex::Print() << "  Expected to be close to zero (" << oned_mode_dim_tag
+                           << " C2C one-d-mode): " << err_norm << "\n";
+            AMREX_ALWAYS_ASSERT(err_norm < tol);
+
+            cMultiFab recon(cin.boxArray(), cin.DistributionMap(), 1, 0);
+            c2c.backward(spec, recon);
+
+            MultiFab back_err(cin.boxArray(), cin.DistributionMap(), 1, 0);
+            auto const back_scaling = c2c.scalingFactor();
+            auto const& cin_a = cin.const_arrays();
+            auto const& recon_a = recon.const_arrays();
+            auto const& back_a = back_err.arrays();
+            ParallelFor(back_err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
+            {
+                auto diff = cin_a[b](i,j,k);
+                auto tmp = recon_a[b](i,j,k);
+                tmp *= back_scaling;
+                diff -= tmp;
+                back_a[b](i,j,k) = amrex::norm(diff);
+            });
+            Real back_norm = back_err.norminf();
+            amrex::Print() << "  Expected to be close to zero (" << oned_mode_dim_tag
+                           << " C2C one-d-mode backward): " << back_norm << "\n";
+            AMREX_ALWAYS_ASSERT(back_norm < tol);
+        }
+#endif
+
+#if (AMREX_SPACEDIM == 3)
+        {
+            MultiFab twod_mf(ba, dm, 1, 0);
+            auto const& ta = twod_mf.arrays();
+            int nx = geom.Domain().length(0);
+            int ny = geom.Domain().length(1);
+            Real two_pi = 2._rt * Math::pi<Real>();
+            ParallelFor(twod_mf, IntVect(0), 1,
+                        [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n)
+            {
+                Real base = 1._rt + 0.05_rt*Real(k+1);
+                Real cos_amp = 0.02_rt * Real(k+1);
+                Real theta = two_pi * Real(i) / Real(nx);
+                ta[b](i,j,k,n) = base + cos_amp * std::cos(theta);
+            });
+
+            FFT::Info info{};
+            info.setTwoDMode(true);
+            FFT::R2C<Real,FFT::Direction::both> r2c(geom.Domain(), info);
+            auto const& [cba, cdm] = r2c.getSpectralDataLayout();
+            cMultiFab spec(cba, cdm, 1, 0);
+            r2c.forward(twod_mf, spec);
+
+            MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0);
+            auto const& ca = spec.const_arrays();
+            auto const& ea = err.arrays();
+            ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
+            {
+                Real base = 1._rt + 0.05_rt*Real(k+1);
+                Real cos_amp = 0.02_rt * Real(k+1);
+                Real expected_real = 0._rt;
+                bool x_mode = (i == 1 || i == nx-1) && (j == 0);
+                if (i == 0 && j == 0) {
+                    expected_real = base * Real(nx*ny);
+                } else if (x_mode) {
+                    expected_real = cos_amp * Real(nx) * 0.5_rt * Real(ny);
+                }
+                auto c = ca[b](i,j,k,0);
+                ea[b](i,j,k) = amrex::norm(c - GpuComplex<Real>(expected_real, 0._rt));
+            });
+
+#ifdef AMREX_USE_FLOAT
+            Real tol = 5.e-6_rt;
+#else
+            Real tol = 1.e-14_rt;
+#endif
+            Real err_norm = err.norminf();
+            amrex::Print() << "  Expected to be close to zero (3D R2C two-d-mode): "
+                           << err_norm << "\n";
+            AMREX_ALWAYS_ASSERT(err_norm < tol);
+
+            MultiFab recon(ba, dm, 1, 0);
+            r2c.backward(spec, recon);
+            MultiFab back_err(ba, dm, 1, 0);
+            auto const back_scaling = r2c.scalingFactor();
+            auto const& orig_a = twod_mf.const_arrays();
+            auto const& recon_a = recon.const_arrays();
+            auto const& back_a = back_err.arrays();
+            ParallelFor(back_err, IntVect(0), 1,
+                        [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n)
+            {
+                Real diff = orig_a[b](i,j,k,n) - recon_a[b](i,j,k,n) * back_scaling;
+                back_a[b](i,j,k,n) = amrex::Math::abs(diff);
+            });
+            Real back_norm = back_err.norminf();
+            amrex::Print() << "  Expected to be close to zero (3D R2C two-d-mode backward): "
+                           << back_norm << "\n";
+            AMREX_ALWAYS_ASSERT(back_norm < tol);
+        }
+
+        {
+            cMultiFab cin(ba, dm, 1, 0);
+            auto const& cia = cin.arrays();
+            int nx = geom.Domain().length(0);
+            int ny = geom.Domain().length(1);
+            Real two_pi = 2._rt * Math::pi<Real>();
+            ParallelFor(cin, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
+            {
+                Real mag = 0.02_rt * Real(k+1);
+                GpuComplex<Real> base(mag, 0.1_rt*mag);
+                GpuComplex<Real> cos_amp(0.3_rt*mag, -0.15_rt*mag);
+                Real theta = two_pi * Real(i) / Real(nx);
+                cia[b](i,j,k) = base + cos_amp * std::cos(theta);
+            });
+
+            FFT::Info info{};
+            info.setTwoDMode(true);
+            FFT::C2C<Real,FFT::Direction::both> c2c(geom.Domain(), info);
+            auto const& [cba, cdm] = c2c.getSpectralDataLayout();
+            cMultiFab spec(cba, cdm, 1, 0);
+            c2c.forward(cin, spec);
+
+            MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0);
+            auto const& ca = spec.const_arrays();
+            auto const& ea = err.arrays();
+            ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
+            {
+                Real mag = 0.02_rt * Real(k+1);
+                GpuComplex<Real> base(mag, 0.1_rt*mag);
+                GpuComplex<Real> cos_amp(0.3_rt*mag, -0.15_rt*mag);
+                GpuComplex<Real> expected(0._rt, 0._rt);
+                bool x_mode = (i == 1 || i == nx-1) && (j == 0);
+                if (i == 0 && j == 0) {
+                    expected = base * Real(nx*ny);
+                } else if (x_mode) {
+                    expected = cos_amp * (Real(nx) * 0.5_rt * Real(ny));
+                }
+                auto c = ca[b](i,j,k,0);
+                ea[b](i,j,k) = amrex::norm(c - expected);
+            });
+
+#ifdef AMREX_USE_FLOAT
+            Real tol = 5.e-6_rt;
+#else
+            Real tol = 1.e-14_rt;
+#endif
+            Real err_norm = err.norminf();
+            amrex::Print() << "  Expected to be close to zero (3D C2C two-d-mode): "
+                           << err_norm << "\n";
+            AMREX_ALWAYS_ASSERT(err_norm < tol);
+
+            cMultiFab recon(ba, dm, 1, 0);
+            c2c.backward(spec, recon);
+            MultiFab back_err(ba, dm, 1, 0);
+            auto const back_scaling = c2c.scalingFactor();
+            auto const& cin_a = cin.const_arrays();
+            auto const& recon_a = recon.const_arrays();
+            auto const& back_a = back_err.arrays();
+            ParallelFor(back_err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
+            {
+                auto diff = cin_a[b](i,j,k);
+                auto tmp = recon_a[b](i,j,k);
+                tmp *= back_scaling;
+                diff -= tmp;
+                back_a[b](i,j,k) = amrex::norm(diff);
+            });
+            Real back_norm = back_err.norminf();
+            amrex::Print() << "  Expected to be close to zero (3D C2C two-d-mode backward): "
+                           << back_norm << "\n";
+            AMREX_ALWAYS_ASSERT(back_norm < tol);
+        }
+#endif
     }
     amrex::Finalize();
 }