diff --git a/Src/FFT/AMReX_FFT_Helper.H b/Src/FFT/AMReX_FFT_Helper.H index c747525fa8d..f005542fa9d 100644 --- a/Src/FFT/AMReX_FFT_Helper.H +++ b/Src/FFT/AMReX_FFT_Helper.H @@ -74,7 +74,13 @@ struct Info //! batch size. bool twod_mode = false; - //! We might have a special twod_mode: nx or ny == 1 && nz > 1. + //! - 2D builds: oned_mode=true means we FFT in x only and treat y as the + //! batch size. + //! - 3D builds with twod_mode=false: same behavior as the 2D case, but the + //! z-dimension also participates in the batch. + //! - 3D builds with twod_mode=true: setting oned_mode=true further signals + //! that exactly one of {nx, ny} is 1, so only the non-degenerate + //! direction needs FFT work while the other stays batched. bool oned_mode = false; //! Batched FFT size. Only support in R2C, not R2X. diff --git a/Src/FFT/AMReX_FFT_R2C.H b/Src/FFT/AMReX_FFT_R2C.H index 39f13ca3b95..c1f3d1521ed 100644 --- a/Src/FFT/AMReX_FFT_R2C.H +++ b/Src/FFT/AMReX_FFT_R2C.H @@ -545,7 +545,6 @@ R2C::R2C (Box const& domain, Info const& info) m_slab_decomp = true; } } - #endif auto const ncomp = m_info.batch_size; @@ -566,7 +565,8 @@ R2C::R2C (Box const& domain, Info const& info) m_cx.define(cbax, dmx, ncomp, 0, MFInfo().SetAlloc(false)); } - m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) && (! m_info.twod_mode); + m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) && + (! m_info.twod_mode) && (! m_info.oned_mode); if (!m_do_alld_fft) // do a series of 1d or 2d ffts { @@ -590,8 +590,9 @@ R2C::R2C (Box const& domain, Info const& info) #endif #if (AMREX_SPACEDIM == 3) - if (m_real_domain.length(1) > 1 && - (! m_info.twod_mode && m_real_domain.length(2) > 1)) + if (!m_info.oned_mode && !m_info.twod_mode && + m_real_domain.length(1) > 1 && + m_real_domain.length(2) > 1) { auto cbaz = amrex::decompose(m_spectral_domain_z, nprocs, {false,true,true}, true); @@ -1249,10 +1250,16 @@ template T R2C::scalingFactor () const { #if (AMREX_SPACEDIM == 3) - if (m_info.twod_mode) { + if (m_info.oned_mode && !m_info.twod_mode) { + return T(1)/T(Long(m_real_domain.length(0))); + } else if (m_info.twod_mode) { return T(1)/T(Long(m_real_domain.length(0)) * Long(m_real_domain.length(1))); } else +#elif (AMREX_SPACEDIM == 2) + if (m_info.oned_mode) { + return T(1)/T(Long(m_real_domain.length(0))); + } else #endif { return T(1)/T(m_real_domain.numPts()); diff --git a/Tests/FFT/Batch/main.cpp b/Tests/FFT/Batch/main.cpp index 6b2dec3d8da..2c12a4a8838 100644 --- a/Tests/FFT/Batch/main.cpp +++ b/Tests/FFT/Batch/main.cpp @@ -162,6 +162,328 @@ int main (int argc, char* argv[]) #endif AMREX_ALWAYS_ASSERT(error < eps); } + +#if (AMREX_SPACEDIM >= 2) +#if (AMREX_SPACEDIM == 2) + constexpr const char* oned_mode_dim_tag = "2D"; +#else + constexpr const char* oned_mode_dim_tag = "3D"; +#endif + { + MultiFab oned_mf(ba, dm, 1, 0); + auto const& oa = oned_mf.arrays(); + int nx = geom.Domain().length(0); + Real two_pi = 2._rt * Math::pi(); + ParallelFor(oned_mf, IntVect(0), 1, + [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n) + { + Real base = 0._rt; + Real cos_amp = 0._rt; +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + base = (1._rt + Real(j)) * 0.25_rt; + cos_amp = 0.05_rt * Real(j+1); +#else + base = 0.2_rt + 0.05_rt*Real(j+1) + 0.02_rt*Real(k+1); + cos_amp = 0.01_rt * Real(j+1) * Real(k+1); +#endif + Real theta = two_pi * Real(i) / Real(nx); + oa[b](i,j,k,n) = base + cos_amp * std::cos(theta); + }); + + FFT::Info info{}; + info.setOneDMode(true); + FFT::R2C r2c(geom.Domain(), info); + auto const& [cba, cdm] = r2c.getSpectralDataLayout(); + cMultiFab spec(cba, cdm, 1, 0); + r2c.forward(oned_mf, spec); + + MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0); + auto const& ca = spec.const_arrays(); + auto const& ea = err.arrays(); + ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + Real base = 0._rt; + Real cos_amp = 0._rt; +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + base = (1._rt + Real(j)) * 0.25_rt; + cos_amp = 0.05_rt * Real(j+1); +#else + base = 0.2_rt + 0.05_rt*Real(j+1) + 0.02_rt*Real(k+1); + cos_amp = 0.01_rt * Real(j+1) * Real(k+1); +#endif + Real expected_real = 0._rt; + if (i == 0) { + expected_real = base * Real(nx); + } else if (i == 1) { + expected_real = cos_amp * Real(nx) * 0.5_rt; + } + auto c = ca[b](i,j,k,0); + ea[b](i,j,k) = amrex::norm(c - GpuComplex(expected_real, 0._rt)); + }); + +#ifdef AMREX_USE_FLOAT + Real tol = 5.e-6_rt; +#else + Real tol = 1.e-14_rt; +#endif + Real err_norm = err.norminf(); + amrex::Print() << " Expected to be close to zero (" << oned_mode_dim_tag + << " R2C one-d-mode): " << err_norm << "\n"; + AMREX_ALWAYS_ASSERT(err_norm < tol); + + MultiFab recon(ba, dm, 1, 0); + r2c.backward(spec, recon); + MultiFab back_err(ba, dm, 1, 0); + auto const back_scaling = r2c.scalingFactor(); + auto const& orig_a = oned_mf.const_arrays(); + auto const& recon_a = recon.const_arrays(); + auto const& back_a = back_err.arrays(); + ParallelFor(back_err, IntVect(0), 1, + [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n) + { + Real diff = orig_a[b](i,j,k,n) - recon_a[b](i,j,k,n) * back_scaling; + back_a[b](i,j,k,n) = amrex::Math::abs(diff); + }); + Real back_norm = back_err.norminf(); + amrex::Print() << " Expected to be close to zero (" << oned_mode_dim_tag + << " R2C one-d-mode backward): " << back_norm << "\n"; + AMREX_ALWAYS_ASSERT(back_norm < tol); + } + + { + cMultiFab cin(ba, dm, 1, 0); + auto const& cia = cin.arrays(); + int nx = geom.Domain().length(0); + Real two_pi = 2._rt * Math::pi(); + ParallelFor(cin, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + Real theta = two_pi * Real(i) / Real(nx); + GpuComplex base(0._rt, 0._rt); + GpuComplex cos_amp(0._rt, 0._rt); +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + Real mag = 0.01_rt * Real(j+1); + base = GpuComplex(mag, -0.125_rt*mag); + cos_amp = GpuComplex(0.2_rt*mag, 0.05_rt*mag); +#else + Real mag = 0.01_rt * Real(j+1) + 0.005_rt * Real(k+1); + base = GpuComplex(mag, -0.1_rt*mag); + cos_amp = GpuComplex(0.15_rt*Real(j+1), 0.05_rt*Real(k+1)); +#endif + cia[b](i,j,k) = base + cos_amp * std::cos(theta); + }); + + FFT::Info info{}; + info.setOneDMode(true); + FFT::C2C c2c(geom.Domain(), info); + auto const& [cba, cdm] = c2c.getSpectralDataLayout(); + cMultiFab spec(cba, cdm, 1, 0); + c2c.forward(cin, spec); + + MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0); + auto const& ca = spec.const_arrays(); + auto const& ea = err.arrays(); + ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + GpuComplex base(0._rt, 0._rt); + GpuComplex cos_amp(0._rt, 0._rt); +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(k); + Real mag = 0.01_rt * Real(j+1); + base = GpuComplex(mag, -0.125_rt*mag); + cos_amp = GpuComplex(0.2_rt*mag, 0.05_rt*mag); +#else + Real mag = 0.01_rt * Real(j+1) + 0.005_rt * Real(k+1); + base = GpuComplex(mag, -0.1_rt*mag); + cos_amp = GpuComplex(0.15_rt*Real(j+1), 0.05_rt*Real(k+1)); +#endif + GpuComplex expected(0._rt, 0._rt); + if (i == 0) { + expected = base * Real(nx); + } else if (i == 1 || i == nx-1) { + expected = cos_amp * (Real(nx) * 0.5_rt); + } + auto c = ca[b](i,j,k,0); + ea[b](i,j,k) = amrex::norm(c - expected); + }); + +#ifdef AMREX_USE_FLOAT + Real tol = 5.e-6_rt; +#else + Real tol = 1.e-14_rt; +#endif + Real err_norm = err.norminf(); + amrex::Print() << " Expected to be close to zero (" << oned_mode_dim_tag + << " C2C one-d-mode): " << err_norm << "\n"; + AMREX_ALWAYS_ASSERT(err_norm < tol); + + cMultiFab recon(cin.boxArray(), cin.DistributionMap(), 1, 0); + c2c.backward(spec, recon); + + MultiFab back_err(cin.boxArray(), cin.DistributionMap(), 1, 0); + auto const back_scaling = c2c.scalingFactor(); + auto const& cin_a = cin.const_arrays(); + auto const& recon_a = recon.const_arrays(); + auto const& back_a = back_err.arrays(); + ParallelFor(back_err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + auto diff = cin_a[b](i,j,k); + auto tmp = recon_a[b](i,j,k); + tmp *= back_scaling; + diff -= tmp; + back_a[b](i,j,k) = amrex::norm(diff); + }); + Real back_norm = back_err.norminf(); + amrex::Print() << " Expected to be close to zero (" << oned_mode_dim_tag + << " C2C one-d-mode backward): " << back_norm << "\n"; + AMREX_ALWAYS_ASSERT(back_norm < tol); + } +#endif + +#if (AMREX_SPACEDIM == 3) + { + MultiFab twod_mf(ba, dm, 1, 0); + auto const& ta = twod_mf.arrays(); + int nx = geom.Domain().length(0); + int ny = geom.Domain().length(1); + Real two_pi = 2._rt * Math::pi(); + ParallelFor(twod_mf, IntVect(0), 1, + [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n) + { + Real base = 1._rt + 0.05_rt*Real(k+1); + Real cos_amp = 0.02_rt * Real(k+1); + Real theta = two_pi * Real(i) / Real(nx); + ta[b](i,j,k,n) = base + cos_amp * std::cos(theta); + }); + + FFT::Info info{}; + info.setTwoDMode(true); + FFT::R2C r2c(geom.Domain(), info); + auto const& [cba, cdm] = r2c.getSpectralDataLayout(); + cMultiFab spec(cba, cdm, 1, 0); + r2c.forward(twod_mf, spec); + + MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0); + auto const& ca = spec.const_arrays(); + auto const& ea = err.arrays(); + ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + Real base = 1._rt + 0.05_rt*Real(k+1); + Real cos_amp = 0.02_rt * Real(k+1); + Real expected_real = 0._rt; + bool x_mode = (i == 1 || i == nx-1) && (j == 0); + if (i == 0 && j == 0) { + expected_real = base * Real(nx*ny); + } else if (x_mode) { + expected_real = cos_amp * Real(nx) * 0.5_rt * Real(ny); + } + auto c = ca[b](i,j,k,0); + ea[b](i,j,k) = amrex::norm(c - GpuComplex(expected_real, 0._rt)); + }); + +#ifdef AMREX_USE_FLOAT + Real tol = 5.e-6_rt; +#else + Real tol = 1.e-14_rt; +#endif + Real err_norm = err.norminf(); + amrex::Print() << " Expected to be close to zero (3D R2C two-d-mode): " + << err_norm << "\n"; + AMREX_ALWAYS_ASSERT(err_norm < tol); + + MultiFab recon(ba, dm, 1, 0); + r2c.backward(spec, recon); + MultiFab back_err(ba, dm, 1, 0); + auto const back_scaling = r2c.scalingFactor(); + auto const& orig_a = twod_mf.const_arrays(); + auto const& recon_a = recon.const_arrays(); + auto const& back_a = back_err.arrays(); + ParallelFor(back_err, IntVect(0), 1, + [=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n) + { + Real diff = orig_a[b](i,j,k,n) - recon_a[b](i,j,k,n) * back_scaling; + back_a[b](i,j,k,n) = amrex::Math::abs(diff); + }); + Real back_norm = back_err.norminf(); + amrex::Print() << " Expected to be close to zero (3D R2C two-d-mode backward): " + << back_norm << "\n"; + AMREX_ALWAYS_ASSERT(back_norm < tol); + } + + { + cMultiFab cin(ba, dm, 1, 0); + auto const& cia = cin.arrays(); + int nx = geom.Domain().length(0); + int ny = geom.Domain().length(1); + Real two_pi = 2._rt * Math::pi(); + ParallelFor(cin, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + Real mag = 0.02_rt * Real(k+1); + GpuComplex base(mag, 0.1_rt*mag); + GpuComplex cos_amp(0.3_rt*mag, -0.15_rt*mag); + Real theta = two_pi * Real(i) / Real(nx); + cia[b](i,j,k) = base + cos_amp * std::cos(theta); + }); + + FFT::Info info{}; + info.setTwoDMode(true); + FFT::C2C c2c(geom.Domain(), info); + auto const& [cba, cdm] = c2c.getSpectralDataLayout(); + cMultiFab spec(cba, cdm, 1, 0); + c2c.forward(cin, spec); + + MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0); + auto const& ca = spec.const_arrays(); + auto const& ea = err.arrays(); + ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + Real mag = 0.02_rt * Real(k+1); + GpuComplex base(mag, 0.1_rt*mag); + GpuComplex cos_amp(0.3_rt*mag, -0.15_rt*mag); + GpuComplex expected(0._rt, 0._rt); + bool x_mode = (i == 1 || i == nx-1) && (j == 0); + if (i == 0 && j == 0) { + expected = base * Real(nx*ny); + } else if (x_mode) { + expected = cos_amp * (Real(nx) * 0.5_rt * Real(ny)); + } + auto c = ca[b](i,j,k,0); + ea[b](i,j,k) = amrex::norm(c - expected); + }); + +#ifdef AMREX_USE_FLOAT + Real tol = 5.e-6_rt; +#else + Real tol = 1.e-14_rt; +#endif + Real err_norm = err.norminf(); + amrex::Print() << " Expected to be close to zero (3D C2C two-d-mode): " + << err_norm << "\n"; + AMREX_ALWAYS_ASSERT(err_norm < tol); + + cMultiFab recon(ba, dm, 1, 0); + c2c.backward(spec, recon); + MultiFab back_err(ba, dm, 1, 0); + auto const back_scaling = c2c.scalingFactor(); + auto const& cin_a = cin.const_arrays(); + auto const& recon_a = recon.const_arrays(); + auto const& back_a = back_err.arrays(); + ParallelFor(back_err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + auto diff = cin_a[b](i,j,k); + auto tmp = recon_a[b](i,j,k); + tmp *= back_scaling; + diff -= tmp; + back_a[b](i,j,k) = amrex::norm(diff); + }); + Real back_norm = back_err.norminf(); + amrex::Print() << " Expected to be close to zero (3D C2C two-d-mode backward): " + << back_norm << "\n"; + AMREX_ALWAYS_ASSERT(back_norm < tol); + } +#endif } amrex::Finalize(); }