Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Src/FFT/AMReX_FFT_Helper.H
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,13 @@ struct Info
//! batch size.
bool twod_mode = false;

//! We might have a special twod_mode: nx or ny == 1 && nz > 1.
//! - 2D builds: oned_mode=true means we FFT in x only and treat y as the
//! batch size.
//! - 3D builds with twod_mode=false: same behavior as the 2D case, but the
//! z-dimension also participates in the batch.
//! - 3D builds with twod_mode=true: setting oned_mode=true further signals
//! that exactly one of {nx, ny} is 1, so only the non-degenerate
//! direction needs FFT work while the other stays batched.
bool oned_mode = false;

//! Batched FFT size. Only support in R2C, not R2X.
Expand Down
17 changes: 12 additions & 5 deletions Src/FFT/AMReX_FFT_R2C.H
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,6 @@ R2C<T,D,C>::R2C (Box const& domain, Info const& info)
m_slab_decomp = true;
}
}

#endif

auto const ncomp = m_info.batch_size;
Expand All @@ -566,7 +565,8 @@ R2C<T,D,C>::R2C (Box const& domain, Info const& info)
m_cx.define(cbax, dmx, ncomp, 0, MFInfo().SetAlloc(false));
}

m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) && (! m_info.twod_mode);
m_do_alld_fft = (ParallelDescriptor::NProcs() == 1) &&
(! m_info.twod_mode) && (! m_info.oned_mode);

if (!m_do_alld_fft) // do a series of 1d or 2d ffts
{
Expand All @@ -590,8 +590,9 @@ R2C<T,D,C>::R2C (Box const& domain, Info const& info)
#endif

#if (AMREX_SPACEDIM == 3)
if (m_real_domain.length(1) > 1 &&
(! m_info.twod_mode && m_real_domain.length(2) > 1))
if (!m_info.oned_mode && !m_info.twod_mode &&
m_real_domain.length(1) > 1 &&
m_real_domain.length(2) > 1)
{
auto cbaz = amrex::decompose(m_spectral_domain_z, nprocs,
{false,true,true}, true);
Expand Down Expand Up @@ -1249,10 +1250,16 @@ template <typename T, Direction D, bool C>
T R2C<T,D,C>::scalingFactor () const
{
#if (AMREX_SPACEDIM == 3)
if (m_info.twod_mode) {
if (m_info.oned_mode && !m_info.twod_mode) {
return T(1)/T(Long(m_real_domain.length(0)));
} else if (m_info.twod_mode) {
return T(1)/T(Long(m_real_domain.length(0)) *
Long(m_real_domain.length(1)));
} else
#elif (AMREX_SPACEDIM == 2)
if (m_info.oned_mode) {
return T(1)/T(Long(m_real_domain.length(0)));
} else
#endif
{
return T(1)/T(m_real_domain.numPts());
Expand Down
322 changes: 322 additions & 0 deletions Tests/FFT/Batch/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,328 @@ int main (int argc, char* argv[])
#endif
AMREX_ALWAYS_ASSERT(error < eps);
}

#if (AMREX_SPACEDIM >= 2)
#if (AMREX_SPACEDIM == 2)
constexpr const char* oned_mode_dim_tag = "2D";
#else
constexpr const char* oned_mode_dim_tag = "3D";
#endif
{
MultiFab oned_mf(ba, dm, 1, 0);
auto const& oa = oned_mf.arrays();
int nx = geom.Domain().length(0);
Real two_pi = 2._rt * Math::pi<Real>();
ParallelFor(oned_mf, IntVect(0), 1,
[=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n)
{
Real base = 0._rt;
Real cos_amp = 0._rt;
#if (AMREX_SPACEDIM == 2)
amrex::ignore_unused(k);
base = (1._rt + Real(j)) * 0.25_rt;
cos_amp = 0.05_rt * Real(j+1);
#else
base = 0.2_rt + 0.05_rt*Real(j+1) + 0.02_rt*Real(k+1);
cos_amp = 0.01_rt * Real(j+1) * Real(k+1);
#endif
Real theta = two_pi * Real(i) / Real(nx);
oa[b](i,j,k,n) = base + cos_amp * std::cos(theta);
});

FFT::Info info{};
info.setOneDMode(true);
FFT::R2C<Real,FFT::Direction::both> r2c(geom.Domain(), info);
auto const& [cba, cdm] = r2c.getSpectralDataLayout();
cMultiFab spec(cba, cdm, 1, 0);
r2c.forward(oned_mf, spec);

MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0);
auto const& ca = spec.const_arrays();
auto const& ea = err.arrays();
ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
{
Real base = 0._rt;
Real cos_amp = 0._rt;
#if (AMREX_SPACEDIM == 2)
amrex::ignore_unused(k);
base = (1._rt + Real(j)) * 0.25_rt;
cos_amp = 0.05_rt * Real(j+1);
#else
base = 0.2_rt + 0.05_rt*Real(j+1) + 0.02_rt*Real(k+1);
cos_amp = 0.01_rt * Real(j+1) * Real(k+1);
#endif
Real expected_real = 0._rt;
if (i == 0) {
expected_real = base * Real(nx);
} else if (i == 1) {
expected_real = cos_amp * Real(nx) * 0.5_rt;
}
auto c = ca[b](i,j,k,0);
ea[b](i,j,k) = amrex::norm(c - GpuComplex<Real>(expected_real, 0._rt));
});

#ifdef AMREX_USE_FLOAT
Real tol = 5.e-6_rt;
#else
Real tol = 1.e-14_rt;
#endif
Real err_norm = err.norminf();
amrex::Print() << " Expected to be close to zero (" << oned_mode_dim_tag
<< " R2C one-d-mode): " << err_norm << "\n";
AMREX_ALWAYS_ASSERT(err_norm < tol);

MultiFab recon(ba, dm, 1, 0);
r2c.backward(spec, recon);
MultiFab back_err(ba, dm, 1, 0);
auto const back_scaling = r2c.scalingFactor();
auto const& orig_a = oned_mf.const_arrays();
auto const& recon_a = recon.const_arrays();
auto const& back_a = back_err.arrays();
ParallelFor(back_err, IntVect(0), 1,
[=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n)
{
Real diff = orig_a[b](i,j,k,n) - recon_a[b](i,j,k,n) * back_scaling;
back_a[b](i,j,k,n) = amrex::Math::abs(diff);
});
Real back_norm = back_err.norminf();
amrex::Print() << " Expected to be close to zero (" << oned_mode_dim_tag
<< " R2C one-d-mode backward): " << back_norm << "\n";
AMREX_ALWAYS_ASSERT(back_norm < tol);
}

{
cMultiFab cin(ba, dm, 1, 0);
auto const& cia = cin.arrays();
int nx = geom.Domain().length(0);
Real two_pi = 2._rt * Math::pi<Real>();
ParallelFor(cin, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
{
Real theta = two_pi * Real(i) / Real(nx);
GpuComplex<Real> base(0._rt, 0._rt);
GpuComplex<Real> cos_amp(0._rt, 0._rt);
#if (AMREX_SPACEDIM == 2)
amrex::ignore_unused(k);
Real mag = 0.01_rt * Real(j+1);
base = GpuComplex<Real>(mag, -0.125_rt*mag);
cos_amp = GpuComplex<Real>(0.2_rt*mag, 0.05_rt*mag);
#else
Real mag = 0.01_rt * Real(j+1) + 0.005_rt * Real(k+1);
base = GpuComplex<Real>(mag, -0.1_rt*mag);
cos_amp = GpuComplex<Real>(0.15_rt*Real(j+1), 0.05_rt*Real(k+1));
#endif
cia[b](i,j,k) = base + cos_amp * std::cos(theta);
});

FFT::Info info{};
info.setOneDMode(true);
FFT::C2C<Real,FFT::Direction::both> c2c(geom.Domain(), info);
auto const& [cba, cdm] = c2c.getSpectralDataLayout();
cMultiFab spec(cba, cdm, 1, 0);
c2c.forward(cin, spec);

MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0);
auto const& ca = spec.const_arrays();
auto const& ea = err.arrays();
ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
{
GpuComplex<Real> base(0._rt, 0._rt);
GpuComplex<Real> cos_amp(0._rt, 0._rt);
#if (AMREX_SPACEDIM == 2)
amrex::ignore_unused(k);
Real mag = 0.01_rt * Real(j+1);
base = GpuComplex<Real>(mag, -0.125_rt*mag);
cos_amp = GpuComplex<Real>(0.2_rt*mag, 0.05_rt*mag);
#else
Real mag = 0.01_rt * Real(j+1) + 0.005_rt * Real(k+1);
base = GpuComplex<Real>(mag, -0.1_rt*mag);
cos_amp = GpuComplex<Real>(0.15_rt*Real(j+1), 0.05_rt*Real(k+1));
#endif
GpuComplex<Real> expected(0._rt, 0._rt);
if (i == 0) {
expected = base * Real(nx);
} else if (i == 1 || i == nx-1) {
expected = cos_amp * (Real(nx) * 0.5_rt);
}
auto c = ca[b](i,j,k,0);
ea[b](i,j,k) = amrex::norm(c - expected);
});

#ifdef AMREX_USE_FLOAT
Real tol = 5.e-6_rt;
#else
Real tol = 1.e-14_rt;
#endif
Real err_norm = err.norminf();
amrex::Print() << " Expected to be close to zero (" << oned_mode_dim_tag
<< " C2C one-d-mode): " << err_norm << "\n";
AMREX_ALWAYS_ASSERT(err_norm < tol);

cMultiFab recon(cin.boxArray(), cin.DistributionMap(), 1, 0);
c2c.backward(spec, recon);

MultiFab back_err(cin.boxArray(), cin.DistributionMap(), 1, 0);
auto const back_scaling = c2c.scalingFactor();
auto const& cin_a = cin.const_arrays();
auto const& recon_a = recon.const_arrays();
auto const& back_a = back_err.arrays();
ParallelFor(back_err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
{
auto diff = cin_a[b](i,j,k);
auto tmp = recon_a[b](i,j,k);
tmp *= back_scaling;
diff -= tmp;
back_a[b](i,j,k) = amrex::norm(diff);
});
Real back_norm = back_err.norminf();
amrex::Print() << " Expected to be close to zero (" << oned_mode_dim_tag
<< " C2C one-d-mode backward): " << back_norm << "\n";
AMREX_ALWAYS_ASSERT(back_norm < tol);
}
#endif

#if (AMREX_SPACEDIM == 3)
{
MultiFab twod_mf(ba, dm, 1, 0);
auto const& ta = twod_mf.arrays();
int nx = geom.Domain().length(0);
int ny = geom.Domain().length(1);
Real two_pi = 2._rt * Math::pi<Real>();
ParallelFor(twod_mf, IntVect(0), 1,
[=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n)
{
Real base = 1._rt + 0.05_rt*Real(k+1);
Real cos_amp = 0.02_rt * Real(k+1);
Real theta = two_pi * Real(i) / Real(nx);
ta[b](i,j,k,n) = base + cos_amp * std::cos(theta);
});

FFT::Info info{};
info.setTwoDMode(true);
FFT::R2C<Real,FFT::Direction::both> r2c(geom.Domain(), info);
auto const& [cba, cdm] = r2c.getSpectralDataLayout();
cMultiFab spec(cba, cdm, 1, 0);
r2c.forward(twod_mf, spec);

MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0);
auto const& ca = spec.const_arrays();
auto const& ea = err.arrays();
ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
{
Real base = 1._rt + 0.05_rt*Real(k+1);
Real cos_amp = 0.02_rt * Real(k+1);
Real expected_real = 0._rt;
bool x_mode = (i == 1 || i == nx-1) && (j == 0);
if (i == 0 && j == 0) {
expected_real = base * Real(nx*ny);
} else if (x_mode) {
expected_real = cos_amp * Real(nx) * 0.5_rt * Real(ny);
}
auto c = ca[b](i,j,k,0);
ea[b](i,j,k) = amrex::norm(c - GpuComplex<Real>(expected_real, 0._rt));
});

#ifdef AMREX_USE_FLOAT
Real tol = 5.e-6_rt;
#else
Real tol = 1.e-14_rt;
#endif
Real err_norm = err.norminf();
amrex::Print() << " Expected to be close to zero (3D R2C two-d-mode): "
<< err_norm << "\n";
AMREX_ALWAYS_ASSERT(err_norm < tol);

MultiFab recon(ba, dm, 1, 0);
r2c.backward(spec, recon);
MultiFab back_err(ba, dm, 1, 0);
auto const back_scaling = r2c.scalingFactor();
auto const& orig_a = twod_mf.const_arrays();
auto const& recon_a = recon.const_arrays();
auto const& back_a = back_err.arrays();
ParallelFor(back_err, IntVect(0), 1,
[=] AMREX_GPU_DEVICE (int b, int i, int j, int k, int n)
{
Real diff = orig_a[b](i,j,k,n) - recon_a[b](i,j,k,n) * back_scaling;
back_a[b](i,j,k,n) = amrex::Math::abs(diff);
});
Real back_norm = back_err.norminf();
amrex::Print() << " Expected to be close to zero (3D R2C two-d-mode backward): "
<< back_norm << "\n";
AMREX_ALWAYS_ASSERT(back_norm < tol);
}

{
cMultiFab cin(ba, dm, 1, 0);
auto const& cia = cin.arrays();
int nx = geom.Domain().length(0);
int ny = geom.Domain().length(1);
Real two_pi = 2._rt * Math::pi<Real>();
ParallelFor(cin, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
{
Real mag = 0.02_rt * Real(k+1);
GpuComplex<Real> base(mag, 0.1_rt*mag);
GpuComplex<Real> cos_amp(0.3_rt*mag, -0.15_rt*mag);
Real theta = two_pi * Real(i) / Real(nx);
cia[b](i,j,k) = base + cos_amp * std::cos(theta);
});

FFT::Info info{};
info.setTwoDMode(true);
FFT::C2C<Real,FFT::Direction::both> c2c(geom.Domain(), info);
auto const& [cba, cdm] = c2c.getSpectralDataLayout();
cMultiFab spec(cba, cdm, 1, 0);
c2c.forward(cin, spec);

MultiFab err(spec.boxArray(), spec.DistributionMap(), 1, 0);
auto const& ca = spec.const_arrays();
auto const& ea = err.arrays();
ParallelFor(err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
{
Real mag = 0.02_rt * Real(k+1);
GpuComplex<Real> base(mag, 0.1_rt*mag);
GpuComplex<Real> cos_amp(0.3_rt*mag, -0.15_rt*mag);
GpuComplex<Real> expected(0._rt, 0._rt);
bool x_mode = (i == 1 || i == nx-1) && (j == 0);
if (i == 0 && j == 0) {
expected = base * Real(nx*ny);
} else if (x_mode) {
expected = cos_amp * (Real(nx) * 0.5_rt * Real(ny));
}
auto c = ca[b](i,j,k,0);
ea[b](i,j,k) = amrex::norm(c - expected);
});

#ifdef AMREX_USE_FLOAT
Real tol = 5.e-6_rt;
#else
Real tol = 1.e-14_rt;
#endif
Real err_norm = err.norminf();
amrex::Print() << " Expected to be close to zero (3D C2C two-d-mode): "
<< err_norm << "\n";
AMREX_ALWAYS_ASSERT(err_norm < tol);

cMultiFab recon(ba, dm, 1, 0);
c2c.backward(spec, recon);
MultiFab back_err(ba, dm, 1, 0);
auto const back_scaling = c2c.scalingFactor();
auto const& cin_a = cin.const_arrays();
auto const& recon_a = recon.const_arrays();
auto const& back_a = back_err.arrays();
ParallelFor(back_err, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
{
auto diff = cin_a[b](i,j,k);
auto tmp = recon_a[b](i,j,k);
tmp *= back_scaling;
diff -= tmp;
back_a[b](i,j,k) = amrex::norm(diff);
});
Real back_norm = back_err.norminf();
amrex::Print() << " Expected to be close to zero (3D C2C two-d-mode backward): "
<< back_norm << "\n";
AMREX_ALWAYS_ASSERT(back_norm < tol);
}
#endif
}
amrex::Finalize();
}
Loading