diff --git a/CMakeLists.txt b/CMakeLists.txt index 91e072bf8..db77c210f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,8 +23,12 @@ add_compile_options (-Wunused) add_compile_options (-Wextra) add_compile_options (-Werror) + # allow deprecated functions add_compile_options (-Wno-deprecated-declarations) +add_compile_options (-Wno-stringop-overflow) +add_compile_options (-Wno-array-bounds) +add_compile_options (-Wno-restrict) option (USE_STATIC_LIBRARIES "Link with static libraries if available" ON) @@ -62,6 +66,13 @@ if (ENABLE_FFT) message (STATUS "Found Heffte_DIR: ${Heffte_DIR}") endif () +option (ENABLE_NUFFT "Enable NUFFT transform" OFF) +if (ENABLE_NUFFT) + add_definitions (-DENABLE_NUFFT) + find_package(CUFINUFFT REQUIRED) + message (STATUS "Found CUFINUFFT_DIR: ${CUFINUFFT_DIR}") +endif () + option (ENABLE_SOLVERS "Enable IPPL solvers" OFF) add_subdirectory (src) diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake new file mode 100644 index 000000000..202a044a3 --- /dev/null +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -0,0 +1,32 @@ +# +# Find CUFINUFFT includes and library +# +# CUFINUFFT_INCLUDE_DIR - where to find cufinufft.h +# CUFINUFFT_LIBRARY - libcufinufft.so path +# CUFINUFFT_FOUND - do not attempt to use if "no" or undefined. + +FIND_PATH(CUFINUFFT_INCLUDE_DIR cufinufft.h + HINTS $ENV{CUFINUFFT_INCLUDE_PATH} $ENV{CUFINUFFT_INCLUDE_DIR} $ENV{CUFINUFFT_PREFIX}/include $ENV{CUFINUFFT_DIR}/include ${PROJECT_SOURCE_DIR}/include + PATHS ENV CPP_INCLUDE_PATH +) +#Static library has some issues and gives a cuda error at the end of compilation +FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.so + HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib $ENV{CUFINUFFT_DIR}/lib $ENV{CUFINUFFT}/lib ${PROJECT_SOURCE_DIR}/lib + PATHS ENV LIBRARY_PATH +) + +IF(CUFINUFFT_INCLUDE_DIR AND CUFINUFFT_LIBRARY_DIR) + SET( CUFINUFFT_FOUND "YES" ) + SET( CUFINUFFT_DIR $ENV{CUFINUFFT_DIR} ) +ENDIF() + +IF (CUFINUFFT_FOUND) + IF (NOT CUFINUFFT_FIND_QUIETLY) + MESSAGE(STATUS "Found cufinufft library dir: ${CUFINUFFT_LIBRARY_DIR}") + MESSAGE(STATUS "Found cufinufft include dir: ${CUFINUFFT_INCLUDE_DIR}") + ENDIF (NOT CUFINUFFT_FIND_QUIETLY) +ELSE (CUFINUFFT_FOUND) + IF (CUFINUFFT_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "Could not find CUFINUFFT!") + ENDIF (CUFINUFFT_FIND_REQUIRED) +ENDIF (CUFINUFFT_FOUND) diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt index 68dc41f4b..a3884ca14 100644 --- a/alpine/CMakeLists.txt +++ b/alpine/CMakeLists.txt @@ -1,29 +1,22 @@ -file (RELATIVE_PATH _relPath "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") -message (STATUS "Adding index test found in ${_relPath}") - -include_directories ( - ${CMAKE_SOURCE_DIR}/src -) - -link_directories ( - ${CMAKE_CURRENT_SOURCE_DIR} - ${Kokkos_DIR}/.. -) - -set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES}) -set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) - -add_executable (PenningTrap PenningTrap.cpp) -target_link_libraries (PenningTrap ${IPPL_LIBS}) - -add_executable (UniformPlasmaTest UniformPlasmaTest.cpp) -target_link_libraries (UniformPlasmaTest ${IPPL_LIBS}) - -add_executable (LandauDamping LandauDamping.cpp) -target_link_libraries (LandauDamping ${IPPL_LIBS}) - -add_executable (BumponTailInstability BumponTailInstability.cpp) -target_link_libraries (BumponTailInstability ${IPPL_LIBS}) +macro(list_subdirectories retval curdir) + file(GLOB sub-dir RELATIVE ${curdir} *) + set(list_of_dirs "") + foreach(dir ${sub-dir}) + if(IS_DIRECTORY ${curdir}/${dir}) + set(list_of_dirs ${list_of_dirs} ${dir}) + endif() + endforeach() + set(${retval} ${list_of_dirs}) +endmacro() + +#list_subdirectories("TESTS" ${CMAKE_CURRENT_SOURCE_DIR}) +#foreach (test ${TESTS}) +# add_subdirectory (${test}) +#endforeach() + +add_subdirectory (ElectrostaticPIC) +add_subdirectory (ElectrostaticPIF) +add_subdirectory (PinT) # vi: set et ts=4 sw=4 sts=4: diff --git a/alpine/BumponTailInstability.cpp b/alpine/ElectrostaticPIC/BumponTailInstability.cpp similarity index 99% rename from alpine/BumponTailInstability.cpp rename to alpine/ElectrostaticPIC/BumponTailInstability.cpp index c15cf60aa..07e595cbc 100644 --- a/alpine/BumponTailInstability.cpp +++ b/alpine/ElectrostaticPIC/BumponTailInstability.cpp @@ -252,7 +252,7 @@ int main(int argc, char *argv[]){ Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - const double dt = 0.5*dx;//0.05 + const double dt = std::atof(argv[9]);;//0.5*dx; const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); @@ -383,6 +383,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpBumponTail(); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); //P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); @@ -442,6 +443,7 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); P->dumpBumponTail(); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; diff --git a/alpine/ElectrostaticPIC/CMakeLists.txt b/alpine/ElectrostaticPIC/CMakeLists.txt new file mode 100644 index 000000000..68dc41f4b --- /dev/null +++ b/alpine/ElectrostaticPIC/CMakeLists.txt @@ -0,0 +1,35 @@ +file (RELATIVE_PATH _relPath "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") +message (STATUS "Adding index test found in ${_relPath}") + +include_directories ( + ${CMAKE_SOURCE_DIR}/src +) + +link_directories ( + ${CMAKE_CURRENT_SOURCE_DIR} + ${Kokkos_DIR}/.. +) + +set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES}) +set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) + +add_executable (PenningTrap PenningTrap.cpp) +target_link_libraries (PenningTrap ${IPPL_LIBS}) + +add_executable (UniformPlasmaTest UniformPlasmaTest.cpp) +target_link_libraries (UniformPlasmaTest ${IPPL_LIBS}) + +add_executable (LandauDamping LandauDamping.cpp) +target_link_libraries (LandauDamping ${IPPL_LIBS}) + +add_executable (BumponTailInstability BumponTailInstability.cpp) +target_link_libraries (BumponTailInstability ${IPPL_LIBS}) + +# vi: set et ts=4 sw=4 sts=4: + +# Local Variables: +# mode: cmake +# cmake-tab-width: 4 +# indent-tabs-mode: nil +# require-final-newline: nil +# End: diff --git a/alpine/ChargedParticles.hpp b/alpine/ElectrostaticPIC/ChargedParticles.hpp similarity index 83% rename from alpine/ChargedParticles.hpp rename to alpine/ElectrostaticPIC/ChargedParticles.hpp index e64417e19..61730648d 100644 --- a/alpine/ChargedParticles.hpp +++ b/alpine/ElectrostaticPIC/ChargedParticles.hpp @@ -508,7 +508,126 @@ class ChargedParticles : public ippl::ParticleBase { Ippl::Comm->barrier(); } - + + void dumpLandauParticle(size_type totalP) { + + auto Eview = E.getView(); + + double fieldEnergy, ExAmp; + double temp = 0.0; + + Kokkos::parallel_reduce("Ex energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = Eview(i)[0] * Eview(i)[0]; + valL += myVal; + }, Kokkos::Sum(temp)); + + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + fieldEnergy = globaltemp * volume / totalP ; + + double tempMax = 0.0; + Kokkos::parallel_reduce("Ex max norm", this->getLocalNum(), + KOKKOS_LAMBDA(const size_t i, double& valL) + { + double myVal = std::fabs(Eview(i)[0]); + if(myVal > valL) valL = myVal; + }, Kokkos::Max(tempMax)); + ExAmp = 0.0; + MPI_Reduce(&tempMax, &ExAmp, 1, MPI_DOUBLE, MPI_MAX, 0, Ippl::getComm()); + + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/FieldLandau_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + } + + csvout << time_m << " " + << fieldEnergy << " " + << ExAmp << endl; + + } + + Ippl::Comm->barrier(); + } + + + void dumpEnergy(size_type /*totalP*/) { + + + double potentialEnergy, kineticEnergy; + //auto Eview = E.getView(); + double temp = 0.0; + + //Kokkos::parallel_reduce("Potential energy", this->getLocalNum(), + // KOKKOS_LAMBDA(const int i, double& valL){ + // double myVal = dot(Eview(i), Eview(i)).apply(); + // valL += myVal; + // }, Kokkos::Sum(temp)); + + double globaltemp = 0.0; + //MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + //double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + //potentialEnergy = 0.5 * globaltemp * volume / totalP ; + + rho_m = dot(E_m, E_m); + potentialEnergy = 0.5 * hr_m[0] * hr_m[1] * hr_m[2] * rho_m.sum(); + + auto Pview = P.getView(); + auto qView = q.getView(); + + temp = 0.0; + + Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), Pview(i)).apply(); + myVal *= -qView(i); + valL += myVal; + }, Kokkos::Sum(temp)); + + temp *= 0.5; + globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + kineticEnergy = globaltemp; + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/Energy_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; + } + + csvout << time_m << " " + << potentialEnergy << " " + << kineticEnergy << " " + << potentialEnergy + kineticEnergy << endl; + + } + + Ippl::Comm->barrier(); + } + + void dumpBumponTail() { const int nghostE = E_m.getNghost(); diff --git a/alpine/LandauDamping.cpp b/alpine/ElectrostaticPIC/LandauDamping.cpp similarity index 98% rename from alpine/LandauDamping.cpp rename to alpine/ElectrostaticPIC/LandauDamping.cpp index 26f8a3a88..0aed5ebc8 100644 --- a/alpine/LandauDamping.cpp +++ b/alpine/ElectrostaticPIC/LandauDamping.cpp @@ -1,6 +1,6 @@ // Landau Damping Test // Usage: -// srun ./LandauDamping --info 10 +// srun ./LandauDamping
--info 10 // nx = No. cell-centered points in the x-direction // ny = No. cell-centered points in the y-direction // nz = No. cell-centered points in the z-direction @@ -13,6 +13,7 @@ // simulations. // ovfactor = Over-allocation factor for the buffers used in the communication. Typical // values are 1.0, 2.0. Value 1.0 means no over-allocation. +// dt = Time stepize // Example: // srun ./LandauDamping 128 128 128 10000 10 FFT 0.01 2.0 --info 10 // @@ -177,6 +178,7 @@ int main(int argc, char *argv[]){ const size_type totalP = std::atoll(argv[4]); const unsigned int nt = std::atoi(argv[5]); + const double dt = std::atof(argv[9]);;//0.5*dx; msg << "Landau damping" << endl @@ -200,7 +202,7 @@ int main(int argc, char *argv[]){ // create mesh and layout objects for this problem domain Vector_t kw = {0.5, 0.5, 0.5}; - double alpha = 0.05; + double alpha = 0.5; Vector_t rmin(0.0); Vector_t rmax = 2 * pi / kw ; double dx = rmax[0] / nr[0]; @@ -209,7 +211,6 @@ int main(int argc, char *argv[]){ Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - const double dt = 0.5*dx; const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); @@ -331,7 +332,8 @@ int main(int argc, char *argv[]){ P->gatherCIC(); IpplTimings::startTimer(dumpDataTimer); - P->dumpLandau(); + P->dumpLandauParticle(totalP); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); //P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); @@ -390,7 +392,8 @@ int main(int argc, char *argv[]){ P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); - P->dumpLandau(); + P->dumpLandauParticle(totalP); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; diff --git a/alpine/PenningTrap.cpp b/alpine/ElectrostaticPIC/PenningTrap.cpp similarity index 93% rename from alpine/PenningTrap.cpp rename to alpine/ElectrostaticPIC/PenningTrap.cpp index 87a9a6534..4cb27474c 100644 --- a/alpine/PenningTrap.cpp +++ b/alpine/ElectrostaticPIC/PenningTrap.cpp @@ -205,17 +205,18 @@ int main(int argc, char *argv[]){ } // create mesh and layout objects for this problem domain - Vector_t rmin(0.0); - Vector_t rmax(20.0); + Vector_t rmin = {0.0, 0.0, 0.0}; + //Vector_t rmax = {20.0, 20.0, 20.0}; + Vector_t rmax = {25.0, 25.0, 25.0}; double dx = rmax[0] / nr[0]; double dy = rmax[1] / nr[1]; double dz = rmax[2] / nr[2]; Vector_t hr = {dx, dy, dz}; Vector_t origin = {rmin[0], rmin[1], rmin[2]}; - unsigned int nrMax = 2048;// Max grid size in our studies - double dxFinest = rmax[0] / nrMax; - const double dt = 0.5 * dxFinest;//size of timestep + //unsigned int nrMax = 2048;// Max grid size in our studies + //double dxFinest = rmax[0] / nrMax; + const double dt = std::atof(argv[9]);;//0.5*dx; const bool isAllPeriodic=true; Mesh_t mesh(domain, hr, origin); @@ -236,9 +237,12 @@ int main(int argc, char *argv[]){ for (unsigned d = 0; dE_m.initialize(mesh, FL); P->rho_m.initialize(mesh, FL); @@ -352,6 +356,7 @@ int main(int argc, char *argv[]){ IpplTimings::startTimer(dumpDataTimer); P->dumpData(); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); //P->dumpLocalDomains(FL, 0); IpplTimings::stopTimer(dumpDataTimer); @@ -381,13 +386,13 @@ int main(int argc, char *argv[]){ double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - Eview(j)[0] += Eext_x; - Eview(j)[1] += Eext_y; - Eview(j)[2] += Eext_z; + Eext_x += Eview(j)[0]; + Eext_y += Eview(j)[1]; + Eext_z += Eview(j)[2]; - Pview(j)[0] += alpha * (Eview(j)[0] + Pview(j)[1] * Bext); - Pview(j)[1] += alpha * (Eview(j)[1] - Pview(j)[0] * Bext); - Pview(j)[2] += alpha * Eview(j)[2]; + Pview(j)[0] += alpha * (Eext_x + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eext_y - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eext_z; }); IpplTimings::stopTimer(PTimer); @@ -434,20 +439,22 @@ int main(int argc, char *argv[]){ double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); - E2view(j)[0] += Eext_x; - E2view(j)[1] += Eext_y; - E2view(j)[2] += Eext_z; - P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (E2view(j)[0] - + P2view(j)[1] * Bext + alpha * Bext * E2view(j)[1]) ); - P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (E2view(j)[1] - - P2view(j)[0] * Bext - alpha * Bext * E2view(j)[0]) ); - P2view(j)[2] += alpha * E2view(j)[2]; + Eext_x += E2view(j)[0]; + Eext_y += E2view(j)[1]; + Eext_z += E2view(j)[2]; + + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (Eext_x + + P2view(j)[1] * Bext + alpha * Bext * Eext_y) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (Eext_y + - P2view(j)[0] * Bext - alpha * Bext * Eext_x) ); + P2view(j)[2] += alpha * Eext_z; }); IpplTimings::stopTimer(PTimer); P->time_m += dt; IpplTimings::startTimer(dumpDataTimer); P->dumpData(); + P->dumpEnergy(totalP); P->gatherStatistics(totalP); IpplTimings::stopTimer(dumpDataTimer); msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; diff --git a/alpine/UniformPlasmaTest.cpp b/alpine/ElectrostaticPIC/UniformPlasmaTest.cpp similarity index 100% rename from alpine/UniformPlasmaTest.cpp rename to alpine/ElectrostaticPIC/UniformPlasmaTest.cpp diff --git a/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp new file mode 100644 index 000000000..3ef320c57 --- /dev/null +++ b/alpine/ElectrostaticPIF/BumponTailInstabilityPIF.cpp @@ -0,0 +1,401 @@ +// Electrostatic Two-stream/Bump-on-tail instability test with Particle-in-Fourier schemes +// Usage: +// srun ./BumponTailInstabilityPIF
--info 5 +// nx = No. of Fourier modes in the x-direction +// ny = No. of Fourier modes in the y-direction +// nz = No. of Fourier modes in the z-direction +// Np = Total no. of macro-particles in the simulation +// Nt = Number of time steps +// dt = Time stepsize +// ShapeType = Shape function type B-spline only for the moment +// degree = B-spline degree (-1 for delta function) +// tol = tolerance of NUFFT +// Example: +// srun ./BumponTailInstabilityPIF 32 32 32 655360 20 0.05 B-spline 1 1e-4 --info 5 +// +// Copyright (c) 2023, Sriramkrishnan Muralikrishnan, +// Jülich Supercomputing Centre, Jülich, Germany. +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "ChargedParticlesPIF.hpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T k, delta, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& k_, const T& delta_, + const T& u_) + : k(k_), delta(delta_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = x + (delta * (std::sin(k * x) / k)) - u; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = 1 + (delta * std::cos(k * x)); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while (iterations < max_iter && std::fabs(f(x)) > tol) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + value_type delta, sigma, muBulk, muBeam; + size_type nlocBulk; + + T k, minU, maxU; + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + value_type& delta_, T& k_, value_type& sigma_, + value_type& muBulk_, value_type& muBeam_, + size_type& nlocBulk_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + delta(delta_), sigma(sigma_), muBulk(muBulk_), muBeam(muBeam_), + nlocBulk(nlocBulk_), k(k_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + bool isBeam = (i >= nlocBulk); + + value_type muZ = (value_type)(((!isBeam) * muBulk) + (isBeam * muBeam)); + + for (unsigned d = 0; d < Dim-1; ++d) { + + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + v(i)[d] = rand_gen.normal(0.0, sigma); + } + v(i)[Dim-1] = rand_gen.normal(muZ, sigma); + + value_type u = rand_gen.drand(minU[Dim-1], maxU[Dim-1]); + x(i)[Dim-1] = u / (1 + delta); + Newton1D solver(k[Dim-1], delta, u); + solver.solve(x(i)[Dim-1]); + + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +double CDF(const double& x, const double& delta, const double& k, + const unsigned& dim) { + + bool isDimZ = (dim == (Dim-1)); + double cdf = x + (double)(isDimZ * ((delta / k) * std::sin(k * x))); + return cdf; +} + +const char* TestName = "TwoStreamInstabilityPIF"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + Inform msg(TestName); + Inform msg2all(TestName,INFORM_ALL_NODES); + + ippl::Vector nr = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("kick"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("drift"); + static IpplTimings::TimerRef BCTimer = IpplTimings::getTimer("particleBC"); + static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); + + IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[4]); + const unsigned int nt = std::atoi(argv[5]); + const double dt = std::atof(argv[6]); + + + using bunch_type = ChargedParticlesPIF; + + std::unique_ptr P; + + ippl::NDIndex domain; + for (unsigned i = 0; i< Dim; i++) { + domain[i] = ippl::Index(nr[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::SERIAL; + } + + // create mesh and layout objects for this problem domain + Vector_t kw; + double sigma, muBulk, muBeam, epsilon, delta; + + if(std::strcmp(TestName,"TwoStreamInstabilityPIF") == 0) { + // Parameters for two stream instability as in + // https://www.frontiersin.org/articles/10.3389/fphy.2018.00105/full + kw = {0.5, 0.5, 0.5}; + sigma = 0.1; + epsilon = 0.5; + muBulk = -pi / 2.0; + muBeam = pi / 2.0; + delta = 0.01; + } + else if(std::strcmp(TestName,"BumponTailInstabilityPIF") == 0) { + kw = {0.21, 0.21, 0.21}; + sigma = 1.0 / std::sqrt(2.0); + epsilon = 0.1; + muBulk = 0.0; + muBeam = 4.0; + delta = 0.01; + } + else { + //Default value is two stream instability + kw = {0.5, 0.5, 0.5}; + sigma = 0.1; + epsilon = 0.5; + muBulk = -pi / 2.0; + muBeam = pi / 2.0; + delta = 0.01; + } + + + Vector_t rmin(0.0); + Vector_t rmax = 2 * pi / kw ; + double dx = rmax[0] / nr[0]; + double dy = rmax[1] / nr[1]; + double dz = rmax[2] / nr[2]; + + Vector_t hr = {dx, dy, dz}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + + const bool isAllPeriodic=true; + Mesh_t mesh(domain, hr, origin); + FieldLayout_t FL(domain, decomp, isAllPeriodic); + PLayout_t PL(FL, mesh); + + double factorConf = 1.0/Ippl::Comm->size(); + double factorVelBulk = 1.0 - epsilon; + double factorVelBeam = 1.0 - factorVelBulk; + size_type nlocBulk = (size_type)(factorConf * factorVelBulk * totalP); + size_type nlocBeam = (size_type)(factorConf * factorVelBeam * totalP); + size_type nloc = nlocBulk + nlocBeam; + size_type Total_particles = 0; + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); + + msg << TestName + << endl + << "nt " << nt << " Np= " + << Total_particles << " Fourier modes = " << nr + << endl; + + + //Q = -\int\int f dx dv + double Q = -rmax[0] * rmax[1] * rmax[2]; + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,Total_particles); + + P->nr_m = nr; + + P->rho_m.initialize(mesh, FL); + P->Sk_m.initialize(mesh, FL); + + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domain[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domain[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + P->rhoPIFreal_m.initialize(mesh, FL); + P->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + P->fft_mp = std::make_shared(FL, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + + + P->time_m = 0.0; + + P->shapetype_m = argv[7]; + P->shapedegree_m = std::atoi(argv[8]); + + IpplTimings::startTimer(particleCreation); + + Vector_t minU, maxU; + for (unsigned d = 0; d rank() < rest ) + // ++nloc; + + P->create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + P->R.getView(), P->P.getView(), rand_pool64, delta, kw, sigma, muBulk, + muBeam, nlocBulk, minU, maxU)); + + Kokkos::fence(); + Ippl::Comm->barrier(); + IpplTimings::stopTimer(particleCreation); + + P->q = P->Q_m/Total_particles; + msg << "particles created and initial conditions assigned " << endl; + + IpplTimings::startTimer(initializeShapeFunctionPIF); + P->initializeShapeFunctionPIF(); + IpplTimings::stopTimer(initializeShapeFunctionPIF); + + + double tol = std::atof(argv[9]); + P->initNUFFT(FL,tol); + + P->scatter(); + + P->gather(); + + IpplTimings::startTimer(dumpDataTimer); + P->dumpBumponTail(); + P->dumpEnergy(); + IpplTimings::stopTimer(dumpDataTimer); + + // begin main timestep loop + msg << "Starting iterations ..." << endl; + for (unsigned int it=0; itP = P->P - 0.5 * dt * P->E; + IpplTimings::stopTimer(PTimer); + + //drift + IpplTimings::startTimer(RTimer); + P->R = P->R + dt * P->P; + IpplTimings::stopTimer(RTimer); + + //Apply particle BC + IpplTimings::startTimer(BCTimer); + PL.applyBC(P->R, PL.getRegionLayout().getDomain()); + IpplTimings::stopTimer(BCTimer); + + //scatter the charge onto the underlying grid + P->scatter(); + + // Solve for and gather E field + P->gather(); + + //kick + IpplTimings::startTimer(PTimer); + P->P = P->P - 0.5 * dt * P->E; + IpplTimings::stopTimer(PTimer); + + P->time_m += dt; + IpplTimings::startTimer(dumpDataTimer); + P->dumpBumponTail(); + P->dumpEnergy(); + IpplTimings::stopTimer(dumpDataTimer); + msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + } + + msg << "BumponTailInstability: End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + return 0; +} diff --git a/alpine/ElectrostaticPIF/CMakeLists.txt b/alpine/ElectrostaticPIF/CMakeLists.txt new file mode 100644 index 000000000..969202fa2 --- /dev/null +++ b/alpine/ElectrostaticPIF/CMakeLists.txt @@ -0,0 +1,32 @@ +file (RELATIVE_PATH _relPath "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") +message (STATUS "Adding index test found in ${_relPath}") + +include_directories ( + ${CMAKE_SOURCE_DIR}/src +) + +link_directories ( + ${CMAKE_CURRENT_SOURCE_DIR} + ${Kokkos_DIR}/.. +) + +set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES}) +set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) + +add_executable (LandauDampingPIF LandauDampingPIF.cpp) +target_link_libraries (LandauDampingPIF ${IPPL_LIBS}) + +add_executable (BumponTailInstabilityPIF BumponTailInstabilityPIF.cpp) +target_link_libraries (BumponTailInstabilityPIF ${IPPL_LIBS}) + +add_executable (PenningTrapPIF PenningTrapPIF.cpp) +target_link_libraries (PenningTrapPIF ${IPPL_LIBS}) + +# vi: set et ts=4 sw=4 sts=4: + +# Local Variables: +# mode: cmake +# cmake-tab-width: 4 +# indent-tabs-mode: nil +# require-final-newline: nil +# End: diff --git a/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp new file mode 100644 index 000000000..ecc061798 --- /dev/null +++ b/alpine/ElectrostaticPIF/ChargedParticlesPIF.hpp @@ -0,0 +1,703 @@ +// ChargedParticlesPIF header file +// Defines a particle attribute for charged particles to be used in +// test programs +// +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "Ippl.h" + +// dimension of our positions +constexpr unsigned Dim = 3; + +// some typedefs +typedef ippl::ParticleSpatialLayout PLayout_t; +typedef ippl::UniformCartesian Mesh_t; +typedef ippl::FieldLayout FieldLayout_t; + +using size_type = ippl::detail::size_type; + +template +using Vector = ippl::Vector; + +template +using Field = ippl::Field; + +template +using ParticleAttrib = ippl::ParticleAttrib; + +typedef Vector Vector_t; +typedef Field Field_t; +typedef Field, Dim> CxField_t; +typedef Field VField_t; + +typedef ippl::FFT FFT_t; + +const double pi = std::acos(-1.0); + +// Test programs have to define this variable for VTK dump purposes +extern const char* TestName; + +void dumpVTK(Field_t& rho, int nx, int ny, int nz, int iteration, + double dx, double dy, double dz) { + + typename Field_t::view_type::host_mirror_type host_view = rho.getHostMirror(); + + std::stringstream fname; + fname << "data/scalar_"; + fname << std::setw(4) << std::setfill('0') << iteration; + fname << ".vtk"; + + Kokkos::deep_copy(host_view, rho.getView()); + + Inform vtkout(NULL, fname.str().c_str(), Inform::OVERWRITE); + vtkout.precision(10); + vtkout.setf(std::ios::scientific, std::ios::floatfield); + + // start with header + vtkout << "# vtk DataFile Version 2.0" << endl; + vtkout << TestName << endl; + vtkout << "ASCII" << endl; + vtkout << "DATASET STRUCTURED_POINTS" << endl; + vtkout << "DIMENSIONS " << nx+3 << " " << ny+3 << " " << nz+3 << endl; + vtkout << "ORIGIN " << -dx << " " << -dy << " " << -dz << endl; + vtkout << "SPACING " << dx << " " << dy << " " << dz << endl; + vtkout << "CELL_DATA " << (nx+2)*(ny+2)*(nz+2) << endl; + + vtkout << "SCALARS Rho float" << endl; + vtkout << "LOOKUP_TABLE default" << endl; + for (int z=0; z +class ChargedParticlesPIF : public ippl::ParticleBase { +public: + CxField_t rho_m; + CxField_t rhoPIFhalf_m; + Field_t rhoPIFreal_m; + CxField_t rhoDFT_m; + Field_t Sk_m; + + Vector nr_m; + + ippl::e_dim_tag decomp_m[Dim]; + + Vector_t hr_m; + Vector_t rmin_m; + Vector_t rmax_m; + + double Q_m; + + size_type Np_m; + + double time_m; + + double rhoNorm_m; + + std::string shapetype_m; + + int shapedegree_m; + std::shared_ptr fft_mp; + + std::shared_ptr> nufftType1_mp,nufftType2_mp; + +public: + ParticleAttrib q; // charge + typename ippl::ParticleBase::particle_position_type P; // particle velocity + typename ippl::ParticleBase::particle_position_type E; // electric field at particle position + + + /* + This constructor is mandatory for all derived classes from + ParticleBase as the bunch buffer uses this + */ + ChargedParticlesPIF(PLayout& pl) + : ippl::ParticleBase(pl) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + } + + ChargedParticlesPIF(PLayout& pl, + Vector_t hr, + Vector_t rmin, + Vector_t rmax, + ippl::e_dim_tag decomp[Dim], + double Q, + size_type Np) + : ippl::ParticleBase(pl) + , hr_m(hr) + , rmin_m(rmin) + , rmax_m(rmax) + , Q_m(Q) + , Np_m(Np) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + setupBCs(); + for (unsigned int i = 0; i < Dim; i++) + decomp_m[i]=decomp[i]; + } + + ~ChargedParticlesPIF(){ } + + void setupBCs() { + setBCAllPeriodic(); + } + + void initNUFFT(FieldLayout_t& FL, double& tol) { + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 0); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", tol); + + fftParams.add("use_cufinufft_defaults", false); + //fftParams.add("use_cufinufft_defaults", true); + + nufftType1_mp = std::make_shared>(FL, this->getLocalNum(), 1, fftParams); + nufftType2_mp = std::make_shared>(FL, this->getLocalNum(), 2, fftParams); + } + + void gather() { + + gatherPIFNUFFT(this->E, rho_m, Sk_m, this->R, nufftType2_mp.get(), q); + //gatherPIFNUDFT(this->E, rho_m, Sk_m, this->R); + + //Set the charge back to original as we used this view as a + //temporary buffer during gather + q = Q_m / Np_m; + + } + + void scatter() { + + Inform m("scatter "); + rho_m = {0.0, 0.0}; + scatterPIFNUFFT(q, rho_m, Sk_m, this->R, nufftType1_mp.get()); + //rhoDFT_m = {0.0, 0.0}; + //scatterPIFNUDFT(q, rho_m, Sk_m, this->R); + + //dumpFieldData(); + + rho_m = rho_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); + } + + + void dumpLandau() { + + + double fieldEnergy = 0.0; + double ExAmp = 0.0; + + auto rhoview = rho_m.getView(); + const int nghost = rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rho_m.getLayout(); + const Mesh_t& mesh = rho_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Ex energy and Max", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& tlSum, + double& tlMax) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + auto rho = rhoview(i+nghost,j+nghost,k+nghost); + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[0] * rho * factor); + double myVal = Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + + tlSum += myVal; + + double myValMax = std::sqrt(myVal); + + if(myValMax > tlMax) tlMax = myValMax; + + }, Kokkos::Sum(fieldEnergy), Kokkos::Max(ExAmp)); + + + Kokkos::fence(); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + fieldEnergy *= volume; + + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/FieldLandau_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Ex_field_energy, Ex_max_norm" << endl; + } + + csvout << time_m << " " + << fieldEnergy << " " + << ExAmp << endl; + + } + + Ippl::Comm->barrier(); + } + + + void dumpBumponTail() { + + + double fieldEnergy = 0.0; + double EzAmp = 0.0; + + auto rhoview = rho_m.getView(); + const int nghost = rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rho_m.getLayout(); + const Mesh_t& mesh = rho_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Ez energy and Max", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& tlSum, + double& tlMax) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + auto rho = rhoview(i+nghost,j+nghost,k+nghost); + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[2] * rho * factor); + double myVal = Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + + tlSum += myVal; + + double myValMax = std::sqrt(myVal); + + if(myValMax > tlMax) tlMax = myValMax; + + }, Kokkos::Sum(fieldEnergy), Kokkos::Max(EzAmp)); + + + Kokkos::fence(); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + fieldEnergy *= volume; + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/FieldBumponTail_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Ez_field_energy, Ez_max_norm" << endl; + } + + csvout << time_m << " " + << fieldEnergy << " " + << EzAmp << endl; + + } + + Ippl::Comm->barrier(); + } + + + void dumpEnergy() { + + + double potentialEnergy, kineticEnergy; + double temp = 0.0; + + auto rhoview = rho_m.getView(); + const int nghost = rho_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rho_m.getLayout(); + const Mesh_t& mesh = rho_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Potential energy", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& valL) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + double myVal = 0.0; + auto rho = rhoview(i+nghost,j+nghost,k+nghost); + for(size_t d = 0; d < Dim; ++d) { + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[d] * rho * factor); + myVal += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + } + + valL += myVal; + + }, Kokkos::Sum(temp)); + + + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + potentialEnergy = 0.5 * temp * volume; + + auto Pview = P.getView(); + auto qView = q.getView(); + + temp = 0.0; + + Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), Pview(i)).apply(); + myVal *= -qView(i); + valL += myVal; + }, Kokkos::Sum(temp)); + + temp *= 0.5; + double globaltemp = 0.0; + MPI_Reduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, 0, Ippl::getComm()); + + kineticEnergy = globaltemp; + + auto rhoPIFhalfview = rhoPIFhalf_m.getView(); + const int nghostHalf = rhoPIFhalf_m.getNghost(); + + const FieldLayout_t& layoutHalf = rhoPIFhalf_m.getLayout(); + const auto& domainHalf = layoutHalf.getDomain(); + + Vector Nhalf; + for (unsigned d=0; d < Dim; ++d) { + Nhalf[d] = domainHalf[d].length(); + } + + //Heffte needs FFTshifted field whereas the field from cuFINUFFT + //is not shifted. Hence, here we do the shift. + Kokkos::parallel_for("Transfer complex rho to half domain", + mdrange_type({0, 0, 0}, + {Nhalf[0], + Nhalf[1], + Nhalf[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k) + { + Vector iVec = {i, j, k}; + int shift; + for(size_t d = 0; d < Dim; ++d) { + bool isLessThanHalf = (iVec[d] < (Nhalf[d]/2)); + shift = ((int)isLessThanHalf * 2) - 1; + iVec[d] = (iVec[d] + shift * (Nhalf[d]/2)) + nghostHalf; + } + rhoPIFhalfview(Nhalf[0]-1-i+nghostHalf, iVec[1], iVec[2]) = rhoview(i+nghostHalf,j+nghostHalf,k+nghostHalf); + }); + + + rhoPIFreal_m = 0.0; + fft_mp->transform(-1, rhoPIFreal_m, rhoPIFhalf_m); + + + rhoPIFreal_m = (1.0/(nr_m[0]*nr_m[1]*nr_m[2])) * volume * rhoPIFreal_m; + auto rhoPIFrealview = rhoPIFreal_m.getView(); + temp = 0.0; + Kokkos::parallel_reduce("Rho real sum", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& valL) + { + + valL += rhoPIFrealview(i+nghost, j+nghost, k+nghost); + }, Kokkos::Sum(temp)); + + double charge = temp; + + Vector_t totalMomentum = 0.0; + + for(size_t d = 0; d < Dim; ++d) { + double tempD = 0.0; + Kokkos::parallel_reduce("Total Momentum", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + valL += (-qView(i)) * Pview(i)[d]; + }, Kokkos::Sum(tempD)); + totalMomentum[d] = tempD; + } + + Vector_t globalMom; + + double magMomentum = 0.0; + for(size_t d = 0; d < Dim; ++d) { + MPI_Allreduce(&totalMomentum[d], &globalMom[d], 1, MPI_DOUBLE, MPI_SUM, Ippl::getComm()); + magMomentum += globalMom[d] * globalMom[d]; + } + + magMomentum = std::sqrt(magMomentum); + + if (Ippl::Comm->rank() == 0) { + std::stringstream fname; + fname << "data/Energy_"; + fname << Ippl::Comm->size(); + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND); + csvout.precision(17); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(time_m == 0.0) { + csvout << "time, Potential energy, Kinetic energy, Total energy Total charge Total Momentum" << endl; + } + + csvout << time_m << " " + << potentialEnergy << " " + << kineticEnergy << " " + << potentialEnergy + kineticEnergy << " " + << charge << " " + << magMomentum << endl; + + } + + Ippl::Comm->barrier(); + } + + + void initializeShapeFunctionPIF() { + + using mdrange_type = Kokkos::MDRangePolicy>; + auto Skview = Sk_m.getView(); + auto N = nr_m; + const int nghost = Sk_m.getNghost(); + const Mesh_t& mesh = rho_m.get_mesh(); + const Vector_t& dx = mesh.getMeshSpacing(); + const Vector_t& Len = rmax_m - rmin_m; + const double pi = std::acos(-1.0); + int order = shapedegree_m + 1; + if(shapetype_m == "Gaussian") { + + throw IpplException("initializeShapeFunctionPIF", + "Gaussian shape function not implemented yet"); + + } + else if(shapetype_m == "B-spline") { + + Kokkos::parallel_for("B-spline shape functions", + mdrange_type({0, 0, 0}, + {N[0], N[1], N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Sk = 1.0; + for(size_t d = 0; d < Dim; ++d) { + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + double khbytwo = kVec[d] * dx[d] / 2; + bool isNotZero = (khbytwo != 0.0); + double factor = (1.0 / (khbytwo + ((!isNotZero) * 1.0))); + double arg = isNotZero * (Kokkos::sin(khbytwo) * factor) + + (!isNotZero) * 1.0; + //Fourier transform of CIC + Sk *= std::pow(arg, order); + } + Skview(i+nghost, j+nghost, k+nghost) = Sk; + }); + + } + else { + throw IpplException("initializeShapeFunctionPIF", + "Unrecognized shape function type"); + } + + } + + + void dumpFieldData() { + + typename CxField_t::HostMirror rhoNUFFT_host = rho_m.getHostMirror(); + typename Field_t::HostMirror rhoNUFFT_real = rhoPIFreal_m.getHostMirror(); + //typename CxField_t::HostMirror rhoNUDFT_host = rhoDFT_m.getHostMirror(); + Kokkos::deep_copy(rhoNUFFT_host, rho_m.getView()); + Kokkos::deep_copy(rhoNUFFT_real, rhoPIFreal_m.getView()); + //Kokkos::deep_copy(rhoNUDFT_host, rhoDFT_m.getView()); + const int nghost = rho_m.getNghost(); + std::stringstream pname; + pname << "data/FieldFFT_"; + pname << Ippl::Comm->rank(); + pname << ".csv"; + Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout.precision(10); + pcsvout.setf(std::ios::scientific, std::ios::floatfield); + pcsvout << "rho" << endl; + for (int i = 0; i< nr_m[0]; i++) { + for (int j = 0; j< nr_m[1]; j++) { + for (int k = 0; k< nr_m[2]; k++) { + pcsvout << rhoNUFFT_host(i+nghost,j+nghost, k+nghost) << endl; + } + } + } + std::stringstream pname2; + pname2 << "data/Fieldreal_"; + pname2 << Ippl::Comm->rank(); + pname2 << ".csv"; + Inform pcsvout2(NULL, pname2.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + pcsvout2.precision(10); + pcsvout2.setf(std::ios::scientific, std::ios::floatfield); + pcsvout2 << "rho" << endl; + for (int i = 0; i< nr_m[0]; i++) { + for (int j = 0; j< nr_m[1]; j++) { + for (int k = 0; k< nr_m[2]; k++) { + pcsvout2 << rhoNUFFT_real(i+nghost,j+nghost, k+nghost) << endl; + } + } + } + Ippl::Comm->barrier(); + } + + + //void dumpParticleData() { + + // typename ParticleAttrib::HostMirror R_host = this->R.getHostMirror(); + // typename ParticleAttrib::HostMirror P_host = this->P.getHostMirror(); + // Kokkos::deep_copy(R_host, this->R.getView()); + // Kokkos::deep_copy(P_host, P.getView()); + // std::stringstream pname; + // pname << "data/ParticleIC_"; + // pname << Ippl::Comm->rank(); + // pname << ".csv"; + // Inform pcsvout(NULL, pname.str().c_str(), Inform::OVERWRITE, Ippl::Comm->rank()); + // pcsvout.precision(10); + // pcsvout.setf(std::ios::scientific, std::ios::floatfield); + // pcsvout << "R_x, R_y, R_z, V_x, V_y, V_z" << endl; + // for (size_type i = 0; i< this->getLocalNum(); i++) { + // pcsvout << R_host(i)[0] << " " + // << R_host(i)[1] << " " + // << R_host(i)[2] << " " + // << P_host(i)[0] << " " + // << P_host(i)[1] << " " + // << P_host(i)[2] << endl; + // } + // Ippl::Comm->barrier(); + //} + +private: + void setBCAllPeriodic() { + + this->setParticleBC(ippl::BC::PERIODIC); + } + +}; diff --git a/alpine/ElectrostaticPIF/LandauDampingPIF.cpp b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp new file mode 100644 index 000000000..e07e249fc --- /dev/null +++ b/alpine/ElectrostaticPIF/LandauDampingPIF.cpp @@ -0,0 +1,363 @@ +// Electrostatic Landau damping test with Particle-in-Fourier schemes +// Usage: +// srun ./LandauDampingPIF
--info 5 +// nx = No. of Fourier modes in the x-direction +// ny = No. of Fourier modes in the y-direction +// nz = No. of Fourier modes in the z-direction +// Np = Total no. of macro-particles in the simulation +// Nt = Number of time steps +// dt = Time stepsize +// ShapeType = Shape function type B-spline only for the moment +// degree = B-spline degree (-1 for delta function) +// tol = tolerance of NUFFT +// Example: +// srun ./LandauDampingPIF 32 32 32 655360 20 0.05 B-spline 1 1e-4 --info 5 +// +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, +// Jülich Supercomputing Centre, Jülich, Germany. +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "ChargedParticlesPIF.hpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T k, alpha, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& k_, const T& alpha_, + const T& u_) + : k(k_), alpha(alpha_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = x + (alpha * (std::sin(k * x) / k)) - u; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = 1 + (alpha * std::cos(k * x)); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while (iterations < max_iter && std::fabs(f(x)) > tol) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + value_type alpha; + + T k, minU, maxU; + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + value_type& alpha_, T& k_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + alpha(alpha_), k(k_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + value_type u; + for (unsigned d = 0; d < Dim; ++d) { + + u = rand_gen.drand(minU[d], maxU[d]); + x(i)[d] = u / (1 + alpha); + Newton1D solver(k[d], alpha, u); + solver.solve(x(i)[d]); + v(i)[d] = rand_gen.normal(0.0, 1.0); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +double CDF(const double& x, const double& alpha, const double& k) { + double cdf = x + (alpha / k) * std::sin(k * x); + return cdf; +} + +KOKKOS_FUNCTION +double PDF(const Vector_t& xvec, const double& alpha, + const Vector_t& kw, const unsigned Dim) { + double pdf = 1.0; + + for (unsigned d = 0; d < Dim; ++d) { + pdf *= (1.0 + alpha * std::cos(kw[d] * xvec[d])); + } + return pdf; +} + +const char* TestName = "LandauDampingPIF"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + Inform msg("LandauDampingPIF"); + Inform msg2all("LandauDampingPIF",INFORM_ALL_NODES); + + ippl::Vector nr = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("kick"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("drift"); + static IpplTimings::TimerRef BCTimer = IpplTimings::getTimer("particleBC"); + static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); + + IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[4]); + const unsigned int nt = std::atoi(argv[5]); + const double dt = std::atof(argv[6]); + + double factor = 1.0/Ippl::Comm->size(); + size_type nloc = (size_type)(factor * totalP); + size_type Total_particles = 0; + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); + + + msg << "Landau damping" + << endl + << "nt " << nt << " Np= " + << Total_particles << " Fourier modes = " << nr + << endl; + + using bunch_type = ChargedParticlesPIF; + + std::unique_ptr P; + + ippl::NDIndex domain; + for (unsigned i = 0; i< Dim; i++) { + domain[i] = ippl::Index(nr[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::SERIAL; + } + + // create mesh and layout objects for this problem domain + Vector_t kw = {0.5, 0.5, 0.5}; + double alpha = 0.05; + Vector_t rmin(0.0); + Vector_t rmax = 2 * pi / kw; + Vector_t length = rmax - rmin; + double dx = length[0] / nr[0]; + double dy = length[1] / nr[1]; + double dz = length[2] / nr[2]; + + Vector_t hr = {dx, dy, dz}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + + const bool isAllPeriodic=true; + Mesh_t mesh(domain, hr, origin); + FieldLayout_t FL(domain, decomp, isAllPeriodic); + PLayout_t PL(FL, mesh); + + //Q = -\int\int f dx dv + double Q = -length[0] * length[1] * length[2]; + P = std::make_unique(PL,hr,rmin,rmax,decomp,Q,Total_particles); + + P->nr_m = nr; + + P->rho_m.initialize(mesh, FL); + P->rhoDFT_m.initialize(mesh, FL); + P->Sk_m.initialize(mesh, FL); + + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domain[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domain[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + P->rhoPIFreal_m.initialize(mesh, FL); + P->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + P->fft_mp = std::make_shared(FL, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + + + P->time_m = 0.0; + + P->shapetype_m = argv[7]; + P->shapedegree_m = std::atoi(argv[8]); + + IpplTimings::startTimer(particleCreation); + + Vector_t minU, maxU; + for (unsigned d = 0; d rank() < rest ) + // ++nloc; + + P->create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + P->R.getView(), P->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + + Kokkos::fence(); + Ippl::Comm->barrier(); + IpplTimings::stopTimer(particleCreation); + + P->q = P->Q_m/Total_particles; + msg << "particles created and initial conditions assigned " << endl; + + IpplTimings::startTimer(initializeShapeFunctionPIF); + P->initializeShapeFunctionPIF(); + IpplTimings::stopTimer(initializeShapeFunctionPIF); + + double tol = std::atof(argv[9]); + P->initNUFFT(FL,tol); + + P->scatter(); + + P->gather(); + + IpplTimings::startTimer(dumpDataTimer); + P->dumpBumponTail(); + P->dumpEnergy(); + IpplTimings::stopTimer(dumpDataTimer); + + // begin main timestep loop + msg << "Starting iterations ..." << endl; + for (unsigned int it=0; itP = P->P - 0.5 * dt * P->E; + IpplTimings::stopTimer(PTimer); + + //drift + IpplTimings::startTimer(RTimer); + P->R = P->R + dt * P->P; + IpplTimings::stopTimer(RTimer); + + //Apply particle BC + IpplTimings::startTimer(BCTimer); + PL.applyBC(P->R, PL.getRegionLayout().getDomain()); + IpplTimings::stopTimer(BCTimer); + + //scatter the charge onto the underlying grid + P->scatter(); + + // Solve for and gather E field + P->gather(); + + //kick + IpplTimings::startTimer(PTimer); + P->P = P->P - 0.5 * dt * P->E; + IpplTimings::stopTimer(PTimer); + + P->time_m += dt; + IpplTimings::startTimer(dumpDataTimer); + P->dumpBumponTail(); + P->dumpEnergy(); + IpplTimings::stopTimer(dumpDataTimer); + msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + } + + msg << "LandauDamping: End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + return 0; +} diff --git a/alpine/ElectrostaticPIF/PenningTrapPIF.cpp b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp new file mode 100644 index 000000000..54984352e --- /dev/null +++ b/alpine/ElectrostaticPIF/PenningTrapPIF.cpp @@ -0,0 +1,401 @@ +// Electrostatic Penning trap test with Particle-in-Fourier schemes +// Usage: +// srun ./PenningTrapPIF
--info 5 +// nx = No. of Fourier modes in the x-direction +// ny = No. of Fourier modes in the y-direction +// nz = No. of Fourier modes in the z-direction +// Np = Total no. of macro-particles in the simulation +// Nt = Number of time steps +// dt = Time stepsize +// ShapeType = Shape function type B-spline only for the moment +// degree = B-spline degree (-1 for delta function) +// tol = tolerance of NUFFT +// Example: +// srun ./PenningTrapPIF 32 32 32 655360 20 0.05 B-spline 1 1e-4 --info 5 +// +// Copyright (c) 2023, Sriramkrishnan Muralikrishnan, +// Jülich Supercomputing Centre, Jülich, Germany. +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "ChargedParticlesPIF.hpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T mu, sigma, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& mu_, const T& sigma_, + const T& u_) + : mu(mu_), sigma(sigma_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = std::erf((x - mu)/(sigma * std::sqrt(2.0))) + - 2 * u + 1; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = (1 / sigma) * std::sqrt(2 / pi) * + std::exp(-0.5 * (std::pow(((x - mu) / sigma),2))); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while ((iterations < max_iter) && (std::fabs(f(x)) > tol)) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + T mu, sigma, minU, maxU; + + double pi = std::acos(-1.0); + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + T& mu_, T& sigma_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + mu(mu_), sigma(sigma_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + value_type u; + for (unsigned d = 0; d < Dim; ++d) { + u = rand_gen.drand(minU[d], maxU[d]); + x(i)[d] = (std::sqrt(pi / 2) * (2 * u - 1)) * + sigma[d] + mu[d]; + Newton1D solver(mu[d], sigma[d], u); + solver.solve(x(i)[d]); + v(i)[d] = rand_gen.normal(0.0, 1.0); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +double CDF(const double& x, const double& mu, const double& sigma) { + double cdf = 0.5 * (1.0 + std::erf((x - mu)/(sigma * std::sqrt(2)))); + return cdf; +} + +const char* TestName = "PenningTrapPIF"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + Inform msg(TestName); + Inform msg2all(TestName,INFORM_ALL_NODES); + + ippl::Vector nr = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef dumpDataTimer = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef PTimer = IpplTimings::getTimer("kick"); + static IpplTimings::TimerRef RTimer = IpplTimings::getTimer("drift"); + static IpplTimings::TimerRef BCTimer = IpplTimings::getTimer("particleBC"); + static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); + + IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[4]); + const unsigned int nt = std::atoi(argv[5]); + const double dt = std::atof(argv[6]); + + double factor = 1.0/Ippl::Comm->size(); + size_type nloc = (size_type)(factor * totalP); + size_type Total_particles = 0; + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, Ippl::getComm()); + + + msg << TestName + << endl + << "nt " << nt << " Np= " + << Total_particles << " Fourier modes = " << nr + << endl; + + using bunch_type = ChargedParticlesPIF; + + std::unique_ptr P; + + ippl::NDIndex domain; + for (unsigned i = 0; i< Dim; i++) { + domain[i] = ippl::Index(nr[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::SERIAL; + } + + // create mesh and layout objects for this problem domain + Vector_t rmin(0.0); + Vector_t rmax(25.0); + double dx = rmax[0] / nr[0]; + double dy = rmax[1] / nr[1]; + double dz = rmax[2] / nr[2]; + + Vector_t length = rmax - rmin; + + Vector_t mu, sd; + + for (unsigned d = 0; d(PL,hr,rmin,rmax,decomp,Q,Total_particles); + + P->nr_m = nr; + + P->rho_m.initialize(mesh, FL); + P->Sk_m.initialize(mesh, FL); + + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domain[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domain[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + P->rhoPIFreal_m.initialize(mesh, FL); + P->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + P->fft_mp = std::make_shared(FL, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + + + P->time_m = 0.0; + + P->shapetype_m = argv[7]; + P->shapedegree_m = std::atoi(argv[8]); + + IpplTimings::startTimer(particleCreation); + + Vector_t minU, maxU; + for (unsigned d = 0; d rank() < rest ) + // ++nloc; + + P->create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*Ippl::Comm->rank())); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + P->R.getView(), P->P.getView(), rand_pool64, mu, sd, minU, maxU)); + + Kokkos::fence(); + Ippl::Comm->barrier(); + IpplTimings::stopTimer(particleCreation); + + P->q = P->Q_m/Total_particles; + msg << "particles created and initial conditions assigned " << endl; + + IpplTimings::startTimer(initializeShapeFunctionPIF); + P->initializeShapeFunctionPIF(); + IpplTimings::stopTimer(initializeShapeFunctionPIF); + + double tol = std::atof(argv[9]); + P->initNUFFT(FL,tol); + + P->scatter(); + + P->gather(); + + IpplTimings::startTimer(dumpDataTimer); + //P->dumpEnergy(); + IpplTimings::stopTimer(dumpDataTimer); + + double alpha = -0.5 * dt; + double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); + // begin main timestep loop + msg << "Starting iterations ..." << endl; + for (unsigned int it=0; itR.getView(); + auto Pview = P->P.getView(); + auto Eview = P->E.getView(); + double V0 = 30*rmax[2]; + Kokkos::parallel_for("Kick1", P->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(Rview(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + Eext_x += Eview(j)[0]; + Eext_y += Eview(j)[1]; + Eext_z += Eview(j)[2]; + + Pview(j)[0] += alpha * (Eext_x + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eext_y - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eext_z; + }); + IpplTimings::stopTimer(PTimer); + + //drift + IpplTimings::startTimer(RTimer); + P->R = P->R + dt * P->P; + IpplTimings::stopTimer(RTimer); + + //Apply particle BC + IpplTimings::startTimer(BCTimer); + PL.applyBC(P->R, PL.getRegionLayout().getDomain()); + IpplTimings::stopTimer(BCTimer); + + //scatter the charge onto the underlying grid + P->scatter(); + + // Solve for and gather E field + P->gather(); + + //kick + IpplTimings::startTimer(PTimer); + auto R2view = P->R.getView(); + auto P2view = P->P.getView(); + auto E2view = P->E.getView(); + Kokkos::parallel_for("Kick2", P->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(R2view(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + Eext_x += E2view(j)[0]; + Eext_y += E2view(j)[1]; + Eext_z += E2view(j)[2]; + + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (Eext_x + + P2view(j)[1] * Bext + alpha * Bext * Eext_y) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (Eext_y + - P2view(j)[0] * Bext - alpha * Bext * Eext_x) ); + P2view(j)[2] += alpha * Eext_z; + }); + IpplTimings::stopTimer(PTimer); + + P->time_m += dt; + IpplTimings::startTimer(dumpDataTimer); + //P->dumpEnergy(); + IpplTimings::stopTimer(dumpDataTimer); + msg << "Finished time step: " << it+1 << " time: " << P->time_m << endl; + } + + msg << TestName << " End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + return 0; +} diff --git a/alpine/PinT/BumponTailInstabilityPinT.cpp b/alpine/PinT/BumponTailInstabilityPinT.cpp new file mode 100644 index 000000000..94fd4dd8a --- /dev/null +++ b/alpine/PinT/BumponTailInstabilityPinT.cpp @@ -0,0 +1,770 @@ +// Parallel-in-time (PinT) method Parareal combined with Particle-in-cell +// and Particle-in-Fourier schemes. The example is electrostatic Landau +// damping. The implementation of Parareal follows the open source implementation +// https://github.com/Parallel-in-Time/PararealF90 by Daniel Ruprecht. The corresponding +// publication is Ruprecht, Daniel. "Shared memory pipelined parareal." +// European Conference on Parallel Processing. Springer, Cham, 2017. +// +// Usage: +// Usage: +// srun ./BumponTailInstabilityPinT +// +// --info 5 +// nmx = No. of Fourier modes in the x-direction +// nmy = No. of Fourier modes in the y-direction +// nmz = No. of Fourier modes in the z-direction +// nx = No. of grid points in the x-direction (not used if PIF is also used as coarse propagator) +// ny = No. of grid points in the y-direction (not used if PIF is also used as coarse propagator) +// nz = No. of grid points in the z-direction (not used if PIF is also used as coarse propagator) +// Np = Total no. of macro-particles in the simulation +// tolParareal = Parareal tolerance +// nCycles = No. of Parareal blocks/cycles +// ShapeType = Shape function type B-spline only for the moment +// degree = B-spline degree (-1 for delta function) +// No. of space procs = Number of MPI ranks to be used in the spatial parallelization +// No. of time procs = Number of MPI ranks to be used in the time parallelization +// coarseTol = Coarse tolerance for PIF if we use PIF as a coarse propagator (will not be used when PIC is used) +// fineTol = fine tolerance for PIF +// coarseType = Type of coarse propagator (PIF or PIC) +// Example: +// srun ./BumponTailInstabilityPinT 32 32 32 16 16 16 655360 19.2 0.05 0.05 1e-5 1 B-spline 1 4 16 1e-2 1e-4 PIC --info 5 +// +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, +// Jülich Supercomputing Centre, Jülich, Germany. +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "ChargedParticlesPinT.hpp" +#include "StatesSlice.hpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T k, delta, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& k_, const T& delta_, + const T& u_) + : k(k_), delta(delta_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = x + (delta * (std::sin(k * x) / k)) - u; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = 1 + (delta * std::cos(k * x)); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while (iterations < max_iter && std::fabs(f(x)) > tol) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + value_type delta, sigma, muBulk, muBeam; + size_type nlocBulk; + + T k, minU, maxU; + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + value_type& delta_, T& k_, value_type& sigma_, + value_type& muBulk_, value_type& muBeam_, + size_type& nlocBulk_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + delta(delta_), sigma(sigma_), muBulk(muBulk_), muBeam(muBeam_), + nlocBulk(nlocBulk_), k(k_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + bool isBeam = (i >= nlocBulk); + + value_type muZ = (value_type)(((!isBeam) * muBulk) + (isBeam * muBeam)); + + for (unsigned d = 0; d < Dim-1; ++d) { + + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + v(i)[d] = rand_gen.normal(0.0, sigma); + } + v(i)[Dim-1] = rand_gen.normal(muZ, sigma); + + value_type u = rand_gen.drand(minU[Dim-1], maxU[Dim-1]); + x(i)[Dim-1] = u / (1 + delta); + Newton1D solver(k[Dim-1], delta, u); + solver.solve(x(i)[Dim-1]); + + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +double CDF(const double& x, const double& delta, const double& k, + const unsigned& dim) { + + bool isDimZ = (dim == (Dim-1)); + double cdf = x + (double)(isDimZ * ((delta / k) * std::sin(k * x))); + return cdf; +} + +double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + Vector_t& length, MPI_Comm& spaceComm) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + + //This is just to undo the effect of periodic BCs during the + //error calculation. Otherwise even though the actual error is + //small the computed error might be very large. + //The values (e.g. 10) mentioned here are just an adhoc + //value depending on the domain length. + for (unsigned d = 0; d < 3; ++d) { + bool isLeft = (diff[d] <= -10.0); + bool isRight = (diff[d] >= 10.0); + bool isInside = ((diff[d] > -10.0) && (diff[d] < 10.0)); + diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) + +(isRight * (diff[d] - length[d])); + } + + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); + + Kokkos::fence(); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); + + return relError; + +} + +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, MPI_Comm& spaceComm) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); + + Kokkos::fence(); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); + + return relError; + +} + +const char* TestName = "TwoStreamInstability"; +//const char* TestName = "BumponTailInstability"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + int spaceColor, timeColor; + MPI_Comm spaceComm, timeComm; + + int spaceProcs = std::atoi(argv[15]); + int timeProcs = std::atoi(argv[16]); + spaceColor = Ippl::Comm->rank() / spaceProcs; + timeColor = Ippl::Comm->rank() % spaceProcs; + + MPI_Comm_split(Ippl::getComm(), spaceColor, Ippl::Comm->rank(), &spaceComm); + MPI_Comm_split(Ippl::getComm(), timeColor, Ippl::Comm->rank(), &timeComm); + + int rankSpace, sizeSpace, rankTime, sizeTime; + MPI_Comm_rank(spaceComm, &rankSpace); + MPI_Comm_size(spaceComm, &sizeSpace); + + MPI_Comm_rank(timeComm, &rankTime); + MPI_Comm_size(timeComm, &sizeTime); + + + Inform msg(TestName, Ippl::Comm->size()-1); + Inform msg2all(TestName,INFORM_ALL_NODES); + + ippl::Vector nmPIF = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + ippl::Vector nrPIC = { + std::atoi(argv[4]), + std::atoi(argv[5]), + std::atoi(argv[6]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef timeCommunication = IpplTimings::getTimer("timeCommunication"); + static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); + static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); + static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); + static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); + + IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[7]); + const double tEnd = std::atof(argv[8]); + const unsigned int nCycles = std::atoi(argv[12]); + double tEndCycle = tEnd / nCycles; + const double dtSlice = tEndCycle / sizeTime; + const double dtFine = std::atof(argv[9]); + const double dtCoarse = std::atof(argv[10]); + const unsigned int ntFine = std::ceil(dtSlice / dtFine); + const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); + const double tol = std::atof(argv[11]); + + using bunch_type = ChargedParticlesPinT; + using states_type = StatesSlice; + + std::unique_ptr Pcoarse; + std::unique_ptr Pbegin; + std::unique_ptr Pend; + + ippl::NDIndex domainPIC; + ippl::NDIndex domainPIF; + for (unsigned i = 0; i< Dim; i++) { + domainPIC[i] = ippl::Index(nrPIC[i]); + domainPIF[i] = ippl::Index(nmPIF[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::SERIAL; + } + + // create mesh and layout objects for this problem domain + Vector_t kw; + double sigma, muBulk, muBeam, epsilon, delta; + + + if(std::strcmp(TestName,"TwoStreamInstability") == 0) { + // Parameters for two stream instability as in + // https://www.frontiersin.org/articles/10.3389/fphy.2018.00105/full + kw = {0.5, 0.5, 0.5}; + sigma = 0.1; + epsilon = 0.5; + muBulk = -pi / 2.0; + muBeam = pi / 2.0; + delta = 0.01; + } + else if(std::strcmp(TestName,"BumponTailInstability") == 0) { + kw = {0.21, 0.21, 0.21}; + sigma = 1.0 / std::sqrt(2.0); + epsilon = 0.1; + muBulk = 0.0; + muBeam = 4.0; + delta = 0.01; + } + else { + //Default value is two stream instability + kw = {0.5, 0.5, 0.5}; + sigma = 0.1; + epsilon = 0.5; + muBulk = -pi / 2.0; + muBeam = pi / 2.0; + delta = 0.01; + } + Vector_t rmin(0.0); + Vector_t rmax = (2 * pi / kw); + Vector_t length = rmax - rmin; + double dxPIC = length[0] / nrPIC[0]; + double dyPIC = length[1] / nrPIC[1]; + double dzPIC = length[2] / nrPIC[2]; + + + double dxPIF = length[0] / nmPIF[0]; + double dyPIF = length[1] / nmPIF[1]; + double dzPIF = length[2] / nmPIF[2]; + Vector_t hrPIC = {dxPIC, dyPIC, dzPIC}; + Vector_t hrPIF = {dxPIF, dyPIF, dzPIF}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + + const bool isAllPeriodic=true; + Mesh_t meshPIC(domainPIC, hrPIC, origin); + Mesh_t meshPIF(domainPIF, hrPIF, origin); + FieldLayout_t FLPIC(domainPIC, decomp, isAllPeriodic); + FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); + PLayout_t PL(FLPIC, meshPIC); + + + double factorVelBulk = 1.0 - epsilon; + double factorVelBeam = 1.0 - factorVelBulk; + double factorConf = 1.0 / sizeSpace; + size_type nlocBulk = (size_type)(factorConf * factorVelBulk * totalP); + size_type nlocBeam = (size_type)(factorConf * factorVelBeam * totalP); + size_type nloc = nlocBulk + nlocBeam; + + size_type Total_particles = 0; + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); + + //Q = -\int\int f dx dv + double Q = -length[0] * length[1] * length[2]; + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,Total_particles); + Pbegin = std::make_unique(PL); + Pend = std::make_unique(PL); + + Pcoarse->nr_m = nrPIC; + Pcoarse->nm_m = nmPIF; + + Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); + Pcoarse->Sk_m.initialize(meshPIF, FLPIF); + + Pcoarse->coarsetype_m = argv[19]; + + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->initFFTSolver(); + } + + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + Pcoarse->rhoPIFreal_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + Pcoarse->fft_mp = std::make_shared(FLPIF, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + + Vector_t minU, maxU; + for (unsigned d = 0; d shapetype_m = argv[13]; + Pcoarse->shapedegree_m = std::atoi(argv[14]); + IpplTimings::startTimer(initializeShapeFunctionPIF); + Pcoarse->initializeShapeFunctionPIF(); + IpplTimings::stopTimer(initializeShapeFunctionPIF); + + IpplTimings::startTimer(particleCreation); + + Pcoarse->create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); + + Pcoarse->q = Pcoarse->Q_m/Total_particles; + + IpplTimings::stopTimer(particleCreation); + + + double coarseTol = std::atof(argv[17]); + double fineTol = std::atof(argv[18]); + Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + std::string coarse = "Coarse"; + std::string fine = "Fine"; + + IpplTimings::startTimer(particleCreation); + +#ifdef KOKKOS_ENABLE_CUDA + + //If we don't do the following even with the same seed the initial + //condition is not the same on different GPUs + + IpplTimings::startTimer(timeCommunication); + //For some reason using the next_tag with multiple cycles is not + //working so we use static tags here + tag = 500;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(rankTime == 0) { + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, delta, kw, + sigma, muBulk, muBeam, nlocBulk, minU, maxU)); + + + Kokkos::fence(); + } + else { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + } + else { + //PIF with coarse tolerance as coarse propagator + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + if(rankTime < sizeTime-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); + +#else + //Note the CPU version has not been tested. + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, delta, kw, + sigma, muBulk, muBeam, nlocBulk, minU, maxU)); + + + Kokkos::fence(); + Ippl::Comm->barrier(); +#endif + + msg << "Parareal " + << TestName + << endl + << "Slice dT: " << dtSlice + << endl + << "No. of fine time steps: " << ntFine + << endl + << "No. of coarse time steps: " << ntCoarse + << endl + << "Tolerance: " << tol + << " No. of cycles: " << nCycles + << endl + << "Np= " << Total_particles + << " Fourier modes = " << nmPIF + << " Grid points = " << nrPIC + << endl; + + IpplTimings::stopTimer(particleCreation); + + msg << "particles created and initial conditions assigned " << endl; + + int sign = 1; + for (unsigned int nc=0; nc < nCycles; nc++) { + + double tStartMySlice; + bool sendCriteria, recvCriteria; + bool isConverged = false; + bool isPreviousDomainConverged = false; + + //even cycles + if(nc % 2 == 0) { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + if(rankTime == 0) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + msg.setPrintNode(Ippl::Comm->size()-1); + } + //odd cycles + else { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + if(rankTime == (sizeTime - 1)) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + msg.setPrintNode(0); + } + + unsigned int it = 0; + + while (!isConverged) { + //Run fine integrator in parallel + IpplTimings::startTimer(finePropagator); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, rankTime, rankSpace, fine, spaceComm); + IpplTimings::stopTimer(finePropagator); + + + //Difference = Fine - Coarse + Pend->R = Pbegin->R - Pcoarse->R; + Pend->P = Pbegin->P - Pcoarse->P; + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(recvCriteria && (!isPreviousDomainConverged)) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime-sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, rankTime-sign, tagbool, + timeComm, MPI_STATUS_IGNORE); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + } + else { + Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + + Pend->R = Pend->R + Pcoarse->R; + Pend->P = Pend->P + Pcoarse->P; + + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); + + IpplTimings::startTimer(computeErrors); + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); + IpplTimings::stopTimer(computeErrors); + + if((Rerror <= tol) && (Perror <= tol) && isPreviousDomainConverged) { + isConverged = true; + } + + IpplTimings::startTimer(timeCommunication); + if(sendCriteria) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime+sign, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(&isConverged, 1, MPI_C_BOOL, rankTime+sign, tagbool, timeComm); + } + IpplTimings::stopTimer(timeCommunication); + + + msg << "Finished iteration: " << it+1 + << " in cycle: " << nc+1 + << " Rerror: " << Rerror + << " Perror: " << Perror + << endl; + + IpplTimings::startTimer(dumpData); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + IpplTimings::stopTimer(dumpData); + + MPI_Barrier(spaceComm); + + it += 1; + } + + MPI_Barrier(MPI_COMM_WORLD); + if((nCycles > 1) && (nc < (nCycles - 1))) { + tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + //send, receive criteria and tStartMySlice are reversed at the end of the cycle + if(nc % 2 == 0) { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + } + //odd cycles + else { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + } + + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + if(recvCriteria) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime+sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + } + else { + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + if(sendCriteria) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime-sign, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); + sign *= -1; + } + } + msg << TestName << " Parareal: End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + MPI_Comm_free(&spaceComm); + MPI_Comm_free(&timeComm); + + + return 0; +} diff --git a/alpine/PinT/CMakeLists.txt b/alpine/PinT/CMakeLists.txt new file mode 100644 index 000000000..73976aa27 --- /dev/null +++ b/alpine/PinT/CMakeLists.txt @@ -0,0 +1,32 @@ +file (RELATIVE_PATH _relPath "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") +message (STATUS "Adding index test found in ${_relPath}") + +include_directories ( + ${CMAKE_SOURCE_DIR}/src +) + +link_directories ( + ${CMAKE_CURRENT_SOURCE_DIR} + ${Kokkos_DIR}/.. +) + +set (IPPL_LIBS ippl ${MPI_CXX_LIBRARIES}) +set (COMPILE_FLAGS ${OPAL_CXX_FLAGS}) + +add_executable (LandauDampingPinT LandauDampingPinT.cpp) +target_link_libraries (LandauDampingPinT ${IPPL_LIBS}) + +add_executable (BumponTailInstabilityPinT BumponTailInstabilityPinT.cpp) +target_link_libraries (BumponTailInstabilityPinT ${IPPL_LIBS}) + +add_executable (PenningTrapPinT PenningTrapPinT.cpp) +target_link_libraries (PenningTrapPinT ${IPPL_LIBS}) + +# vi: set et ts=4 sw=4 sts=4: + +# Local Variables: +# mode: cmake +# cmake-tab-width: 4 +# indent-tabs-mode: nil +# require-final-newline: nil +# End: diff --git a/alpine/PinT/ChargedParticlesPinT.hpp b/alpine/PinT/ChargedParticlesPinT.hpp new file mode 100644 index 000000000..7790c8f26 --- /dev/null +++ b/alpine/PinT/ChargedParticlesPinT.hpp @@ -0,0 +1,913 @@ +// ChargedParticlesPinT header file +// Defines a particle attribute for charged particles to be used in +// test programs +// +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "Ippl.h" +#include "Solver/FFTPeriodicPoissonSolver.h" + +// dimension of our positions +constexpr unsigned Dim = 3; + +// some typedefs +typedef ippl::ParticleSpatialLayout PLayout_t; +typedef ippl::UniformCartesian Mesh_t; +typedef ippl::FieldLayout FieldLayout_t; + +using size_type = ippl::detail::size_type; + +template +using Vector = ippl::Vector; + +template +using Field = ippl::Field; + +template +using ParticleAttrib = ippl::ParticleAttrib; + +typedef Vector Vector_t; +typedef Field Field_t; +typedef Field, Dim> CxField_t; +typedef Field VField_t; +typedef ippl::FFTPeriodicPoissonSolver Solver_t; + +typedef ippl::FFT FFT_t; + +const double pi = std::acos(-1.0); + +// Test programs have to define this variable for VTK dump purposes +extern const char* TestName; + +template +class ChargedParticlesPinT : public ippl::ParticleBase { +public: + + CxField_t rhoPIF_m; + CxField_t rhoPIFhalf_m; + Field_t rhoPIFreal_m; + Field_t Sk_m; + Field_t rhoPIC_m; + VField_t EfieldPIC_m; + + Vector nr_m; + Vector nm_m; + + ippl::e_dim_tag decomp_m[Dim]; + + Vector_t hr_m; + Vector_t rmin_m; + Vector_t rmax_m; + + double Q_m; + + size_type Np_m; + + std::shared_ptr solver_mp; + std::shared_ptr fft_mp; + + double time_m; + + std::string shapetype_m; + + std::string coarsetype_m; + + int shapedegree_m; + + std::shared_ptr> nufftType1Fine_mp,nufftType2Fine_mp, + nufftType1Coarse_mp,nufftType2Coarse_mp; + +public: + ParticleAttrib q; // charge + typename ippl::ParticleBase::particle_position_type P; // G(P^(k)_n) + typename ippl::ParticleBase::particle_position_type E; // electric field at particle position + + typename ippl::ParticleBase::particle_position_type R0; // Initial particle positions at t=0 + typename ippl::ParticleBase::particle_position_type P0; // Initial particle velocities at t=0 + + typename ippl::ParticleBase::particle_position_type RprevIter; // G(R^(k-1)_n) + typename ippl::ParticleBase::particle_position_type PprevIter; // G(P^(k-1)_n) + + ///* + // This constructor is mandatory for all derived classes from + // ParticleBase as the bunch buffer uses this + //*/ + //ChargedParticlesPinT(PLayout& pl) + //: ippl::ParticleBase(pl) + //{ + // // register the particle attributes + // this->addAttribute(q); + // this->addAttribute(P); + // this->addAttribute(E); + // this->addAttribute(R0); + // this->addAttribute(P0); + // this->addAttribute(RprevIter); + // this->addAttribute(PprevIter); + // //this->addAttribute(Rfine); + // //this->addAttribute(Pfine); + //} + + ChargedParticlesPinT(PLayout& pl, + Vector_t hr, + Vector_t rmin, + Vector_t rmax, + ippl::e_dim_tag decomp[Dim], + double Q, + size_type Np) + : ippl::ParticleBase(pl) + , hr_m(hr) + , rmin_m(rmin) + , rmax_m(rmax) + , Q_m(Q) + , Np_m(Np) + { + // register the particle attributes + this->addAttribute(q); + this->addAttribute(P); + this->addAttribute(E); + this->addAttribute(R0); + this->addAttribute(P0); + this->addAttribute(RprevIter); + this->addAttribute(PprevIter); + setupBCs(); + for (unsigned int i = 0; i < Dim; i++) + decomp_m[i]=decomp[i]; + } + + ~ChargedParticlesPinT(){ } + + void setupBCs() { + setBCAllPeriodic(); + } + + + void initFFTSolver() { + ippl::ParameterList sp; + sp.add("output_type", Solver_t::GRAD); + sp.add("use_heffte_defaults", false); + sp.add("use_pencils", true); + sp.add("use_reorder", false); + sp.add("use_gpu_aware", true); + sp.add("comm", ippl::p2p_pl); + sp.add("r2c_direction", 0); + + solver_mp = std::make_shared(); + + solver_mp->mergeParameters(sp); + + solver_mp->setRhs(rhoPIC_m); + + solver_mp->setLhs(EfieldPIC_m); + } + + + void initNUFFTs(FieldLayout_t& FLPIF, double& coarseTol, + double& fineTol) { + + ippl::ParameterList fftCoarseParams,fftFineParams; + + fftFineParams.add("gpu_method", 1); + fftFineParams.add("gpu_sort", 0); + fftFineParams.add("gpu_kerevalmeth", 1); + fftFineParams.add("tolerance", fineTol); + + fftCoarseParams.add("gpu_method", 1); + fftCoarseParams.add("gpu_sort", 0); + fftCoarseParams.add("gpu_kerevalmeth", 1); + fftCoarseParams.add("tolerance", coarseTol); + + fftFineParams.add("use_cufinufft_defaults", false); + fftCoarseParams.add("use_cufinufft_defaults", false); + + nufftType1Fine_mp = std::make_shared>(FLPIF, this->getLocalNum(), 1, fftFineParams); + nufftType2Fine_mp = std::make_shared>(FLPIF, this->getLocalNum(), 2, fftFineParams); + + nufftType1Coarse_mp = std::make_shared>(FLPIF, this->getLocalNum(), 1, fftCoarseParams); + nufftType2Coarse_mp = std::make_shared>(FLPIF, this->getLocalNum(), 2, fftCoarseParams); + } + + void dumpFieldEnergy(const unsigned int& nc, const unsigned int& iter, int rankTime, int rankSpace) { + + double fieldEnergy = 0.0; + double EzAmp = 0.0; + + auto rhoview = rhoPIF_m.getView(); + const int nghost = rhoPIF_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rhoPIF_m.getLayout(); + const Mesh_t& mesh = rhoPIF_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Ez energy and Max", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& tlSum, + double& tlMax) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[2] * rhoview(i+nghost,j+nghost,k+nghost) * factor); + double myVal = Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + + tlSum += myVal; + + double myValMax = std::sqrt(myVal); + + if(myValMax > tlMax) tlMax = myValMax; + + }, Kokkos::Sum(fieldEnergy), Kokkos::Max(EzAmp)); + + + Kokkos::fence(); + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + fieldEnergy *= volume; + + + if(rankSpace == 0) { + std::stringstream fname; + fname << "data/FieldBumponTail_rank_"; + fname << rankTime; + fname << "_nc_"; + fname << nc; + fname << "_iter_"; + fname << iter; + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(10); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + + csvout << time_m << " " + << fieldEnergy << " " + << EzAmp << endl; + } + } + + void dumpEnergy(const unsigned int& nc, const unsigned int& iter, ParticleAttrib& Ptemp, + int rankTime, int rankSpace, const MPI_Comm& spaceComm = MPI_COMM_WORLD) { + + double potentialEnergy, kineticEnergy; + double temp = 0.0; + + auto rhoview = rhoPIF_m.getView(); + const int nghost = rhoPIF_m.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + + const FieldLayout_t& layout = rhoPIF_m.getLayout(); + const Mesh_t& mesh = rhoPIF_m.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + Kokkos::complex imag = {0.0, 1.0}; + double pi = std::acos(-1.0); + Kokkos::parallel_reduce("Potential energy", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& valL) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + Dr += kVec[d] * kVec[d]; + } + + Kokkos::complex Ek = {0.0, 0.0}; + double myVal = 0.0; + auto rho = rhoview(i+nghost,j+nghost,k+nghost); + for(size_t d = 0; d < Dim; ++d) { + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[d] * rho * factor); + myVal += Ek.real() * Ek.real() + Ek.imag() * Ek.imag(); + } + + valL += myVal; + + }, Kokkos::Sum(temp)); + + double volume = (rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]); + potentialEnergy = 0.5 * temp * volume; + + auto Pview = Ptemp.getView(); + auto qView = q.getView(); + + temp = 0.0; + + Kokkos::parallel_reduce("Kinetic Energy", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + double myVal = dot(Pview(i), Pview(i)).apply(); + myVal *= -qView(i); //q/(q/m) where q/m=-1 + valL += myVal; + }, Kokkos::Sum(temp)); + + temp *= 0.5; + double globaltemp = 0.0; + MPI_Allreduce(&temp, &globaltemp, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + + kineticEnergy = globaltemp; + + auto rhoPIFhalfview = rhoPIFhalf_m.getView(); + const int nghostHalf = rhoPIFhalf_m.getNghost(); + + const FieldLayout_t& layoutHalf = rhoPIFhalf_m.getLayout(); + const auto& domainHalf = layoutHalf.getDomain(); + + Vector Nhalf; + for (unsigned d=0; d < Dim; ++d) { + Nhalf[d] = domainHalf[d].length(); + } + + //Heffte needs FFTshifted field whereas the field from cuFINUFFT + //is not shifted. Hence, here we do the shift. + Kokkos::parallel_for("Transfer complex rho to half domain", + mdrange_type({0, 0, 0}, + {Nhalf[0], + Nhalf[1], + Nhalf[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k) + { + Vector iVec = {i, j, k}; + int shift; + for(size_t d = 0; d < Dim; ++d) { + bool isLessThanHalf = (iVec[d] < (Nhalf[d]/2)); + shift = ((int)isLessThanHalf * 2) - 1; + iVec[d] = (iVec[d] + shift * (Nhalf[d]/2)) + nghostHalf; + } + rhoPIFhalfview(Nhalf[0]-1-i+nghostHalf, iVec[1], iVec[2]) = + rhoview(i+nghostHalf,j+nghostHalf,k+nghostHalf); + }); + + + rhoPIFreal_m = 0.0; + fft_mp->transform(-1, rhoPIFreal_m, rhoPIFhalf_m); + + rhoPIFreal_m = (1.0/(N[0]*N[1]*N[2])) * volume * rhoPIFreal_m; + auto rhoPIFrealview = rhoPIFreal_m.getView(); + temp = 0.0; + Kokkos::parallel_reduce("Rho real sum", + mdrange_type({0, 0, 0}, + {N[0], + N[1], + N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + double& valL) + { + valL += rhoPIFrealview(i+nghost, j+nghost, k+nghost); + }, Kokkos::Sum(temp)); + + double chargeTotal = temp; + + Vector_t totalMomentum = 0.0; + + for(size_t d = 0; d < Dim; ++d) { + double tempD = 0.0; + Kokkos::parallel_reduce("Total Momentum", this->getLocalNum(), + KOKKOS_LAMBDA(const int i, double& valL){ + valL += (-qView(i)) * Pview(i)[d]; + }, Kokkos::Sum(tempD)); + totalMomentum[d] = tempD; + } + + Vector_t globalMom; + + double magMomentum = 0.0; + for(size_t d = 0; d < Dim; ++d) { + MPI_Allreduce(&totalMomentum[d], &globalMom[d], 1, MPI_DOUBLE, MPI_SUM, spaceComm); + magMomentum += globalMom[d] * globalMom[d]; + } + + magMomentum = std::sqrt(magMomentum); + + if(rankSpace == 0) { + std::stringstream fname; + fname << "data/Energy_rank_"; + fname << rankTime; + fname << "_nc_"; + fname << nc; + fname << "_iter_"; + fname << iter; + fname << ".csv"; + + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(17); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + //csvout << "time, Potential energy, Kinetic energy, Total energy" << endl; + + csvout << time_m << " " + << potentialEnergy << " " + << kineticEnergy << " " + << potentialEnergy + kineticEnergy << " " + << chargeTotal << " " + << magMomentum << endl; + } + + } + + void writelocalError(double Rerror, double Perror, unsigned int nc, unsigned int iter, int rankTime, int rankSpace) { + + if(rankSpace == 0) { + std::stringstream fname; + fname << "data/localError_rank_"; + fname << rankTime; + fname << "_nc_"; + fname << nc; + fname << ".csv"; + + Inform csvout(NULL, fname.str().c_str(), Inform::APPEND, Ippl::Comm->rank()); + csvout.precision(17); + csvout.setf(std::ios::scientific, std::ios::floatfield); + + if(iter == 1) { + csvout << "Iter, Rerror, Perror" << endl; + } + + csvout << iter << " " + << Rerror << " " + << Perror << endl; + } + + } + + void initializeShapeFunctionPIF() { + + using mdrange_type = Kokkos::MDRangePolicy>; + auto Skview = Sk_m.getView(); + auto N = nm_m; + const int nghost = Sk_m.getNghost(); + const Mesh_t& mesh = rhoPIF_m.get_mesh(); + const Vector_t& dx = mesh.getMeshSpacing(); + const Vector_t& Len = rmax_m - rmin_m; + const double pi = std::acos(-1.0); + int order = shapedegree_m + 1; + + if(shapetype_m == "Gaussian") { + + throw IpplException("initializeShapeFunctionPIF", + "Gaussian shape function not implemented yet"); + + } + else if(shapetype_m == "B-spline") { + + Kokkos::parallel_for("B-spline shape functions", + mdrange_type({0, 0, 0}, + {N[0], N[1], N[2]}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k) + { + + Vector iVec = {i, j, k}; + Vector kVec; + double Sk = 1.0; + for(size_t d = 0; d < Dim; ++d) { + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + double khbytwo = kVec[d] * dx[d] / 2; + bool isNotZero = (khbytwo != 0.0); + double factor = (1.0 / (khbytwo + ((!isNotZero) * 1.0))); + double arg = isNotZero * (Kokkos::sin(khbytwo) * factor) + + (!isNotZero) * 1.0; + //Fourier transform of CIC + Sk *= std::pow(arg, order); + } + Skview(i+nghost, j+nghost, k+nghost) = Sk; + }); + } + else { + throw IpplException("initializeShapeFunctionPIF", + "Unrecognized shape function type"); + } + + } + + void LeapFrogPIC(ParticleAttrib& Rtemp, + ParticleAttrib& Ptemp, const unsigned int nt, + const double dt, const double& tStartMySlice, MPI_Comm& spaceComm) { + + static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); + PLayout& PL = this->getLayout(); + rhoPIC_m = 0.0; + scatter(q, rhoPIC_m, Rtemp, spaceComm); + + rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); + rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + + //Field solve + solver_mp->solve(); + + // gather E field + gather(E, EfieldPIC_m, Rtemp); + + time_m = tStartMySlice; + + for (unsigned int it=0; itsolve(); + IpplTimings::stopTimer(fieldSolvePIC); + + // gather E field + gather(E, EfieldPIC_m, Rtemp); + + //kick + Ptemp = Ptemp - 0.5 * dt * E; + + time_m += dt; + } + + } + + void BorisPIC(ParticleAttrib& Rtemp, ParticleAttrib& Ptemp, const unsigned int nt, + const double dt, const double& tStartMySlice, const double& Bext, MPI_Comm& spaceComm) { + + static IpplTimings::TimerRef fieldSolvePIC = IpplTimings::getTimer("fieldSolvePIC"); + PLayout& PL = this->getLayout(); + rhoPIC_m = 0.0; + scatter(q, rhoPIC_m, Rtemp, spaceComm); + + rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); + rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + + //Field solve + solver_mp->solve(); + + // gather E field + gather(E, EfieldPIC_m, Rtemp); + + time_m = tStartMySlice; + + double alpha = -0.5 * dt; + double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); + Vector_t rmax = rmax_m; + + for (unsigned int it=0; itgetLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(Rview(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + Eext_x += Eview(j)[0]; + Eext_y += Eview(j)[1]; + Eext_z += Eview(j)[2]; + + Pview(j)[0] += alpha * (Eext_x + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eext_y - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eext_z; + }); + + //drift + Rtemp = Rtemp + dt * Ptemp; + + //Apply particle BC + PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + + //scatter the charge onto the underlying grid + rhoPIC_m = 0.0; + scatter(q, rhoPIC_m, Rtemp, spaceComm); + + rhoPIC_m = rhoPIC_m / (hr_m[0] * hr_m[1] * hr_m[2]); + rhoPIC_m = rhoPIC_m - (Q_m/((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2]))); + + //Field solve + IpplTimings::startTimer(fieldSolvePIC); + solver_mp->solve(); + IpplTimings::stopTimer(fieldSolvePIC); + + // gather E field + gather(E, EfieldPIC_m, Rtemp); + + //kick + auto R2view = Rtemp.getView(); + auto P2view = Ptemp.getView(); + auto E2view = E.getView(); + Kokkos::parallel_for("Kick2", this->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(R2view(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + Eext_x += E2view(j)[0]; + Eext_y += E2view(j)[1]; + Eext_z += E2view(j)[2]; + + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (Eext_x + + P2view(j)[1] * Bext + alpha * Bext * Eext_y) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (Eext_y + - P2view(j)[0] * Bext - alpha * Bext * Eext_x) ); + P2view(j)[2] += alpha * Eext_z; + }); + + time_m += dt; + } + + } + + void LeapFrogPIF(ParticleAttrib& Rtemp, + ParticleAttrib& Ptemp, const unsigned int& nt, + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, int rankTime, int rankSpace, + const std::string& propagator, MPI_Comm& spaceComm) { + + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + PLayout& PL = this->getLayout(); + rhoPIF_m = {0.0, 0.0}; + if(propagator == "Coarse") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Coarse_mp.get(), spaceComm); + } + else if(propagator == "Fine") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Fine_mp.get(), spaceComm); + } + + rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); + + // Solve for and gather E field + if(propagator == "Coarse") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Coarse_mp.get(), q); + } + else if(propagator == "Fine") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Fine_mp.get(), q); + } + + //Reset the value of q here as we used it as a temporary object in gather to + //save memory + q = Q_m / Np_m; + + time_m = tStartMySlice; + + if((time_m == 0.0) && (propagator == "Fine")) { + IpplTimings::startTimer(dumpData); + dumpFieldEnergy(nc, iter, rankTime, rankSpace); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + IpplTimings::stopTimer(dumpData); + } + for (unsigned int it=0; it& Rtemp, + ParticleAttrib& Ptemp, const unsigned int& nt, + const double& dt, const double& tStartMySlice, const unsigned& nc, + const unsigned int& iter, const double& Bext, + int rankTime, int rankSpace, + const std::string& propagator, MPI_Comm& spaceComm) { + + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + PLayout& PL = this->getLayout(); + rhoPIF_m = {0.0, 0.0}; + if(propagator == "Coarse") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Coarse_mp.get(), spaceComm); + } + else if(propagator == "Fine") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Fine_mp.get(), spaceComm); + } + + rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); + + // Solve for and gather E field + if(propagator == "Coarse") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Coarse_mp.get(), q); + } + else if(propagator == "Fine") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Fine_mp.get(), q); + } + + q = Q_m / Np_m; + + time_m = tStartMySlice; + + if((time_m == 0.0) && (propagator == "Fine")) { + IpplTimings::startTimer(dumpData); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + IpplTimings::stopTimer(dumpData); + } + double alpha = -0.5 * dt; + double DrInv = 1.0 / (1 + (std::pow((alpha * Bext), 2))); + Vector_t rmax = rmax_m; + for (unsigned int it=0; itgetLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(Rview(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(Rview(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (Rview(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + Eext_x += Eview(j)[0]; + Eext_y += Eview(j)[1]; + Eext_z += Eview(j)[2]; + + Pview(j)[0] += alpha * (Eext_x + Pview(j)[1] * Bext); + Pview(j)[1] += alpha * (Eext_y - Pview(j)[0] * Bext); + Pview(j)[2] += alpha * Eext_z; + }); + + //drift + Rtemp = Rtemp + dt * Ptemp; + + //Apply particle BC + PL.applyBC(Rtemp, PL.getRegionLayout().getDomain()); + + //scatter the charge onto the underlying grid + rhoPIF_m = {0.0, 0.0}; + if(propagator == "Coarse") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Coarse_mp.get(), spaceComm); + } + else if(propagator == "Fine") { + scatterPIFNUFFT(q, rhoPIF_m, Sk_m, Rtemp, nufftType1Fine_mp.get(), spaceComm); + } + + rhoPIF_m = rhoPIF_m / ((rmax_m[0] - rmin_m[0]) * (rmax_m[1] - rmin_m[1]) * (rmax_m[2] - rmin_m[2])); + + // Solve for and gather E field + if(propagator == "Coarse") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Coarse_mp.get(), q); + } + else if(propagator == "Fine") { + gatherPIFNUFFT(E, rhoPIF_m, Sk_m, Rtemp, nufftType2Fine_mp.get(), q); + } + + q = Q_m / Np_m; + //kick + auto R2view = Rtemp.getView(); + auto P2view = Ptemp.getView(); + auto E2view = E.getView(); + Kokkos::parallel_for("Kick2", this->getLocalNum(), + KOKKOS_LAMBDA(const size_t j){ + double Eext_x = -(R2view(j)[0] - 0.5*rmax[0]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_y = -(R2view(j)[1] - 0.5*rmax[1]) * (V0/(2*std::pow(rmax[2],2))); + double Eext_z = (R2view(j)[2] - 0.5*rmax[2]) * (V0/(std::pow(rmax[2],2))); + + Eext_x += E2view(j)[0]; + Eext_y += E2view(j)[1]; + Eext_z += E2view(j)[2]; + + P2view(j)[0] = DrInv * ( P2view(j)[0] + alpha * (Eext_x + + P2view(j)[1] * Bext + alpha * Bext * Eext_y) ); + P2view(j)[1] = DrInv * ( P2view(j)[1] + alpha * (Eext_y + - P2view(j)[0] * Bext - alpha * Bext * Eext_x) ); + P2view(j)[2] += alpha * Eext_z; + }); + + time_m += dt; + + if(propagator == "Fine") { + IpplTimings::startTimer(dumpData); + dumpEnergy(nc, iter, Ptemp, rankTime, rankSpace, spaceComm); + IpplTimings::stopTimer(dumpData); + } + } + } + +private: + void setBCAllPeriodic() { + this->setParticleBC(ippl::BC::PERIODIC); + } + +}; diff --git a/alpine/PinT/LandauDampingPinT.cpp b/alpine/PinT/LandauDampingPinT.cpp new file mode 100644 index 000000000..99f625eb9 --- /dev/null +++ b/alpine/PinT/LandauDampingPinT.cpp @@ -0,0 +1,721 @@ +// Parallel-in-time (PinT) method Parareal combined with Particle-in-cell +// and Particle-in-Fourier schemes. The example is electrostatic Landau +// damping. The implementation of Parareal follows the open source implementation +// https://github.com/Parallel-in-Time/PararealF90 by Daniel Ruprecht. The corresponding +// publication is Ruprecht, Daniel. "Shared memory pipelined parareal." +// European Conference on Parallel Processing. Springer, Cham, 2017. +// +// Usage: +// srun ./LandauDampingPinT +// +// --info 5 +// nmx = No. of Fourier modes in the x-direction +// nmy = No. of Fourier modes in the y-direction +// nmz = No. of Fourier modes in the z-direction +// nx = No. of grid points in the x-direction (not used if PIF is also used as coarse propagator) +// ny = No. of grid points in the y-direction (not used if PIF is also used as coarse propagator) +// nz = No. of grid points in the z-direction (not used if PIF is also used as coarse propagator) +// Np = Total no. of macro-particles in the simulation +// tolParareal = Parareal tolerance +// nCycles = No. of Parareal blocks/cycles +// ShapeType = Shape function type B-spline only for the moment +// degree = B-spline degree (-1 for delta function) +// No. of space procs = Number of MPI ranks to be used in the spatial parallelization +// No. of time procs = Number of MPI ranks to be used in the time parallelization +// coarseTol = Coarse tolerance for PIF if we use PIF as a coarse propagator (will not be used when PIC is used) +// fineTol = fine tolerance for PIF +// coarseType = Type of coarse propagator (PIF or PIC) +// Example: +// srun ./LandauDampingPinT 32 32 32 16 16 16 655360 19.2 0.05 0.05 1e-5 1 B-spline 1 4 16 1e-2 1e-4 PIC --info 5 +// +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, +// Jülich Supercomputing Centre, Jülich, Germany. +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "ChargedParticlesPinT.hpp" +#include "StatesSlice.hpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T k, alpha, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& k_, const T& alpha_, + const T& u_) + : k(k_), alpha(alpha_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = x + (alpha * (std::sin(k * x) / k)) - u; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = 1 + (alpha * std::cos(k * x)); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while (iterations < max_iter && std::fabs(f(x)) > tol) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + //value_type alpha; + + T alpha, k, minU, maxU; + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + T& alpha_, T& k_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + alpha(alpha_), k(k_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + value_type u; + for (unsigned d = 0; d < Dim; ++d) { + + u = rand_gen.drand(minU[d], maxU[d]); + x(i)[d] = u / (1 + alpha[d]); + Newton1D solver(k[d], alpha[d], u); + solver.solve(x(i)[d]); + v(i)[d] = rand_gen.normal(0.0, 1.0); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +double CDF(const double& x, const double& alpha, const double& k) { + double cdf = x + (alpha / k) * std::sin(k * x); + return cdf; +} + +double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + Vector_t& length, MPI_Comm& spaceComm) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + + //This is just to undo the effect of periodic BCs during the + //error calculation. Otherwise even though the actual error is + //small the computed error might be very large. + //The values (e.g. 10) mentioned here are just an adhoc + //value depending on the domain length. + for (unsigned d = 0; d < 3; ++d) { + bool isLeft = (diff[d] <= -10.0); + bool isRight = (diff[d] >= 10.0); + bool isInside = ((diff[d] > -10.0) && (diff[d] < 10.0)); + diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) + +(isRight * (diff[d] - length[d])); + } + + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); + + Kokkos::fence(); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); + + return relError; + +} + +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, MPI_Comm& spaceComm) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); + + Kokkos::fence(); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); + + return relError; + +} + +const char* TestName = "LandauDampingPinT"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + int spaceColor, timeColor; + MPI_Comm spaceComm, timeComm; + + int spaceProcs = std::atoi(argv[15]); + int timeProcs = std::atoi(argv[16]); + spaceColor = Ippl::Comm->rank() / spaceProcs; + timeColor = Ippl::Comm->rank() % spaceProcs; + + MPI_Comm_split(Ippl::getComm(), spaceColor, Ippl::Comm->rank(), &spaceComm); + MPI_Comm_split(Ippl::getComm(), timeColor, Ippl::Comm->rank(), &timeComm); + + int rankSpace, sizeSpace, rankTime, sizeTime; + MPI_Comm_rank(spaceComm, &rankSpace); + MPI_Comm_size(spaceComm, &sizeSpace); + + MPI_Comm_rank(timeComm, &rankTime); + MPI_Comm_size(timeComm, &sizeTime); + + Inform msg(TestName, Ippl::Comm->size()-1); + Inform msg2all(TestName,INFORM_ALL_NODES); + + ippl::Vector nmPIF = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + ippl::Vector nrPIC = { + std::atoi(argv[4]), + std::atoi(argv[5]), + std::atoi(argv[6]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef timeCommunication = IpplTimings::getTimer("timeCommunication"); + static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); + static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); + static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); + static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); + + IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[7]); + const double tEnd = std::atof(argv[8]); + const unsigned int nCycles = std::atoi(argv[12]); + double tEndCycle = tEnd / nCycles; + const double dtSlice = tEndCycle / sizeTime; + const double dtFine = std::atof(argv[9]); + const double dtCoarse = std::atof(argv[10]); + const unsigned int ntFine = std::ceil(dtSlice / dtFine); + const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); + const double tol = std::atof(argv[11]); + + + using bunch_type = ChargedParticlesPinT; + using states_type = StatesSlice; + + std::unique_ptr Pcoarse; + std::unique_ptr Pbegin; + std::unique_ptr Pend; + + ippl::NDIndex domainPIC; + ippl::NDIndex domainPIF; + for (unsigned i = 0; i< Dim; i++) { + domainPIC[i] = ippl::Index(nrPIC[i]); + domainPIF[i] = ippl::Index(nmPIF[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::SERIAL; + } + + // create mesh and layout objects for this problem domain + Vector_t kw = {0.5, 0.5, 0.5}; + //Vector_t kw = {1.0, 1.0, 1.0}; + Vector_t alpha = {0.05, 0.05, 0.05}; + //Vector_t alpha = {0.5, 0.5, 0.5}; + Vector_t rmin(0.0); + Vector_t rmax = 2 * pi / kw ; + Vector_t length = rmax - rmin; + double dxPIC = length[0] / nrPIC[0]; + double dyPIC = length[1] / nrPIC[1]; + double dzPIC = length[2] / nrPIC[2]; + + + double dxPIF = length[0] / nmPIF[0]; + double dyPIF = length[1] / nmPIF[1]; + double dzPIF = length[2] / nmPIF[2]; + Vector_t hrPIC = {dxPIC, dyPIC, dzPIC}; + Vector_t hrPIF = {dxPIF, dyPIF, dzPIF}; + Vector_t origin = {rmin[0], rmin[1], rmin[2]}; + + const bool isAllPeriodic=true; + Mesh_t meshPIC(domainPIC, hrPIC, origin); + Mesh_t meshPIF(domainPIF, hrPIF, origin); + FieldLayout_t FLPIC(domainPIC, decomp, isAllPeriodic); + FieldLayout_t FLPIF(domainPIF, decomp, isAllPeriodic); + PLayout_t PL(FLPIC, meshPIC); + + + double factor = 1.0 / sizeSpace; + size_type nloc = (size_type)(factor * totalP); + + size_type Total_particles = 0; + + MPI_Allreduce(&nloc, &Total_particles, 1, + MPI_UNSIGNED_LONG, MPI_SUM, spaceComm); + + //Q = -\int\int f dx dv + double Q = -length[0] * length[1] * length[2]; + Pcoarse = std::make_unique(PL,hrPIC,rmin,rmax,decomp,Q,Total_particles); + Pbegin = std::make_unique(PL); + Pend = std::make_unique(PL); + + Pcoarse->nr_m = nrPIC; + Pcoarse->nm_m = nmPIF; + + Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); + Pcoarse->Sk_m.initialize(meshPIF, FLPIF); + + Pcoarse->coarsetype_m = argv[19]; + + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->initFFTSolver(); + } + + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + Pcoarse->rhoPIFreal_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + Pcoarse->fft_mp = std::make_shared(FLPIF, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + + + Vector_t minU, maxU; + for (unsigned d = 0; d shapetype_m = argv[13]; + Pcoarse->shapedegree_m = std::atoi(argv[14]); + IpplTimings::startTimer(initializeShapeFunctionPIF); + Pcoarse->initializeShapeFunctionPIF(); + IpplTimings::stopTimer(initializeShapeFunctionPIF); + + IpplTimings::startTimer(particleCreation); + + Pcoarse->create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); + + Pcoarse->q = Pcoarse->Q_m/Total_particles; + + IpplTimings::stopTimer(particleCreation); + + double coarseTol = std::atof(argv[17]); + double fineTol = std::atof(argv[18]); + Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + std::string coarse = "Coarse"; + std::string fine = "Fine"; + + IpplTimings::startTimer(particleCreation); + +#ifdef KOKKOS_ENABLE_CUDA + //If we don't do the following even with the same seed the initial + //condition is not the same on different GPUs + + + IpplTimings::startTimer(timeCommunication); + //For some reason using the next_tag with multiple cycles is not + //working so we use static tags here + tag = 500;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(rankTime == 0) { + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); + //Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(79 + 100*rankSpace)); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + + + Kokkos::fence(); + } + else { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, spaceComm); + } + else { + //PIF with coarse tolerance as coarse propagator + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + if(rankTime < sizeTime-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); +#else + //Note the CPU version has not been tested. + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, alpha, kw, minU, maxU)); + + Kokkos::fence(); + Ippl::Comm->barrier(); +#endif + + msg << "Parareal " + << TestName + << endl + << "Slice dT: " << dtSlice + << endl + << "No. of fine time steps: " << ntFine + << endl + << "No. of coarse time steps: " << ntCoarse + << endl + << "Tolerance: " << tol + << " No. of cycles: " << nCycles + << endl + << "Np= " << Total_particles + << " Fourier modes = " << nmPIF + << " Grid points = " << nrPIC + << endl; + + + IpplTimings::stopTimer(particleCreation); + + msg << "particles created and initial conditions assigned " << endl; + + int sign = 1; + for (unsigned int nc=0; nc < nCycles; nc++) { + double tStartMySlice; + bool sendCriteria, recvCriteria; + bool isConverged = false; + bool isPreviousDomainConverged = false; + + //even cycles + if(nc % 2 == 0) { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + if(rankTime == 0) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + msg.setPrintNode(Ippl::Comm->size()-1); + } + //odd cycles + else { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + if(rankTime == (sizeTime - 1)) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + msg.setPrintNode(0); + } + + unsigned int it = 0; + while (!isConverged) { + //Run fine integrator in parallel + IpplTimings::startTimer(finePropagator); + Pcoarse->LeapFrogPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, rankTime, rankSpace, fine, spaceComm); + IpplTimings::stopTimer(finePropagator); + + + //Difference = Fine - Coarse + Pend->R = Pbegin->R - Pcoarse->R; + Pend->P = Pbegin->P - Pcoarse->P; + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(recvCriteria && (!isPreviousDomainConverged)) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime-sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, rankTime-sign, tagbool, + timeComm, MPI_STATUS_IGNORE); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + } + else { + Pcoarse->LeapFrogPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + + Pend->R = Pend->R + Pcoarse->R; + Pend->P = Pend->P + Pcoarse->P; + + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); + + IpplTimings::startTimer(computeErrors); + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); + IpplTimings::stopTimer(computeErrors); + + + if((Rerror <= tol) && (Perror <= tol) && isPreviousDomainConverged) { + isConverged = true; + } + + IpplTimings::startTimer(timeCommunication); + if(sendCriteria) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime+sign, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(&isConverged, 1, MPI_C_BOOL, rankTime+sign, tagbool, timeComm); + } + IpplTimings::stopTimer(timeCommunication); + + + msg << "Finished iteration: " << it+1 + << " in cycle: " << nc+1 + << " Rerror: " << Rerror + << " Perror: " << Perror + << endl; + + IpplTimings::startTimer(dumpData); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + IpplTimings::stopTimer(dumpData); + + MPI_Barrier(spaceComm); + + it += 1; + } + + MPI_Barrier(MPI_COMM_WORLD); + if((nCycles > 1) && (nc < (nCycles - 1))) { + tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + //send, receive criteria and tStartMySlice are reversed at the end of the cycle + if(nc % 2 == 0) { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + } + //odd cycles + else { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + } + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + IpplTimings::startTimer(timeCommunication); + if(recvCriteria) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime+sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->LeapFrogPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, spaceComm); + } + else { + Pcoarse->LeapFrogPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + + IpplTimings::startTimer(timeCommunication); + if(sendCriteria) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime-sign, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); + sign *= -1; + } + } + + msg << TestName << " Parareal: End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + MPI_Comm_free(&spaceComm); + MPI_Comm_free(&timeComm); + + + return 0; +} diff --git a/alpine/PinT/PenningTrapPinT.cpp b/alpine/PinT/PenningTrapPinT.cpp new file mode 100644 index 000000000..270fba8c2 --- /dev/null +++ b/alpine/PinT/PenningTrapPinT.cpp @@ -0,0 +1,717 @@ +// Parallel-in-time (PinT) method Parareal combined with Particle-in-cell +// and Particle-in-Fourier schemes. The example is electrostatic Landau +// damping. The implementation of Parareal follows the open source implementation +// https://github.com/Parallel-in-Time/PararealF90 by Daniel Ruprecht. The corresponding +// publication is Ruprecht, Daniel. "Shared memory pipelined parareal." +// European Conference on Parallel Processing. Springer, Cham, 2017. +// +// Usage: +// srun ./PenningTrapPinT +// +// --info 5 +// nmx = No. of Fourier modes in the x-direction +// nmy = No. of Fourier modes in the y-direction +// nmz = No. of Fourier modes in the z-direction +// nx = No. of grid points in the x-direction (not used if PIF is also used as coarse propagator) +// ny = No. of grid points in the y-direction (not used if PIF is also used as coarse propagator) +// nz = No. of grid points in the z-direction (not used if PIF is also used as coarse propagator) +// Np = Total no. of macro-particles in the simulation +// tolParareal = Parareal tolerance +// nCycles = No. of Parareal blocks/cycles +// ShapeType = Shape function type B-spline only for the moment +// degree = B-spline degree (-1 for delta function) +// No. of space procs = Number of MPI ranks to be used in the spatial parallelization +// No. of time procs = Number of MPI ranks to be used in the time parallelization +// coarseTol = Coarse tolerance for PIF if we use PIF as a coarse propagator (will not be used when PIC is used) +// fineTol = fine tolerance for PIF +// coarseType = Type of coarse propagator (PIF or PIC) +// Example: +// srun ./PenningTrapPinT 32 32 32 16 16 16 655360 19.2 0.05 0.05 1e-5 1 B-spline 1 4 16 1e-2 1e-4 PIC --info 5 +// +// Copyright (c) 2022, Sriramkrishnan Muralikrishnan, +// Jülich Supercomputing Centre, Jülich, Germany. +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + +#include "ChargedParticlesPinT.hpp" +#include "StatesSlice.hpp" +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "Utility/IpplTimings.h" + +template +struct Newton1D { + + double tol = 1e-12; + int max_iter = 20; + double pi = std::acos(-1.0); + + T mu, sigma, u; + + KOKKOS_INLINE_FUNCTION + Newton1D() {} + + KOKKOS_INLINE_FUNCTION + Newton1D(const T& mu_, const T& sigma_, + const T& u_) + : mu(mu_), sigma(sigma_), u(u_) {} + + KOKKOS_INLINE_FUNCTION + ~Newton1D() {} + + KOKKOS_INLINE_FUNCTION + T f(T& x) { + T F; + F = std::erf((x - mu)/(sigma * std::sqrt(2.0))) + - 2 * u + 1; + return F; + } + + KOKKOS_INLINE_FUNCTION + T fprime(T& x) { + T Fprime; + Fprime = (1 / sigma) * std::sqrt(2 / pi) * + std::exp(-0.5 * (std::pow(((x - mu) / sigma),2))); + return Fprime; + } + + KOKKOS_FUNCTION + void solve(T& x) { + int iterations = 0; + while ((iterations < max_iter) && (std::fabs(f(x)) > tol)) { + x = x - (f(x)/fprime(x)); + iterations += 1; + } + } +}; + + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x, v; + + // The GeneratorPool + GeneratorPool rand_pool; + + T mu, sigma, minU, maxU; + + double pi = std::acos(-1.0); + + // Initialize all members + generate_random(view_type x_, view_type v_, GeneratorPool rand_pool_, + T& mu_, T& sigma_, T& minU_, T& maxU_) + : x(x_), v(v_), rand_pool(rand_pool_), + mu(mu_), sigma(sigma_), minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + value_type u; + for (unsigned d = 0; d < Dim; ++d) { + u = rand_gen.drand(minU[d], maxU[d]); + x(i)[d] = (std::sqrt(pi / 2) * (2 * u - 1)) * + sigma[d] + mu[d]; + Newton1D solver(mu[d], sigma[d], u); + solver.solve(x(i)[d]); + v(i)[d] = rand_gen.normal(0.0, 1.0); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +double CDF(const double& x, const double& mu, const double& sigma) { + double cdf = 0.5 * (1.0 + std::erf((x - mu)/(sigma * std::sqrt(2)))); + return cdf; +} + +double computeRL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, + Vector_t& length, MPI_Comm& spaceComm) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + + //This is just to undo the effect of periodic BCs during the + //error calculation. Otherwise even though the actual error is + //small the computed error might be very large. + //The values (e.g. 22) mentioned here are just an adhoc + //value depending on the domain length. + for (unsigned d = 0; d < 3; ++d) { + bool isLeft = (diff[d] <= -22.0); + bool isRight = (diff[d] >= 22.0); + bool isInside = ((diff[d] > -22.0) && (diff[d] < 22.0)); + diff[d] = (isInside * diff[d]) + (isLeft * (diff[d] + length[d])) + +(isRight * (diff[d] - length[d])); + } + + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); + + Kokkos::fence(); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); + + return relError; + +} + +double computePL2Error(ParticleAttrib& Q, ParticleAttrib& QprevIter, MPI_Comm& spaceComm) { + + auto Qview = Q.getView(); + auto QprevIterView = QprevIter.getView(); + double localError = 0.0; + double localNorm = 0.0; + + Kokkos::parallel_reduce("Abs. error and norm", Q.size(), + KOKKOS_LAMBDA(const int i, double& valLError, double& valLnorm){ + Vector_t diff = Qview(i) - QprevIterView(i); + double myValError = dot(diff, diff).apply(); + valLError += myValError; + double myValnorm = dot(Qview(i), Qview(i)).apply(); + valLnorm += myValnorm; + }, Kokkos::Sum(localError), Kokkos::Sum(localNorm)); + + Kokkos::fence(); + double globalError = 0.0; + MPI_Allreduce(&localError, &globalError, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + double globalNorm = 0.0; + MPI_Allreduce(&localNorm, &globalNorm, 1, MPI_DOUBLE, MPI_SUM, spaceComm); + + double relError = std::sqrt(globalError) / std::sqrt(globalNorm); + + return relError; + +} + +const char* TestName = "PenningTrapPinT"; + +int main(int argc, char *argv[]){ + Ippl ippl(argc, argv); + + int spaceColor, timeColor; + MPI_Comm spaceComm, timeComm; + + int spaceProcs = std::atoi(argv[15]); + int timeProcs = std::atoi(argv[16]); + spaceColor = Ippl::Comm->rank() / spaceProcs; + timeColor = Ippl::Comm->rank() % spaceProcs; + + MPI_Comm_split(Ippl::getComm(), spaceColor, Ippl::Comm->rank(), &spaceComm); + MPI_Comm_split(Ippl::getComm(), timeColor, Ippl::Comm->rank(), &timeComm); + + int rankSpace, sizeSpace, rankTime, sizeTime; + MPI_Comm_rank(spaceComm, &rankSpace); + MPI_Comm_size(spaceComm, &sizeSpace); + + MPI_Comm_rank(timeComm, &rankTime); + MPI_Comm_size(timeComm, &sizeTime); + + Inform msg(TestName, Ippl::Comm->size()-1); + Inform msg2all(TestName,INFORM_ALL_NODES); + + ippl::Vector nmPIF = { + std::atoi(argv[1]), + std::atoi(argv[2]), + std::atoi(argv[3]) + }; + + ippl::Vector nrPIC = { + std::atoi(argv[4]), + std::atoi(argv[5]), + std::atoi(argv[6]) + }; + + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("mainTimer"); + static IpplTimings::TimerRef particleCreation = IpplTimings::getTimer("particlesCreation"); + static IpplTimings::TimerRef timeCommunication = IpplTimings::getTimer("timeCommunication"); + static IpplTimings::TimerRef deepCopy = IpplTimings::getTimer("deepCopy"); + static IpplTimings::TimerRef finePropagator = IpplTimings::getTimer("finePropagator"); + static IpplTimings::TimerRef coarsePropagator = IpplTimings::getTimer("coarsePropagator"); + static IpplTimings::TimerRef dumpData = IpplTimings::getTimer("dumpData"); + static IpplTimings::TimerRef computeErrors = IpplTimings::getTimer("computeErrors"); + static IpplTimings::TimerRef initializeShapeFunctionPIF = IpplTimings::getTimer("initializeShapeFunctionPIF"); + + IpplTimings::startTimer(mainTimer); + + const size_type totalP = std::atoll(argv[7]); + const double tEnd = std::atof(argv[8]); + const unsigned int nCycles = std::atoi(argv[12]); + double tEndCycle = tEnd / nCycles; + const double dtSlice = tEndCycle / sizeTime; + const double dtFine = std::atof(argv[9]); + const double dtCoarse = std::atof(argv[10]); + const unsigned int ntFine = std::ceil(dtSlice / dtFine); + const unsigned int ntCoarse = std::ceil(dtSlice / dtCoarse); + const double tol = std::atof(argv[11]); + + using bunch_type = ChargedParticlesPinT; + using states_type = StatesSlice; + + std::unique_ptr Pcoarse; + std::unique_ptr Pbegin; + std::unique_ptr Pend; + + ippl::NDIndex domainPIC; + ippl::NDIndex domainPIF; + for (unsigned i = 0; i< Dim; i++) { + domainPIC[i] = ippl::Index(nrPIC[i]); + domainPIF[i] = ippl::Index(nmPIF[i]); + } + + ippl::e_dim_tag decomp[Dim]; + for (unsigned d = 0; d < Dim; ++d) { + decomp[d] = ippl::SERIAL; + } + + // create mesh and layout objects for this problem domain + Vector_t rmin(0.0); + Vector_t rmax(25.0); + Vector_t length = rmax - rmin; + double dxPIC = length[0] / nrPIC[0]; + double dyPIC = length[1] / nrPIC[1]; + double dzPIC = length[2] / nrPIC[2]; + + Vector_t mu, sd; + + for (unsigned d = 0; d(PL,hrPIC,rmin,rmax,decomp,Q,Total_particles); + Pbegin = std::make_unique(PL); + Pend = std::make_unique(PL); + + Pcoarse->nr_m = nrPIC; + Pcoarse->nm_m = nmPIF; + + Pcoarse->rhoPIF_m.initialize(meshPIF, FLPIF); + Pcoarse->Sk_m.initialize(meshPIF, FLPIF); + + Pcoarse->coarsetype_m = argv[19]; + + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->rhoPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->EfieldPIC_m.initialize(meshPIC, FLPIC); + Pcoarse->initFFTSolver(); + } + + //////////////////////////////////////////////////////////// + //Initialize an FFT object for getting rho in real space and + //doing charge conservation check + + ippl::ParameterList fftParams; + fftParams.add("use_heffte_defaults", false); + fftParams.add("use_pencils", true); + fftParams.add("use_reorder", false); + fftParams.add("use_gpu_aware", true); + fftParams.add("comm", ippl::p2p_pl); + fftParams.add("r2c_direction", 0); + + ippl::NDIndex domainPIFhalf; + + for(unsigned d = 0; d < Dim; ++d) { + if(fftParams.template get("r2c_direction") == (int)d) + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()/2 + 1); + else + domainPIFhalf[d] = ippl::Index(domainPIF[d].length()); + } + + + FieldLayout_t FLPIFhalf(domainPIFhalf, decomp); + + ippl::Vector hDummy = {1.0, 1.0, 1.0}; + ippl::Vector originDummy = {0.0, 0.0, 0.0}; + Mesh_t meshPIFhalf(domainPIFhalf, hDummy, originDummy); + + Pcoarse->rhoPIFreal_m.initialize(meshPIF, FLPIF); + Pcoarse->rhoPIFhalf_m.initialize(meshPIFhalf, FLPIFhalf); + + Pcoarse->fft_mp = std::make_shared(FLPIF, FLPIFhalf, fftParams); + + //////////////////////////////////////////////////////////// + + Vector_t minU, maxU; + for (unsigned d = 0; d shapetype_m = argv[13]; + Pcoarse->shapedegree_m = std::atoi(argv[14]); + IpplTimings::startTimer(initializeShapeFunctionPIF); + Pcoarse->initializeShapeFunctionPIF(); + IpplTimings::stopTimer(initializeShapeFunctionPIF); + + IpplTimings::startTimer(particleCreation); + + Pcoarse->create(nloc); + Pbegin->create(nloc); + Pend->create(nloc); + + Pcoarse->q = Pcoarse->Q_m/Total_particles; + + IpplTimings::stopTimer(particleCreation); + + double coarseTol = std::atof(argv[17]); + double fineTol = std::atof(argv[18]); + Pcoarse->initNUFFTs(FLPIF, coarseTol, fineTol); + std::string coarse = "Coarse"; + std::string fine = "Fine"; + + IpplTimings::startTimer(particleCreation); +#ifdef KOKKOS_ENABLE_CUDA + //If we don't do the following even with the same seed the initial + //condition is not the same on different GPUs + + IpplTimings::startTimer(timeCommunication); + //For some reason using the next_tag with multiple cycles is not + //working so we use static tags here + tag = 500;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(rankTime == 0) { + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42 + 100*rankSpace)); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pbegin->R.getView(), Pbegin->P.getView(), rand_pool64, mu, sd, + minU, maxU)); + + + Kokkos::fence(); + } + else { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime-1, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, Bext, spaceComm); + } + else { + Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, rankTime * dtSlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + if(rankTime < sizeTime-1) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime+1, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); +#else + //Note the CPU version has not been tested. + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(0)); + Kokkos::parallel_for(nloc, + generate_random, Dim>( + Pcoarse->R.getView(), Pcoarse->P.getView(), rand_pool64, mu, sd, + minU, maxU)); + + Kokkos::fence(); + Ippl::Comm->barrier(); +#endif + + msg << "Parareal " + << TestName + << endl + << "Slice dT: " << dtSlice + << endl + << "No. of fine time steps: " << ntFine + << endl + << "No. of coarse time steps: " << ntCoarse + << endl + << "Tolerance: " << tol + << " No. of cycles: " << nCycles + << endl + << "Np= " << Total_particles + << " Fourier modes = " << nmPIF + << " Grid points = " << nrPIC + << endl; + + IpplTimings::stopTimer(particleCreation); + + msg << "particles created and initial conditions assigned " << endl; + + int sign = 1; + for (unsigned int nc=0; nc < nCycles; nc++) { + double tStartMySlice; + bool sendCriteria, recvCriteria; + bool isConverged = false; + bool isPreviousDomainConverged = false; + + //even cycles + if(nc % 2 == 0) { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + if(rankTime == 0) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + msg.setPrintNode(Ippl::Comm->size()-1); + } + //odd cycles + else { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + if(rankTime == (sizeTime - 1)) { + isPreviousDomainConverged = true; + } + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + msg.setPrintNode(0); + } + + unsigned int it = 0; + while (!isConverged) { + //Run fine integrator in parallel + IpplTimings::startTimer(finePropagator); + Pcoarse->BorisPIF(Pbegin->R, Pbegin->P, ntFine, dtFine, tStartMySlice, nc+1, it+1, + Bext, rankTime, rankSpace, fine, spaceComm); + IpplTimings::stopTimer(finePropagator); + + + //Difference = Fine - Coarse + Pend->R = Pbegin->R - Pcoarse->R; + Pend->P = Pbegin->P - Pcoarse->P; + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->RprevIter.getView(), Pcoarse->R.getView()); + Kokkos::deep_copy(Pcoarse->PprevIter.getView(), Pcoarse->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + tag = 1100;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + int tagbool = 1300;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + if(recvCriteria && (!isPreviousDomainConverged)) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime-sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + MPI_Recv(&isPreviousDomainConverged, 1, MPI_C_BOOL, rankTime-sign, tagbool, + timeComm, MPI_STATUS_IGNORE); + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pcoarse->R0.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pcoarse->P0.getView()); + Kokkos::deep_copy(Pcoarse->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->BorisPIC(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + } + else { + Pcoarse->BorisPIF(Pcoarse->R, Pcoarse->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + + Pend->R = Pend->R + Pcoarse->R; + Pend->P = Pend->P + Pcoarse->P; + + PL.applyBC(Pend->R, PL.getRegionLayout().getDomain()); + IpplTimings::startTimer(computeErrors); + double Rerror = computeRL2Error(Pcoarse->R, Pcoarse->RprevIter, length, spaceComm); + double Perror = computePL2Error(Pcoarse->P, Pcoarse->PprevIter, spaceComm); + IpplTimings::stopTimer(computeErrors); + + if((Rerror <= tol) && (Perror <= tol) && isPreviousDomainConverged) { + isConverged = true; + } + + IpplTimings::startTimer(timeCommunication); + if(sendCriteria) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime+sign, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + MPI_Send(&isConverged, 1, MPI_C_BOOL, rankTime+sign, tagbool, timeComm); + } + IpplTimings::stopTimer(timeCommunication); + + + msg << "Finished iteration: " << it+1 + << " in cycle: " << nc+1 + << " Rerror: " << Rerror + << " Perror: " << Perror + << endl; + + IpplTimings::startTimer(dumpData); + Pcoarse->writelocalError(Rerror, Perror, nc+1, it+1, rankTime, rankSpace); + IpplTimings::stopTimer(dumpData); + + MPI_Barrier(spaceComm); + + it += 1; + } + + MPI_Barrier(MPI_COMM_WORLD); + if((nCycles > 1) && (nc < (nCycles - 1))) { + tag = 1000;//Ippl::Comm->next_tag(IPPL_PARAREAL_APP, IPPL_APP_CYCLE); + + //send, receive criteria and tStartMySlice are reversed at the end of the cycle + if(nc % 2 == 0) { + recvCriteria = (rankTime < (sizeTime-1)); + sendCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (((sizeTime - 1) - rankTime) * dtSlice); + } + //odd cycles + else { + sendCriteria = (rankTime < (sizeTime-1)); + recvCriteria = (rankTime > 0); + tStartMySlice = (nc * tEndCycle) + (rankTime * dtSlice); + } + + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pbegin->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pbegin->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + if(recvCriteria) { + size_type bufSize = Pbegin->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_RECV, bufSize); + Ippl::Comm->recv(rankTime+sign, tag, *Pbegin, *buf, bufSize, nloc, timeComm); + buf->resetReadPos(); + } + IpplTimings::stopTimer(timeCommunication); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pend->R.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pend->P.getView(), Pbegin->P.getView()); + Kokkos::deep_copy(Pcoarse->R0.getView(), Pbegin->R.getView()); + Kokkos::deep_copy(Pcoarse->P0.getView(), Pbegin->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(coarsePropagator); + if(Pcoarse->coarsetype_m == "PIC") { + Pcoarse->BorisPIC(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, Bext, spaceComm); + } + else { + Pcoarse->BorisPIF(Pend->R, Pend->P, ntCoarse, dtCoarse, tStartMySlice, 0, 0, Bext, 0, 0, coarse, spaceComm); + } + IpplTimings::stopTimer(coarsePropagator); + + IpplTimings::startTimer(deepCopy); + Kokkos::deep_copy(Pcoarse->R.getView(), Pend->R.getView()); + Kokkos::deep_copy(Pcoarse->P.getView(), Pend->P.getView()); + IpplTimings::stopTimer(deepCopy); + + IpplTimings::startTimer(timeCommunication); + if(sendCriteria) { + size_type bufSize = Pend->packedSize(nloc); + buffer_type buf = Ippl::Comm->getBuffer(IPPL_PARAREAL_SEND, bufSize); + MPI_Request request; + Ippl::Comm->isend(rankTime-sign, tag, *Pend, *buf, request, nloc, timeComm); + buf->resetWritePos(); + MPI_Wait(&request, MPI_STATUS_IGNORE); + } + IpplTimings::stopTimer(timeCommunication); + sign *= -1; + } + } + msg << TestName << " Parareal: End." << endl; + IpplTimings::stopTimer(mainTimer); + IpplTimings::print(); + IpplTimings::print(std::string("timing.dat")); + + MPI_Comm_free(&spaceComm); + MPI_Comm_free(&timeComm); + + return 0; +} diff --git a/alpine/PinT/StatesSlice.hpp b/alpine/PinT/StatesSlice.hpp new file mode 100644 index 000000000..206f8746c --- /dev/null +++ b/alpine/PinT/StatesSlice.hpp @@ -0,0 +1,31 @@ +// Copyright (c) 2021 Paul Scherrer Institut, Villigen PSI, Switzerland +// All rights reserved +// +// This file is part of IPPL. +// +// IPPL is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// You should have received a copy of the GNU General Public License +// along with IPPL. If not, see . +// + + +template +class StatesSlice : public ippl::ParticleBase { + +public: + typename ippl::ParticleBase::particle_position_type P; + + StatesSlice(PLayout& pl) + : ippl::ParticleBase(pl) + { + // register the particle attributes + this->addAttribute(P); + } + + ~StatesSlice(){ } + +}; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ad4b1f186..bd6a2205b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -94,7 +94,14 @@ include_directories ( add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) -target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) + +if (ENABLE_NUFFT) + target_include_directories(ippl PUBLIC ${CUFINUFFT_INCLUDE_DIR}) + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}) +else() + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) +endif() + install (TARGETS ippl DESTINATION lib) install (FILES ${IPPL_BASEDIR_HDRS} DESTINATION include) diff --git a/src/Communicate/Communicate.cpp b/src/Communicate/Communicate.cpp index 78bf9bb82..63314a1ef 100644 --- a/src/Communicate/Communicate.cpp +++ b/src/Communicate/Communicate.cpp @@ -25,7 +25,13 @@ namespace ippl { Communicate::Communicate(int& argc, char**& argv, const MPI_Comm& comm) : comm_m(comm) { - MPI_Init(&argc, &argv); + int isInitialized; + MPI_Initialized(&isInitialized); + + if (!isInitialized) { + MPI_Init(&argc, &argv); + } + MPI_Comm_rank(comm_m, &rank_m); MPI_Comm_size(comm_m, &size_m); } diff --git a/src/Communicate/Communicate.h b/src/Communicate/Communicate.h index 7024982e2..423bd08b1 100644 --- a/src/Communicate/Communicate.h +++ b/src/Communicate/Communicate.h @@ -123,14 +123,14 @@ namespace ippl { */ template void recv(int src, int tag, Buffer& buffer, archive_type& ar, - size_type msize, size_type nrecvs); + size_type msize, size_type nrecvs, const MPI_Comm& comm = MPI_COMM_WORLD); /*! * \warning Only works with default spaces! */ template void isend(int dest, int tag, Buffer& buffer, archive_type&, - MPI_Request&, size_type nsends); + MPI_Request&, size_type nsends, const MPI_Comm& comm = MPI_COMM_WORLD); /*! * \warning Only works with default spaces! @@ -158,7 +158,7 @@ namespace ippl { template void Communicate::recv(int src, int tag, Buffer& buffer, archive_type& ar, - size_type msize, size_type nrecvs) + size_type msize, size_type nrecvs, const MPI_Comm& comm) { // Temporary fix. MPI communication seems to have problems when the // count argument exceeds the range of int, so large messages should @@ -169,14 +169,15 @@ namespace ippl { } MPI_Status status; MPI_Recv(ar.getBuffer(), msize, - MPI_BYTE, src, tag, comm_m, &status); + MPI_BYTE, src, tag, comm, &status); buffer.deserialize(ar, nrecvs); } template void Communicate::isend(int dest, int tag, Buffer& buffer, - archive_type& ar, MPI_Request& request, size_type nsends) + archive_type& ar, MPI_Request& request, size_type nsends, + const MPI_Comm& comm) { if (ar.getSize() > INT_MAX) { std::cerr << "Message size exceeds range of int" << std::endl; @@ -184,7 +185,7 @@ namespace ippl { } buffer.serialize(ar, nsends); MPI_Isend(ar.getBuffer(), ar.getSize(), - MPI_BYTE, dest, tag, comm_m, &request); + MPI_BYTE, dest, tag, comm, &request); } } diff --git a/src/Communicate/Tags.h b/src/Communicate/Tags.h index 8d6db8bcd..1e07ed717 100644 --- a/src/Communicate/Tags.h +++ b/src/Communicate/Tags.h @@ -26,13 +26,6 @@ #define IPPL_EXIT_TAG 6 // program should exit() -// tags for reduction -#define COMM_REDUCE_SEND_TAG 10000 -#define COMM_REDUCE_RECV_TAG 11000 -#define COMM_REDUCE_SCATTER_TAG 12000 -#define COMM_REDUCE_CYCLE 1000 - - // tag for applying parallel periodic boundary condition. #define BC_PARALLEL_PERIODIC_TAG 15000 @@ -48,28 +41,6 @@ namespace ippl { } } -#define F_GUARD_CELLS_TAG 20000 // Field::fillGuardCells() -#define F_WRITE_TAG 21000 // Field::write() -#define F_READ_TAG 22000 // Field::read() -#define F_GEN_ASSIGN_TAG 23000 // assign(BareField,BareField) -#define F_REPARTITION_BCAST_TAG 24000 // broadcast in FieldLayout::repartion. -#define F_REDUCE_PERP_TAG 25000 // reduction in binary load balance. -#define F_GETSINGLE_TAG 26000 // IndexedBareField::getsingle() -#define F_REDUCE_TAG 27000 // Reduction in minloc/maxloc -#define F_LAYOUT_IO_TAG 28000 // Reduction in minloc/maxloc -#define F_TAG_CYCLE 1000 - -// // Tags for FieldView and FieldBlock -// #define FV_2D_TAG 30000 // FieldView::update_2D_data() -// #define FV_3D_TAG 31000 // FieldView::update_2D_data() -// #define FV_TAG_CYCLE 1000 -// -// #define FB_WRITE_TAG 32000 // FieldBlock::write() -// #define FB_READ_TAG 33000 // FieldBlock::read() -// #define FB_TAG_CYCLE 1000 -// -// #define FP_GATHER_TAG 34000 // FieldPrint::print() -// #define FP_TAG_CYCLE 1000 // Special tags used by Particle classes for communication. #define P_WEIGHTED_LAYOUT_TAG 50000 @@ -88,7 +59,7 @@ namespace ippl { #define IPPL_TAG_CYCLE 1000 // Tags for Ippl application codes -#define IPPL_APP_TAG0 90000 +#define IPPL_PARAREAL_APP 90000 #define IPPL_APP_TAG1 91000 #define IPPL_APP_TAG2 92000 #define IPPL_APP_TAG3 93000 @@ -128,4 +99,7 @@ namespace ippl { #define OPEN_SOLVER_TAG 18000 #define VICO_SOLVER_TAG 70000 +#define IPPL_PARAREAL_SEND 19000 +#define IPPL_PARAREAL_RECV 21000 + #endif // TAGS_H diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 8a13e3b45..6807e8ba3 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -30,10 +30,13 @@ #include #include +#include #include #include +#include #include +#include "Types/IpplTypes.h" #include "FieldLayout/FieldLayout.h" #include "Field/Field.h" #include "Utility/ParameterList.h" @@ -48,6 +51,8 @@ namespace heffte { namespace ippl { + template class ParticleAttrib; + /** Tag classes for CC type of Fourier transforms */ @@ -64,6 +69,12 @@ namespace ippl { Tag classes for Cosine transforms */ class CosTransform {}; +#ifdef KOKKOS_ENABLE_CUDA + /** + Tag classes for Non-uniform type of Fourier transforms + */ + class NUFFTransform {}; +#endif enum FFTComm { a2av = 0, @@ -110,6 +121,37 @@ namespace ippl { using backendCos = heffte::backend::stock_cos; }; #endif +#endif + +#ifdef KOKKOS_ENABLE_CUDA + template + struct CufinufftType {}; + + template <> + struct CufinufftType { + std::function makeplan = cufinufftf_makeplan; + std::function setpts = cufinufftf_setpts; + std::function execute = cufinufftf_execute; + std::function destroy = cufinufftf_destroy; + + using complexType = cuFloatComplex; + using plan_t = cufinufftf_plan; + }; + + template <> + struct CufinufftType { + std::function makeplan = cufinufft_makeplan; + std::function setpts = cufinufft_setpts; + std::function execute = cufinufft_execute; + std::function destroy = cufinufft_destroy; + + using complexType = cuDoubleComplex; + using plan_t = cufinufft_plan; + }; #endif } @@ -133,6 +175,7 @@ namespace ippl { using heffteBackend = typename detail::HeffteBackendType::backend; using workspace_t = typename heffte::fft3d::template buffer_container; + using view_type = typename detail::ViewType::view_type; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. @@ -160,6 +203,7 @@ namespace ippl { std::shared_ptr> heffte_m; workspace_t workspace_m; + view_type tempField_m; }; @@ -178,6 +222,8 @@ namespace ippl { using heffteBackend = typename detail::HeffteBackendType::backend; typedef Kokkos::complex Complex_t; using workspace_t = typename heffte::fft3d_r2c::template buffer_container; + using view_real_type = typename detail::ViewType::view_type; + using view_complex_type = typename detail::ViewType::view_type; typedef Field ComplexField_t; @@ -211,6 +257,8 @@ namespace ippl { std::shared_ptr> heffte_m; workspace_t workspace_m; + view_real_type tempFieldf_m; + view_complex_type tempFieldg_m; }; @@ -227,6 +275,7 @@ namespace ippl { using heffteBackend = typename detail::HeffteBackendType::backendSine; using workspace_t = typename heffte::fft3d::template buffer_container; + using view_type = typename detail::ViewType::view_type; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. @@ -252,6 +301,7 @@ namespace ippl { std::shared_ptr> heffte_m; workspace_t workspace_m; + view_type tempField_m; }; /** @@ -267,6 +317,7 @@ namespace ippl { using heffteBackend = typename detail::HeffteBackendType::backendCos; using workspace_t = typename heffte::fft3d::template buffer_container; + using view_type = typename detail::ViewType::view_type; /** Create a new FFT object with the layout for the input Field and * parameters for heffte. @@ -292,11 +343,71 @@ namespace ippl { std::shared_ptr> heffte_m; workspace_t workspace_m; + view_type tempField_m; + + }; + + +#ifdef KOKKOS_ENABLE_CUDA + /** + Non-uniform FFT class + */ + template + class FFT { + + public: + + typedef FieldLayout Layout_t; + typedef Kokkos::complex KokkosComplex_t; + typedef Field ComplexField_t; + + using complexType = typename detail::CufinufftType::complexType; + using plan_t = typename detail::CufinufftType::plan_t; + using view_field_type = typename detail::ViewType::view_type; + using view_particle_real_type = typename detail::ViewType::view_type; + using view_particle_complex_type = typename detail::ViewType::view_type; + + + FFT() = default; + + /** Create a new FFT object with the layout for the input Field, type + * (1 or 2) for the NUFFT and parameters for cuFINUFFT. + */ + FFT(const Layout_t& layout, const detail::size_type& localNp, int type, const ParameterList& params); + + // Destructor + ~FFT(); + + /** Do the NUFFT. + */ + template + void transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, ComplexField_t& f); + + + private: + + /** + setup performs the initialization necessary. + */ + void setup(std::array& nmodes, + const ParameterList& params); + + detail::CufinufftType nufft_m; + plan_t plan_m; + int ier_m; + T tol_m; + int type_m; + view_field_type tempField_m; + view_particle_real_type tempR_m[3] = {}; + view_particle_complex_type tempQ_m; + }; } +#endif #include "FFT/FFT.hpp" #endif // IPPL_FFT_FFT_H diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 853651858..07f36cee9 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -57,8 +57,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -74,6 +74,9 @@ namespace ippl { high[d] = static_cast(lDom[d].length() + lDom[d].first() - 1); } + if(tempField_m.size() < lDom.size()) { + Kokkos::realloc(tempField_m, lDom[0].length(), lDom[1].length(), lDom[2].length()); + } setup(low, high, params); } @@ -88,45 +91,45 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -138,74 +141,75 @@ namespace ippl { int direction, typename FFT::ComplexField_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost).real( - fview(i, j, k).real()); - tempField(i-nghost, j-nghost, k-nghost).imag( - fview(i, j, k).imag()); - }); - - - - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k).real() = - tempField(i-nghost, j-nghost, k-nghost).real(); - fview(i, j, k).imag() = - tempField(i-nghost, j-nghost, k-nghost).imag(); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + //Kokkos::View + // tempField("tempField", fview.extent(0) - 2*nghost, + // fview.extent(1) - 2*nghost, + // fview.extent(2) - 2*nghost); + + auto tempField = tempField_m; + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost).real( + fview(i, j, k).real()); + tempField(i-nghost, j-nghost, k-nghost).imag( + fview(i, j, k).imag()); + }); + + + + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k).real() = + tempField(i-nghost, j-nghost, k-nghost).real(); + fview(i, j, k).imag() = + tempField(i-nghost, j-nghost, k-nghost).imag(); + }); } @@ -259,6 +263,14 @@ namespace ippl { lDomOutput[d].first() - 1); } + + if(tempFieldf_m.size() < lDomInput.size()) { + Kokkos::realloc(tempFieldf_m, lDomInput[0].length(), lDomInput[1].length(), lDomInput[2].length()); + } + if(tempFieldg_m.size() < lDomOutput.size()) { + Kokkos::realloc(tempFieldg_m, lDomOutput[0].length(), lDomOutput[1].length(), lDomOutput[2].length()); + } + setup(lowInput, highInput, lowOutput, highOutput, params); } @@ -275,17 +287,17 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {lowInput, highInput}; - heffte::box3d outbox = {lowOutput, highOutput}; + heffte::box3d inbox = {lowInput, highInput}; + heffte::box3d outbox = {lowOutput, highOutput}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif switch (params.get("comm")) { @@ -309,12 +321,12 @@ namespace ippl { } heffte_m = std::make_shared> - (inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), + (inbox, outbox, params.get("r2c_direction"), MPI_COMM_SELF, heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -325,104 +337,106 @@ namespace ippl { typename FFT::RealField_t& f, typename FFT::ComplexField_t& g) { - auto fview = f.getView(); - auto gview = g.getView(); - const int nghostf = f.getNghost(); - const int nghostg = g.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, - fview.extent(1) - 2*nghostf, - fview.extent(2) - 2*nghostf); - - Kokkos::View - tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, - gview.extent(1) - 2*nghostg, - gview.extent(2) - 2*nghostg); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos f field in FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldf(i-nghostf, j-nghostf, k-nghostf) = fview(i, j, k); - }); - Kokkos::parallel_for("copy from Kokkos g field in FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real( - gview(i, j, k).real()); - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag( - gview(i, j, k).imag()); - }); + auto fview = f.getView(); + auto gview = g.getView(); + const int nghostf = f.getNghost(); + const int nghostg = g.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + //Kokkos::View + // tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, + // fview.extent(1) - 2*nghostf, + // fview.extent(2) - 2*nghostf); + + //Kokkos::View + // tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, + // gview.extent(1) - 2*nghostg, + // gview.extent(2) - 2*nghostg); + + auto tempFieldf = tempFieldf_m; + auto tempFieldg = tempFieldg_m; + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos f field in FFT", + mdrange_type({nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, + fview.extent(1) - nghostf, + fview.extent(2) - nghostf + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempFieldf(i-nghostf, j-nghostf, k-nghostf) = fview(i, j, k); + }); + Kokkos::parallel_for("copy from Kokkos g field in FFT", + mdrange_type({nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, + gview.extent(1) - nghostg, + gview.extent(2) - nghostg + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempFieldg(i-nghostg, j-nghostg, k-nghostg).real( + gview(i, j, k).real()); + tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag( + gview(i, j, k).imag()); + }); - if ( direction == 1 ) - { - heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), - heffte::scale::full ); - } - else if ( direction == -1 ) - { - heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), - heffte::scale::none ); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos f field FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = tempFieldf(i-nghostf, j-nghostf, k-nghostf); - }); - - Kokkos::parallel_for("copy to Kokkos g field FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - gview(i, j, k).real() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real(); - gview(i, j, k).imag() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag(); - }); + if ( direction == 1 ) + { + heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), + heffte::scale::full ); + } + else if ( direction == -1 ) + { + heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), + heffte::scale::none ); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + + Kokkos::parallel_for("copy to Kokkos f field FFT", + mdrange_type({nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, + fview.extent(1) - nghostf, + fview.extent(2) - nghostf + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = tempFieldf(i-nghostf, j-nghostf, k-nghostf); + }); + + Kokkos::parallel_for("copy to Kokkos g field FFT", + mdrange_type({nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, + gview.extent(1) - nghostg, + gview.extent(2) - nghostg + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + gview(i, j, k).real() = + tempFieldg(i-nghostg, j-nghostg, k-nghostg).real(); + gview(i, j, k).imag() = + tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag(); + }); } @@ -446,8 +460,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -463,6 +477,9 @@ namespace ippl { high[d] = static_cast(lDom[d].length() + lDom[d].first() - 1); } + if(tempField_m.size() < lDom.size()) { + Kokkos::realloc(tempField_m, lDom[0].length(), lDom[1].length(), lDom[2].length()); + } setup(low, high, params); } @@ -477,44 +494,44 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -524,66 +541,67 @@ namespace ippl { int direction, typename FFT::Field_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + //Kokkos::View + // tempField("tempField", fview.extent(0) - 2*nghost, + // fview.extent(1) - 2*nghost, + // fview.extent(2) - 2*nghost); + + auto tempField = tempField_m; + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost) = + fview(i, j, k); + }); + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = + tempField(i-nghost, j-nghost, k-nghost); + }); } @@ -607,8 +625,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -624,6 +642,9 @@ namespace ippl { high[d] = static_cast(lDom[d].length() + lDom[d].first() - 1); } + if(tempField_m.size() < lDom.size()) { + Kokkos::realloc(tempField_m, lDom[0].length(), lDom[1].length(), lDom[2].length()); + } setup(low, high, params); } @@ -638,44 +659,44 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -686,68 +707,277 @@ namespace ippl { int direction, typename FFT::Field_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + //Kokkos::View + // tempField("tempField", fview.extent(0) - 2*nghost, + // fview.extent(1) - 2*nghost, + // fview.extent(2) - 2*nghost); + + auto tempField = tempField_m; + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost) = + fview(i, j, k); + }); + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = + tempField(i-nghost, j-nghost, k-nghost); + }); + + } + + +#ifdef KOKKOS_ENABLE_CUDA + //========================================================================= + // FFT NUFFTransform Constructors + //========================================================================= + + /** + Create a new FFT object of type NUFFTransform, with a + given layout and cuFINUFFT parameters. + */ + + template + FFT::FFT(const Layout_t& layout, + const detail::size_type& localNp, + int type, + const ParameterList& params) + { + /** + * cuFINUFFT requires to pass a 3D array even for 2D and + * 1D FFTs we just have to fill in other + * dimensions to be 1. Note this is different from Heffte + * where we fill 0. + */ + + std::array nmodes; + + const NDIndex& lDom = layout.getLocalNDIndex(); + nmodes.fill(1); + + for(size_t d = 0; d < Dim; ++d) { + nmodes[d] = lDom[d].length();; + } + + type_m = type; + if(tempField_m.size() < lDom.size()) { + Kokkos::realloc(tempField_m, lDom[0].length(), lDom[1].length(), lDom[2].length()); + } + for(size_t d = 0; d < Dim; ++d) { + if(tempR_m[d].size() < localNp) { + Kokkos::realloc(tempR_m[d], localNp); + } + } + if(tempQ_m.size() < localNp) { + Kokkos::realloc(tempQ_m, localNp); + } + setup(nmodes, params); } + + + /** + setup performs the initialization necessary. + */ + template + void + FFT::setup(std::array& nmodes, + const ParameterList& params) + { + + cufinufft_opts opts; + cufinufft_default_opts(&opts); + tol_m = 1e-6; + + if(!params.get("use_cufinufft_defaults")) { + tol_m = params.get("tolerance"); + opts.gpu_method = params.get("gpu_method"); + opts.gpu_sort = params.get("gpu_sort"); + opts.gpu_kerevalmeth = params.get("gpu_kerevalmeth"); + } + + opts.gpu_maxbatchsize = 0; //default option. ignored for ntransf = 1 which + // is our case + //For Perlmutter since the mask to hide the other GPUs in the node is + //somehow not working there + //opts.gpu_device_id = (int)(Ippl::Comm->rank() % 4); + + int iflag; + + if(type_m == 1) { + iflag = -1; + } + else if(type_m == 2) { + iflag = 1; + } + else { + throw std::logic_error("Only type 1 and type 2 NUFFT are allowed now"); + } + + //dim in cufinufft is int + int dim = static_cast(Dim); + ier_m = nufft_m.makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, + &plan_m, &opts); + + } + + + + template + template + void + FFT::transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, + typename FFT::ComplexField_t& f) + { + + + auto fview = f.getView(); + auto Rview = R.getView(); + auto Qview = Q.getView(); + const int nghost = f.getNghost(); + + auto localNp = R.getParticleCount(); + + const Layout_t& layout = f.getLayout(); + const UniformCartesian& mesh = f.get_mesh(); + const Vector& dx = mesh.getMeshSpacing(); + const Vector& origin = mesh.getOrigin(); + const auto& domain = layout.getDomain(); + Vector Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + const double pi = std::acos(-1.0); + + auto tempField = tempField_m; + auto tempQ = tempQ_m; + Kokkos::View tempR[3] = {}; + for(size_t d = 0; d < Dim; ++d) { + tempR[d] = tempR_m[d]; + } + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from field data NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost).x = + fview(i, j, k).real(); + tempField(i-nghost, j-nghost, k-nghost).y = + fview(i, j, k).imag(); + }); + + + Kokkos::parallel_for("copy from particle data NUFFT", + localNp, + KOKKOS_LAMBDA(const size_t i) + { + for(size_t d = 0; d < Dim; ++d) { + //tempR[d](i) = (Rview(i)[d] - (twopiFactor * 2.0 * pi)) * (2.0 * pi / Len[d]); + tempR[d](i) = Rview(i)[d] * (2.0 * pi / Len[d]); + //tempR[d](i) = Rview(i)[d]; + } + tempQ(i).x = Qview(i); + tempQ(i).y = 0.0; + }); + + ier_m = nufft_m.setpts(plan_m, localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + NULL, NULL, NULL); + + ier_m = nufft_m.execute(plan_m, tempQ.data(), tempField.data()); + Kokkos::fence(); + + + if(type_m == 1) { + Kokkos::parallel_for("copy to field data NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k).real() = + tempField(i-nghost, j-nghost, k-nghost).x; + fview(i, j, k).imag() = + tempField(i-nghost, j-nghost, k-nghost).y; + }); + } + else if(type_m == 2) { + Kokkos::parallel_for("copy to particle data NUFFT", + localNp, + KOKKOS_LAMBDA(const size_t i) + { + Qview(i) = tempQ(i).x; + }); + } + } + + template + FFT::~FFT() { + + ier_m = nufft_m.destroy(plan_m); + + } +#endif } // vi: set et ts=4 sw=4 sts=4: diff --git a/src/Field/BareField.hpp b/src/Field/BareField.hpp index 685e6a751..36886e86d 100644 --- a/src/Field/BareField.hpp +++ b/src/Field/BareField.hpp @@ -92,7 +92,14 @@ namespace ippl { template void BareField::fillHalo() { - if(Ippl::Comm->size() > 1) { + + bool isAllSerial = true; + + for (unsigned d = 0; d < Dim; ++d) { + isAllSerial = isAllSerial && (layout_m->getRequestedDistribution(d) == SERIAL); + } + + if((Ippl::Comm->size() > 1) && (!isAllSerial)) { halo_m.fillHalo(dview_m, layout_m); } if(layout_m->isAllPeriodic_m) { @@ -106,7 +113,14 @@ namespace ippl { template void BareField::accumulateHalo() { - if(Ippl::Comm->size() > 1) { + + bool isAllSerial = true; + + for (unsigned d = 0; d < Dim; ++d) { + isAllSerial = isAllSerial && (layout_m->getRequestedDistribution(d) == SERIAL); + } + + if((Ippl::Comm->size() > 1) && (!isAllSerial)) { halo_m.accumulateHalo(dview_m, layout_m); } if(layout_m->isAllPeriodic_m) { diff --git a/src/FieldLayout/FieldLayout.hpp b/src/FieldLayout/FieldLayout.hpp index 34cf92e0e..9e129e497 100644 --- a/src/FieldLayout/FieldLayout.hpp +++ b/src/FieldLayout/FieldLayout.hpp @@ -132,15 +132,6 @@ namespace ippl { isAllPeriodic_m = isAllPeriodic; - if (nRanks < 2) { - Kokkos::resize(dLocalDomains_m, nRanks); - Kokkos::resize(hLocalDomains_m, nRanks); - hLocalDomains_m(0) = domain; - Kokkos::deep_copy(dLocalDomains_m, hLocalDomains_m); - return; - } - - // If the user did not specify parallel/serial flags then make all parallel. long totparelems = 1; for (unsigned d = 0; d < Dim; ++d) { @@ -154,6 +145,22 @@ namespace ippl { } } + bool isAllSerial = true; + + for (unsigned d = 0; d < Dim; ++d) { + isAllSerial = isAllSerial && (requestedLayout_m[d] == SERIAL); + } + + if ((nRanks < 2) || isAllSerial) { + Kokkos::resize(dLocalDomains_m,nRanks); + Kokkos::resize(hLocalDomains_m,nRanks); + for (int r = 0; r < nRanks; ++r) { + hLocalDomains_m(r) = domain; + } + Kokkos::deep_copy(dLocalDomains_m, hLocalDomains_m); + return; + } + /* Check to see if we have too few elements to partition. If so, reduce * the number of ranks (if necessary) to just the number of elements along * parallel dims. diff --git a/src/Ippl.cpp b/src/Ippl.cpp index 420af917a..26b5b30a6 100644 --- a/src/Ippl.cpp +++ b/src/Ippl.cpp @@ -98,7 +98,7 @@ Ippl::Ippl(int& argc, char**& argv, MPI_Comm mpicomm) if (infoLevel > 0 && Comm->myNode() == 0) { for (auto& l : notparsed) { - std::cout << "Warning: Option '" << l << "' is not parsed by Ippl." << std::endl; + std::cout << "Option '" << l << "' is not parsed by Ippl. Make sure your application parses it." << std::endl; } } diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index 99276a82c..bfa089d29 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -31,6 +31,17 @@ #include "Expression/IpplExpressions.h" #include "Particle/ParticleAttribBase.h" +#include "FFT/FFT.h" +#include "Utility/ParameterList.h" + +namespace Kokkos { //reduction identity must be defined in Kokkos namespace + template<> + struct reduction_identity< ippl::Vector > { + KOKKOS_FORCEINLINE_FUNCTION static ippl::Vector sum() { + return ippl::Vector(); + } + }; +} namespace ippl { @@ -50,6 +61,7 @@ namespace ippl { using size_type = detail::size_type; + // Create storage for M particle attributes. The storage is uninitialized. // New items are appended to the end of the array. void create(size_type) override; @@ -127,7 +139,6 @@ namespace ippl { /*! * Assign the same value to the whole attribute. */ - //KOKKOS_INLINE_FUNCTION ParticleAttrib& operator=(T x); /*! @@ -138,22 +149,49 @@ namespace ippl { * @param expr is the expression */ template - //KOKKOS_INLINE_FUNCTION ParticleAttrib& operator=(detail::Expression const& expr); - // // scatter the data from this attribute onto the given Field, using -// // the given Position attribute + // scatter the data from this attribute onto the given Field, using + // the given Position attribute template void scatter(Field& f, - const ParticleAttrib, Properties... >& pp) const; - + const ParticleAttrib, Properties... >& pp, + const MPI_Comm& spaceComm) const; + + template + void + scatterPIFNUDFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp, + const MPI_Comm& spaceComm) const; + template void gather(Field& f, const ParticleAttrib, Properties...>& pp); + template + void + gatherPIFNUDFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp); + +#ifdef KOKKOS_ENABLE_CUDA + template + void + scatterPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp, + FFT* nufft, + const MPI_Comm& spaceComm) const; + + template + void + gatherPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib, Properties... >& pp, + FFT* nufft, + ParticleAttrib& q); +#endif + T sum(); T max(); T min(); diff --git a/src/Particle/ParticleAttrib.hpp b/src/Particle/ParticleAttrib.hpp index 824cd50aa..724bc2913 100644 --- a/src/Particle/ParticleAttrib.hpp +++ b/src/Particle/ParticleAttrib.hpp @@ -30,6 +30,7 @@ #include "Communicate/DataTypes.h" #include "Utility/IpplTimings.h" + namespace ippl { template @@ -138,14 +139,16 @@ namespace ippl { template template void ParticleAttrib::scatter(Field& f, - const ParticleAttrib< Vector, Properties... >& pp) + const ParticleAttrib< Vector, Properties... >& pp, + const MPI_Comm& spaceComm) const { - static IpplTimings::TimerRef scatterTimer = IpplTimings::getTimer("scatter"); - IpplTimings::startTimer(scatterTimer); + static IpplTimings::TimerRef scatterPICTimer = IpplTimings::getTimer("ScatterPIC"); + IpplTimings::startTimer(scatterPICTimer); + typename Field::view_type view = f.getView(); - const M& mesh = f.get_mesh(); + M& mesh = f.get_mesh(); using vector_type = typename M::vector_type; using value_type = typename ParticleAttrib::value_type; @@ -154,10 +157,19 @@ namespace ippl { const vector_type& origin = mesh.getOrigin(); const vector_type invdx = 1.0 / dx; - const FieldLayout& layout = f.getLayout(); + FieldLayout& layout = f.getLayout(); const NDIndex& lDom = layout.getLocalNDIndex(); const int nghost = f.getNghost(); + + Field tempField; + + tempField.initialize(mesh, layout); + + tempField = 0.0; + + typename Field::view_type viewLocal = tempField.getView(); + Kokkos::parallel_for( "ParticleAttrib::scatter", *(this->localNum_mp), @@ -169,29 +181,137 @@ namespace ippl { Vector whi = l - index; Vector wlo = 1.0 - whi; - const size_t i = index[0] - lDom[0].first() + nghost; - const size_t j = index[1] - lDom[1].first() + nghost; - const size_t k = index[2] - lDom[2].first() + nghost; - + const int i = index[0] - lDom[0].first() + nghost; + const int j = index[1] - lDom[1].first() + nghost; + const int k = index[2] - lDom[2].first() + nghost; // scatter const value_type& val = dview_m(idx); - Kokkos::atomic_add(&view(i-1, j-1, k-1), wlo[0] * wlo[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i-1, j-1, k ), wlo[0] * wlo[1] * whi[2] * val); - Kokkos::atomic_add(&view(i-1, j, k-1), wlo[0] * whi[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i-1, j, k ), wlo[0] * whi[1] * whi[2] * val); - Kokkos::atomic_add(&view(i, j-1, k-1), whi[0] * wlo[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i, j-1, k ), whi[0] * wlo[1] * whi[2] * val); - Kokkos::atomic_add(&view(i, j, k-1), whi[0] * whi[1] * wlo[2] * val); - Kokkos::atomic_add(&view(i, j, k ), whi[0] * whi[1] * whi[2] * val); + Kokkos::atomic_add(&viewLocal(i-1, j-1, k-1), wlo[0] * wlo[1] * wlo[2] * val); + Kokkos::atomic_add(&viewLocal(i-1, j-1, k ), wlo[0] * wlo[1] * whi[2] * val); + Kokkos::atomic_add(&viewLocal(i-1, j, k-1), wlo[0] * whi[1] * wlo[2] * val); + Kokkos::atomic_add(&viewLocal(i-1, j, k ), wlo[0] * whi[1] * whi[2] * val); + Kokkos::atomic_add(&viewLocal(i, j-1, k-1), whi[0] * wlo[1] * wlo[2] * val); + Kokkos::atomic_add(&viewLocal(i, j-1, k ), whi[0] * wlo[1] * whi[2] * val); + Kokkos::atomic_add(&viewLocal(i, j, k-1), whi[0] * whi[1] * wlo[2] * val); + Kokkos::atomic_add(&viewLocal(i, j, k ), whi[0] * whi[1] * whi[2] * val); } ); - IpplTimings::stopTimer(scatterTimer); - static IpplTimings::TimerRef accumulateHaloTimer = IpplTimings::getTimer("accumulateHalo"); - IpplTimings::startTimer(accumulateHaloTimer); - f.accumulateHalo(); - IpplTimings::stopTimer(accumulateHaloTimer); + tempField.accumulateHalo(); + + IpplTimings::stopTimer(scatterPICTimer); + + static IpplTimings::TimerRef scatterAllReducePICTimer = IpplTimings::getTimer("scatterAllReducePIC"); + IpplTimings::startTimer(scatterAllReducePICTimer); + int viewSize = view.extent(0) * view.extent(1) * view.extent(2); + MPI_Allreduce(viewLocal.data(), view.data(), viewSize, + MPI_DOUBLE, MPI_SUM, spaceComm); + IpplTimings::stopTimer(scatterAllReducePICTimer); + } + + + template + template + void ParticleAttrib::scatterPIFNUDFT(Field& f, Field& Sk, + const ParticleAttrib< Vector, Properties... >& pp, + const MPI_Comm& spaceComm) + const + { + + static IpplTimings::TimerRef scatterPIFNUDFTTimer = IpplTimings::getTimer("ScatterPIFNUDFT"); + IpplTimings::startTimer(scatterPIFNUDFTTimer); + + using view_type = typename Field::view_type; + using vector_type = typename M::vector_type; + using value_type = typename ParticleAttrib::value_type; + view_type fview = f.getView(); + typename Field::view_type Skview = Sk.getView(); + const int nghost = f.getNghost(); + const FieldLayout& layout = f.getLayout(); + const M& mesh = f.get_mesh(); + const vector_type& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + vector_type Len; + Vector N; + + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + typedef Kokkos::TeamPolicy<> team_policy; + typedef Kokkos::TeamPolicy<>::member_type member_type; + + using view_type_temp = typename detail::ViewType::view_type; + + view_type_temp viewLocal("viewLocal",fview.extent(0),fview.extent(1),fview.extent(2)); + + double pi = std::acos(-1.0); + Kokkos::complex imag = {0.0, 1.0}; + + size_t Np = *(this->localNum_mp); + + size_t flatN = N[0]*N[1]*N[2]; + + Kokkos::parallel_for("ParticleAttrib::scatterPIFNUDFT compute", + team_policy(flatN, Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { + const size_t flatIndex = teamMember.league_rank(); + +#ifdef KOKKOS_ENABLE_CUDA + const int k = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (k * N[0] * N[1]); + const int i = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#else + + const int i = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (i * N[0] * N[1]); + const int k = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#endif + + FT reducedValue = 0.0; + Vector iVec = {i, j, k}; + vector_type kVec; + for(size_t d = 0; d < Dim; ++d) { + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + } + auto Sk = Skview(i+nghost, j+nghost, k+nghost); + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, Np), + [=](const size_t idx, FT& innerReduce) + { + double arg = 0.0; + for(size_t d = 0; d < Dim; ++d) { + arg += kVec[d]*pp(idx)[d]; + } + const value_type& val = dview_m(idx); + + innerReduce += Sk * (Kokkos::cos(arg) + - imag * Kokkos::sin(arg)) * val; + }, Kokkos::Sum(reducedValue)); + + if(teamMember.team_rank() == 0) { + viewLocal(i+nghost,j+nghost,k+nghost) = reducedValue; + //fview(i+nghost,j+nghost,k+nghost) = reducedValue; + } + + } + ); + + IpplTimings::stopTimer(scatterPIFNUDFTTimer); + + static IpplTimings::TimerRef scatterAllReducePIFTimer = IpplTimings::getTimer("scatterAllReducePIF"); + IpplTimings::startTimer(scatterAllReducePIFTimer); + int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); + MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, + MPI_C_DOUBLE_COMPLEX, MPI_SUM, spaceComm); + IpplTimings::stopTimer(scatterAllReducePIFTimer); + } @@ -201,13 +321,11 @@ namespace ippl { const ParticleAttrib, Properties...>& pp) { - static IpplTimings::TimerRef fillHaloTimer = IpplTimings::getTimer("fillHalo"); - IpplTimings::startTimer(fillHaloTimer); - f.fillHalo(); - IpplTimings::stopTimer(fillHaloTimer); + static IpplTimings::TimerRef gatherPICTimer = IpplTimings::getTimer("GatherPIC"); + IpplTimings::startTimer(gatherPICTimer); - static IpplTimings::TimerRef gatherTimer = IpplTimings::getTimer("gather"); - IpplTimings::startTimer(gatherTimer); + f.fillHalo(); + const typename Field::view_type view = f.getView(); const M& mesh = f.get_mesh(); @@ -250,25 +368,318 @@ namespace ippl { + whi[0] * whi[1] * whi[2] * view(i, j, k ); } ); - IpplTimings::stopTimer(gatherTimer); + IpplTimings::stopTimer(gatherPICTimer); + } + + template + template + void ParticleAttrib::gatherPIFNUDFT(Field& f, Field& Sk, + const ParticleAttrib< Vector, Properties... >& pp) + { + static IpplTimings::TimerRef gatherPIFNUDFTTimer = IpplTimings::getTimer("GatherPIFNUDFT"); + IpplTimings::startTimer(gatherPIFNUDFTTimer); + + using view_type = typename Field::view_type; + using vector_type = typename M::vector_type; + using value_type = typename ParticleAttrib::value_type; + view_type fview = f.getView(); + typename Field::view_type Skview = Sk.getView(); + const int nghost = f.getNghost(); + const FieldLayout& layout = f.getLayout(); + const M& mesh = f.get_mesh(); + const vector_type& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + vector_type Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + typedef Kokkos::TeamPolicy<> team_policy; + typedef Kokkos::TeamPolicy<>::member_type member_type; + + double pi = std::acos(-1.0); + Kokkos::complex imag = {0.0, 1.0}; + + size_t Np = *(this->localNum_mp); + + size_t flatN = N[0]*N[1]*N[2]; + + Kokkos::parallel_for("ParticleAttrib::gatherPIFNUDFT", + team_policy(Np, Kokkos::AUTO), + KOKKOS_CLASS_LAMBDA(const member_type& teamMember) { + const size_t idx = teamMember.league_rank(); + + value_type reducedValue = 0.0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, flatN), + [=](const size_t flatIndex, value_type& innerReduce) + { + +#ifdef KOKKOS_ENABLE_CUDA + const int k = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (k * N[0] * N[1]); + const int i = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#else + const int i = (int)(flatIndex / (N[0] * N[1])); + const int flatIndex2D = flatIndex - (i * N[0] * N[1]); + const int k = flatIndex2D % N[0]; + const int j = (int)(flatIndex2D / N[0]); +#endif + + Vector iVec = {i, j, k}; + vector_type kVec; + double Dr = 0.0, arg = 0.0; + for(size_t d = 0; d < Dim; ++d) { + //bool shift = (iVec[d] > (N[d]/2)); + //kVec[d] = 2 * pi / Len[d] * (iVec[d] - shift * N[d]); + //kVec[d] = 2 * pi / Len[d] * iVec[d]; + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d]/2)); + Dr += kVec[d] * kVec[d]; + arg += kVec[d]*pp(idx)[d]; + } + + + FT Ek = 0.0; + value_type Ex = 0.0; + auto rho = fview(i+nghost,j+nghost,k+nghost); + auto Sk = Skview(i+nghost,j+nghost,k+nghost); + for(size_t d = 0; d < Dim; ++d) { + + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + Ek = -(imag * kVec[d] * rho * factor); + + //Inverse Fourier transform when the lhs is real. Use when + //we choose k \in [0 K) instead of from [-K/2+1 K/2] + //Ex[d] = 2.0 * (Ek.real() * Kokkos::cos(arg) + // - Ek.imag() * Kokkos::sin(arg)); + Ek *= Sk * (Kokkos::cos(arg) + + imag * Kokkos::sin(arg)); + Ex[d] = Ek.real(); + } + + innerReduce += Ex; + }, Kokkos::Sum(reducedValue)); + + teamMember.team_barrier(); + + if(teamMember.team_rank() == 0) { + dview_m(idx) = reducedValue; + } + + } + ); + + + IpplTimings::stopTimer(gatherPIFNUDFTTimer); + } +#ifdef KOKKOS_ENABLE_CUDA + + template + template + void ParticleAttrib::scatterPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib< Vector, Properties... >& pp, + FFT* nufft, + const MPI_Comm& spaceComm) + const + { + + static IpplTimings::TimerRef scatterPIFNUFFTTimer = IpplTimings::getTimer("ScatterPIFNUFFT"); + IpplTimings::startTimer(scatterPIFNUFFTTimer); + + auto q = *this; + + Field tempField; + + FieldLayout& layout = f.getLayout(); + M& mesh = f.get_mesh(); + tempField.initialize(mesh, layout); + + tempField = 0.0; + + nufft->transform(pp, q, tempField); + + using view_type = typename Field::view_type; + view_type fview = f.getView(); + view_type viewLocal = tempField.getView(); + typename Field::view_type Skview = Sk.getView(); + const int nghost = f.getNghost(); + + IpplTimings::stopTimer(scatterPIFNUFFTTimer); + + static IpplTimings::TimerRef scatterAllReducePIFTimer = IpplTimings::getTimer("scatterAllReducePIF"); + IpplTimings::startTimer(scatterAllReducePIFTimer); + int viewSize = fview.extent(0)*fview.extent(1)*fview.extent(2); + MPI_Allreduce(viewLocal.data(), fview.data(), viewSize, + MPI_C_DOUBLE_COMPLEX, MPI_SUM, spaceComm); + IpplTimings::stopTimer(scatterAllReducePIFTimer); + + IpplTimings::startTimer(scatterPIFNUFFTTimer); + + using mdrange_type = Kokkos::MDRangePolicy>; + Kokkos::parallel_for("Multiply with shape functions", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) *= Skview(i, j, k); + }); + + IpplTimings::stopTimer(scatterPIFNUFFTTimer); + } + + + template + template + void ParticleAttrib::gatherPIFNUFFT(Field& f, Field& Sk, + const ParticleAttrib< Vector, Properties... >& pp, + FFT* nufft, + ParticleAttrib& q) + { + static IpplTimings::TimerRef gatherPIFNUFFTTimer = IpplTimings::getTimer("GatherPIFNUFFT"); + IpplTimings::startTimer(gatherPIFNUFFTTimer); + + Field tempField; + + FieldLayout& layout = f.getLayout(); + M& mesh = f.get_mesh(); + + tempField.initialize(mesh, layout); + + using view_type = typename Field::view_type; + using vector_type = typename M::vector_type; + view_type fview = f.getView(); + view_type tempview = tempField.getView(); + auto qview = q.getView(); + typename Field::view_type Skview = Sk.getView(); + const int nghost = f.getNghost(); + const vector_type& dx = mesh.getMeshSpacing(); + const auto& domain = layout.getDomain(); + vector_type Len; + Vector N; + + for (unsigned d=0; d < Dim; ++d) { + N[d] = domain[d].length(); + Len[d] = dx[d] * N[d]; + } + + + double pi = std::acos(-1.0); + Kokkos::complex imag = {0.0, 1.0}; + size_t Np = *(this->localNum_mp); + + + using mdrange_type = Kokkos::MDRangePolicy>; + + for(size_t gd = 0; gd < Dim; ++gd) { + Kokkos::parallel_for("Gather NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k) + { + Vector iVec = {i-nghost, j-nghost, k-nghost}; + Vector kVec; + + double Dr = 0.0; + for(size_t d = 0; d < Dim; ++d) { + kVec[d] = 2 * pi / Len[d] * (iVec[d] - (N[d] / 2)); + Dr += kVec[d] * kVec[d]; + } + + tempview(i, j, k) = fview(i, j, k); + + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + + tempview(i, j, k) *= -Skview(i, j, k) * (imag * kVec[gd] * factor); + }); + + nufft->transform(pp, q, tempField); + + Kokkos::parallel_for("Assign E gather NUFFT", + Np, + KOKKOS_CLASS_LAMBDA(const size_t i) + { + dview_m(i)[gd] = qview(i); + }); + } + + IpplTimings::stopTimer(gatherPIFNUFFTTimer); + + } +#endif /* - * Non-class function + * Non-class functions * */ + template + inline + void scatterPIFNUFFT(const ParticleAttrib& attrib, Field& f, + Field& Sk, const ParticleAttrib, Properties...>& pp, + FFT* nufft, + const MPI_Comm& spaceComm = MPI_COMM_WORLD) + { +#ifdef KOKKOS_ENABLE_CUDA + attrib.scatterPIFNUFFT(f, Sk, pp, nufft, spaceComm); +#else + //throw IpplException("scatterPIFNUFFT", "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to + // be compiled with CUDA. Otherwise use scatterPIFNUDFT."); +#endif + } + + template + inline + void gatherPIFNUFFT(ParticleAttrib& attrib, Field& f, + Field& Sk, const ParticleAttrib, Properties...>& pp, + FFT* nufft, + ParticleAttrib& q) + { +#ifdef KOKKOS_ENABLE_CUDA + attrib.gatherPIFNUFFT(f, Sk, pp, nufft, q); +#else + //throw IpplException("gatherPIFNUFFT", + // "The NUFFT library cuFINUFFT currently only works with CUDA and hence Kokkos needs to + // be compiled with CUDA. Otherwise use gatherPIFNUDFT."); +#endif + } + + template inline void scatter(const ParticleAttrib& attrib, Field& f, - const ParticleAttrib, Properties...>& pp) + const ParticleAttrib, Properties...>& pp, + const MPI_Comm& spaceComm = MPI_COMM_WORLD) { - attrib.scatter(f, pp); + attrib.scatter(f, pp, spaceComm); } + template + inline + void scatterPIFNUDFT(const ParticleAttrib& attrib, Field& f, + Field& Sk, const ParticleAttrib, Properties...>& pp, + const MPI_Comm& spaceComm = MPI_COMM_WORLD) + { + attrib.scatterPIFNUDFT(f, Sk, pp, spaceComm); + } + + template inline @@ -278,6 +689,15 @@ namespace ippl { attrib.gather(f, pp); } + template + inline + void gatherPIFNUDFT(ParticleAttrib& attrib, Field& f, + Field& Sk, const ParticleAttrib, Properties...>& pp) + { + attrib.gatherPIFNUDFT(f, Sk, pp); + } + + #define DefineParticleReduction(fun, name, op, MPI_Op) \ template \ T ParticleAttrib::name() { \ diff --git a/src/Particle/ParticleBC.h b/src/Particle/ParticleBC.h index 275f04e00..dfd5aa5a0 100644 --- a/src/Particle/ParticleBC.h +++ b/src/Particle/ParticleBC.h @@ -77,8 +77,11 @@ namespace ippl { struct PeriodicBC : public ParticleBC { using value_type = typename ParticleBC::value_type; - using ParticleBC::extent_m; - using ParticleBC::middle_m; + //using ParticleBC::extent_m; + //using ParticleBC::middle_m; + using ParticleBC::maxval_m; + using ParticleBC::minval_m; + using ParticleBC::isUpper_m; KOKKOS_DEFAULTED_FUNCTION PeriodicBC() = default; @@ -94,7 +97,16 @@ namespace ippl { KOKKOS_INLINE_FUNCTION void operator()(const size_t& i) const { value_type& value = this->view_m(i)[this->dim_m]; - value = value - extent_m * (int)((value - middle_m) * 2 / extent_m); + //value = value - this->extent_m * (int)((value - this->middle_m) * 2 / extent_m); + //if ((value < this->minval_m) && (!this->isUpper_m)) + // value = (this->maxval_m - (this->minval_m - value)); + //else if ((value >= this->maxval_m) && (this->isUpper_m)) + // value = (this->minval_m + (value - this->maxval_m)); + bool tooHigh = value >= maxval_m; + bool tooLow = value < minval_m; + + value += tooHigh * (minval_m - maxval_m) + + tooLow * (maxval_m - minval_m); } KOKKOS_DEFAULTED_FUNCTION diff --git a/src/Solver/FFTPeriodicPoissonSolver.hpp b/src/Solver/FFTPeriodicPoissonSolver.hpp index 015400e9a..73d7d2a2c 100644 --- a/src/Solver/FFTPeriodicPoissonSolver.hpp +++ b/src/Solver/FFTPeriodicPoissonSolver.hpp @@ -113,12 +113,11 @@ namespace ippl { double Dr = kVec[0] * kVec[0] + kVec[1] * kVec[1] + kVec[2] * kVec[2]; - - //It would be great if we can remove this conditional - if(Dr != 0.0) - view(i, j, k) *= 1 / Dr; - else - view(i, j, k) = 0.0; + + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + + view(i, j, k) *= factor; }); fft_mp->transform(-1, *this->rhs_mp, fieldComplex_m); @@ -158,6 +157,8 @@ namespace ippl { const double Len = rmax[d] - origin[d]; bool shift = (iVec[d] > (N[d]/2)); bool notMid = (iVec[d] != (N[d]/2)); + //For the noMid part see + //https://math.mit.edu/~stevenj/fft-deriv.pdf Algorithm 1 kVec[d] = notMid * 2 * pi / Len * (iVec[d] - shift * N[d]); } @@ -166,11 +167,10 @@ namespace ippl { tempview(i, j, k) = view(i, j, k); - //It would be great if we can remove this conditional - if(Dr != 0.0) - tempview(i, j, k) *= -(imag * kVec[gd] / Dr); - else - tempview(i, j, k) = 0.0; + bool isNotZero = (Dr != 0.0); + double factor = isNotZero * (1.0 / (Dr + ((!isNotZero) * 1.0))); + + tempview(i, j, k) *= -(imag * kVec[gd] * factor); }); fft_mp->transform(-1, *this->rhs_mp, tempFieldComplex_m); diff --git a/src/Types/ViewTypes.h b/src/Types/ViewTypes.h index 179cc4056..a8877926d 100644 --- a/src/Types/ViewTypes.h +++ b/src/Types/ViewTypes.h @@ -19,6 +19,7 @@ #define IPPL_VIEW_TYPES_H #include +#include namespace ippl { /** @@ -54,7 +55,7 @@ namespace ippl { }; /*! - * Specialized view type for thee dimensions. + * Specialized view type for three dimensions. */ template struct ViewType { diff --git a/src/Utility/IpplTimings.cpp b/src/Utility/IpplTimings.cpp index 7cc1079c4..c52c53b51 100644 --- a/src/Utility/IpplTimings.cpp +++ b/src/Utility/IpplTimings.cpp @@ -115,7 +115,7 @@ void Timing::print() { msg << level1 << "---------------------------------------------"; msg << "\n"; - msg << " Timing results for " << Ippl::Comm->getNodes() << " nodes:" << "\n"; + msg << " Timing results for " << Ippl::Comm->getNodes() << " ranks:" << "\n"; msg << "---------------------------------------------"; msg << "\n"; @@ -178,7 +178,7 @@ void Timing::print(const std::string &fn, const std::map