diff --git a/CMakeLists.txt b/CMakeLists.txt index 2350faa10..ebe774e17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,19 @@ option(IPPL_USE_ALTERNATIVE_VARIANT option(IPPL_USE_STANDARD_FOLDERS "Put all generated binaries in bin/lib folders" OFF) option(IPPL_SKIP_FAILING_TESTS "Do not build/test tests that are currently marked as failing" OFF) option(IPPL_ENABLE_SCRIPTS "Generate job script templates for some benchmarks/tests" OFF) +option(IPPL_GPU_AWARE_MPI "Allow MPI to/from from device memory buffers" OFF) +# logging options +set(IPPL_LOG_LEVEL "off" CACHE STRING "Enable logging for messages of >=level") +set_property( + CACHE IPPL_LOG_LEVEL + PROPERTY STRINGS + off + trace + debug + info + warn + error + critical) # "Build IPPL as a shared library (ON) or static library (OFF)" OFF) if(IPPL_DYL) # set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) message(WARNING "IPPL_DYL is deprecated; use @@ -97,21 +110,6 @@ if(DEFINED USE_ALTERNATIVE_VARIANT) set(IPPL_USE_ALTERNATIVE_VARIANT ${USE_ALTERNATIVE_VARIANT} CACHE BOOL "" FORCE) endif() -# ------------------------------------------------------------------------------ -# Debug: This tells the compiler to replace occurrences of ${} with in debug info and error -# messages. -# ------------------------------------------------------------------------------ -add_compile_options( - $<$:-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.> - $<$:-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.>) - -if(DEFINED FETCHCONTENT_BASE_DIR) - add_compile_options( - $<$:-ffile-prefix-map=${FETCHCONTENT_BASE_DIR}=.3p> - $<$:-ffile-prefix-map=${FETCHCONTENT_BASE_DIR}=.3p> - ) -endif() - # ------------------------------------------------------------------------------ # Define sources for project # ------------------------------------------------------------------------------ diff --git a/alpine/LandauDamping.cpp b/alpine/LandauDamping.cpp index 90007b142..88808835d 100644 --- a/alpine/LandauDamping.cpp +++ b/alpine/LandauDamping.cpp @@ -37,17 +37,22 @@ const char* TestName = "LandauDamping"; #include "Manager/datatypes.h" #include "Utility/IpplTimings.h" +#include "Utility/Logging.h" #include "LandauDampingManager.h" #include "Manager/PicManager.h" int main(int argc, char* argv[]) { +#if defined(SPDLOG_ACTIVE_LEVEL) && (SPDLOG_ACTIVE_LEVEL != SPDLOG_LEVEL_OFF) + spdlog::set_pattern("[%^%-8l%$]%t| %v"); + spdlog::set_level(spdlog::level::trace); +#endif ippl::initialize(argc, argv); { Inform msg(TestName); Inform msg2all(TestName, INFORM_ALL_NODES); - static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); static IpplTimings::TimerRef initializeTimer = IpplTimings::getTimer("initialize"); IpplTimings::startTimer(mainTimer); IpplTimings::startTimer(initializeTimer); @@ -82,7 +87,7 @@ int main(int argc, char* argv[]) { manager.pre_run(); IpplTimings::stopTimer(initializeTimer); - + manager.setTime(0.0); msg << "Starting iterations ..." << endl; diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 70bb39314..704d87a8b 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -44,6 +44,23 @@ if("OPENMP" IN_LIST IPPL_PLATFORMS) colour_message(STATUS ${Green} "✅ OpenMP platform requested OpenMP found ${OPENMP_VERSION}") endif() +# ------------------------------------------------------------------------------ +# spdlog logging library +# ------------------------------------------------------------------------------ +string(TOUPPER ${IPPL_LOG_LEVEL} IPPL_LOG_LEVEL_UPPERCASE) +if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + find_package(spdlog REQUIRED) + colour_message(STATUS ${Green} "✅ spdlog found ${spdlog_VERSION}") +endif() + +# ------------------------------------------------------------------------------ +# fmt library (for formatting nice log messages) +# ------------------------------------------------------------------------------ +if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + find_package(fmt REQUIRED) + colour_message(STATUS ${Green} "✅ fmt found ${fmt_VERSION}") +endif() + # ------------------------------------------------------------------------------ # Utility function to clear a list of vars one by one # ------------------------------------------------------------------------------ diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 4731c2f76..cad844a9d 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -9,6 +9,20 @@ message(STATUS "IPPL_MACHINENAME for script generation is: ${IPPL_MACHINENAME}") # ------------------------------------------------------------------------------ set(IPPL_JOB_SUBMISSION_ACCOUNT "c41" CACHE STRING "Account to use for job submission templates") +# populate IPPL_SCRIPTS_UENV from environment variable UENV_MOUNT_LIST (if present) +if(DEFINED ENV{UENV_MOUNT_LIST}) + set(IPPL_SCRIPTS_UENV "$ENV{UENV_MOUNT_LIST}" + CACHE STRING "UENV to use on alps when running scripts mount") + set(IPPL_SCRIPTS_UENV_VIEW "default" CACHE STRING "View to set in uenv") + colour_message(STATUS ${LightBlue} + "IPPL_SCRIPTS_UENV set from UENV_MOUNT_LIST: ${IPPL_SCRIPTS_UENV}") +else() + # Remove any cached and normal definitions so the variable is completely unset + unset(IPPL_SCRIPTS_UENV CACHE) + unset(IPPL_SCRIPTS_UENV_VIEW CACHE) + colour_message(STATUS ${LightBlue} "No UENV detected") +endif() + # ------------------------------------------------------------------------------ # utility function to get target path/name since we can't use generator expressions to set variables # directly diff --git a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm index 4c40a5bc2..14542f1f3 100644 --- a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm +++ b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm @@ -12,8 +12,8 @@ #SBATCH --cpus-per-task=72 #SBATCH --exclusive -#SBATCH --uenv=/capstor/store/cscs/cscs/public/uenvs/opal-x-gh200-mpich-gcc-2025-09-28.squashfs -#SBATCH --view=develop +#SBATCH --uenv=@IPPL_SCRIPTS_UENV@ +#SBATCH --view=@IPPL_SCRIPTS_UENV_VIEW@ #SBATCH --output=landau__n_.out #SBATCH --error=landau__n_.error diff --git a/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm b/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm index de29cc9dc..1bd459192 100644 --- a/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm +++ b/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm @@ -13,8 +13,8 @@ #SBATCH --cpus-per-task=48 #SBATCH --exclusive -#SBATCH --uenv=/capstor/scratch/cscs/biddisco/opal-x-mi300-mpich-gcc-2025-10-27.squashfs -#SBATCH --view=default +#SBATCH --uenv=@IPPL_SCRIPTS_UENV@ +#SBATCH --view=@IPPL_SCRIPTS_UENV_VIEW@ #SBATCH --output=landau__n_.out #SBATCH --error=landau__n_.error diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c4778b174..51cfb879b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,6 +35,14 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/IpplVersions.h.in add_library(ippl) +if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + target_compile_definitions(ippl PUBLIC IPPL_LOGGING_ENABLED=1) + target_compile_definitions(ippl + PUBLIC SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${IPPL_LOG_LEVEL_UPPERCASE}) + target_link_libraries(ippl PUBLIC spdlog::spdlog $<$:ws2_32>) + target_link_libraries(ippl PUBLIC fmt::fmt) +endif() + target_compile_features(ippl PUBLIC cxx_std_20) target_compile_options( diff --git a/src/Communicate/Archive.h b/src/Communicate/Archive.h index 6ec74f946..ca897ca96 100644 --- a/src/Communicate/Archive.h +++ b/src/Communicate/Archive.h @@ -16,19 +16,20 @@ #include "Types/ViewTypes.h" #include "Types/Vector.h" +#undef IPPL_SIMPLE_VIEW_STORAGE namespace ippl { namespace detail { /*! * @file Archive.h - * Serialize and desesrialize particle attributes. + * Serialize and deserialize particle attributes. * @tparam Properties variadic template for Kokkos::View */ - template + template class Archive { public: - using buffer_type = typename ViewType::view_type; + using buffer_type = BufferType; using pointer_type = typename buffer_type::pointer_type; Archive(size_type size = 0); @@ -73,7 +74,7 @@ namespace ippl { /*! * @returns a pointer to the data of the buffer */ - pointer_type getBuffer() { return buffer_m.data(); } + pointer_type getData() { return buffer_m.data(); } /*! * @returns the size of the buffer @@ -82,10 +83,18 @@ namespace ippl { size_type getBufferSize() const { return buffer_m.size(); } - void resizeBuffer(size_type size) { Kokkos::resize(buffer_m, size); } - - void reallocBuffer(size_type size) { Kokkos::realloc(buffer_m, size); } + void reallocBuffer(size_type size) { +#ifdef IPPL_SIMPLE_VIEW_STORAGE + Kokkos::realloc(buffer_m, size); +#else + buffer_m.reallocBuffer(size); +#endif + } + void resetReadWritePos() { + readpos_m = 0; + writepos_m = 0; + } void resetWritePos() { writepos_m = 0; } void resetReadPos() { readpos_m = 0; } @@ -97,7 +106,7 @@ namespace ippl { //! read position for deserialization size_type readpos_m; //! serialized data - buffer_type buffer_m; + BufferType buffer_m; }; } // namespace detail } // namespace ippl diff --git a/src/Communicate/Archive.hpp b/src/Communicate/Archive.hpp index f1d097a69..1135c8157 100644 --- a/src/Communicate/Archive.hpp +++ b/src/Communicate/Archive.hpp @@ -4,99 +4,116 @@ // #include +#include "Utility/Logging.h" + #include "Archive.h" namespace ippl { namespace detail { - template - Archive::Archive(size_type size) + template + Archive::Archive(size_type size) : writepos_m(0) , readpos_m(0) , buffer_m("buffer", size) {} - template + // ----------------------------------- + // Scalar serialize + template template - void Archive::serialize(const Kokkos::View& view, - size_type nsends) { - using exec_space = typename Kokkos::View::execution_space; - using policy_type = Kokkos::RangePolicy; - - size_t size = sizeof(T); - Kokkos::parallel_for( - "Archive::serialize()", policy_type(0, nsends), - KOKKOS_CLASS_LAMBDA(const size_type i) { - std::memcpy(buffer_m.data() + i * size + writepos_m, view.data() + i, size); - }); + void Archive::serialize(const Kokkos::View& view, + size_type nsends) { + constexpr size_t size = sizeof(T); + char* dst_ptr = (char*)(buffer_m.data()) + writepos_m; + char* src_ptr = (char*)(view.data()); + assert(writepos_m + (nsends * size) <= buffer_m.size()); + // construct temp views of the src/dst buffers of the correct size (bytes) + Kokkos::View src_view(src_ptr, size * nsends); + Kokkos::View dst_view(dst_ptr, size * nsends); + Kokkos::deep_copy(dst_view, src_view); Kokkos::fence(); - writepos_m += size * nsends; + SPDLOG_TRACE("Incrementing writepos: {}, from {}, to {}", (void*)dst_view.data(), + writepos_m, writepos_m + (nsends * size)); + writepos_m += (nsends * size); } - template + // ----------------------------------- + // Vector serialize + template template - void Archive::serialize( - const Kokkos::View*, ViewArgs...>& view, size_type nsends) { - using exec_space = typename Kokkos::View::execution_space; - - size_t size = sizeof(T); - // Default index type for range policies is int64, + void Archive::serialize(const Kokkos::View*, ViewArgs...>& view, + size_type nsends) { + constexpr size_t size = sizeof(T); + char* dst_ptr = (char*)(buffer_m.data()); + ippl::Vector* src_ptr = view.data(); + auto wp = writepos_m; + // The Kokkos range policies expect int64 // so we have to explicitly specify size_type (uint64) + using exec_space = typename Kokkos::View::execution_space; using mdrange_t = Kokkos::MDRangePolicy, Kokkos::IndexType, exec_space>; Kokkos::parallel_for( - "Archive::serialize()", - // The constructor for Kokkos range policies always - // expects int64 regardless of index type provided - // by template parameters, so the typecast is necessary - // to avoid compiler warnings - mdrange_t({0, 0}, {(long int)nsends, Dim}), - KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { - std::memcpy(buffer_m.data() + (Dim * i + d) * size + writepos_m, - &(*(view.data() + i))[d], size); + "Archive::serialize()", mdrange_t({0, 0}, {(long int)nsends, Dim}), + KOKKOS_LAMBDA(const size_type i, const size_t d) { + std::memcpy(dst_ptr + (Dim * i + d) * size + wp, &(*(src_ptr + i))[d], size); }); + Kokkos::fence(); writepos_m += Dim * size * nsends; } - template + // ----------------------------------- + // Scalar Deserialize + template template - void Archive::deserialize(Kokkos::View& view, - size_type nrecvs) { - using exec_space = typename Kokkos::View::execution_space; - using policy_type = Kokkos::RangePolicy; - - size_t size = sizeof(T); + void Archive::deserialize(Kokkos::View& view, + size_type nrecvs) { + // if we have to enlarge the destination view if (nrecvs > view.extent(0)) { + SPDLOG_WARN("DeSerialization realloc: {}, from {}, to {}", (void*)view.data(), + view.extent(0), nrecvs); Kokkos::realloc(view, nrecvs); } - Kokkos::parallel_for( - "Archive::deserialize()", policy_type(0, nrecvs), - KOKKOS_CLASS_LAMBDA(const size_type i) { - std::memcpy(view.data() + i, buffer_m.data() + i * size + readpos_m, size); - }); - // Wait for deserialization kernel to complete - // (as with serialization kernels) + // + constexpr size_t size = sizeof(T); + char* src_ptr = (char*)(buffer_m.data()) + readpos_m; + char* dst_ptr = (char*)(view.data()); + assert(readpos_m + (nrecvs * size) <= buffer_m.size()); + // construct temp views of the src/dst buffers of the correct size (bytes) + Kokkos::View src_view(src_ptr, size * nrecvs); + Kokkos::View dst_view(dst_ptr, size * nrecvs); + Kokkos::deep_copy(dst_view, src_view); Kokkos::fence(); - readpos_m += size * nrecvs; + SPDLOG_TRACE("Incrementing readpos: {}, from {}, to {}", (void*)buffer_m.data(), + readpos_m, readpos_m + (nrecvs * size)); + readpos_m += (nrecvs * size); } - template + // ----------------------------------- + // Vecto Deserialize + template template - void Archive::deserialize(Kokkos::View*, ViewArgs...>& view, - size_type nrecvs) { - using exec_space = typename Kokkos::View::execution_space; - - size_t size = sizeof(T); + void Archive::deserialize(Kokkos::View*, ViewArgs...>& view, + size_type nrecvs) // + { + // if we have to enlarge the destination view if (nrecvs > view.extent(0)) { + SPDLOG_WARN("DeSerialization realloc: {}, from {}, to {}", (void*)view.data(), + view.extent(0), nrecvs); Kokkos::realloc(view, nrecvs); } + // + constexpr size_t size = sizeof(T); + char* src_ptr = (char*)(buffer_m.data()); + ippl::Vector* dst_ptr = view.data(); + auto rp = readpos_m; + using exec_space = typename Kokkos::View::execution_space; using mdrange_t = Kokkos::MDRangePolicy, Kokkos::IndexType, exec_space>; Kokkos::parallel_for( "Archive::deserialize()", mdrange_t({0, 0}, {(long int)nrecvs, Dim}), - KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { - std::memcpy(&(*(view.data() + i))[d], - buffer_m.data() + (Dim * i + d) * size + readpos_m, size); + KOKKOS_LAMBDA(const size_type i, const size_t d) { + std::memcpy(&(*(dst_ptr + i))[d], src_ptr + (Dim * i + d) * size + rp, size); }); Kokkos::fence(); readpos_m += Dim * size * nrecvs; diff --git a/src/Communicate/BufferHandler.h b/src/Communicate/BufferHandler.h index 117c12f22..0655cb677 100644 --- a/src/Communicate/BufferHandler.h +++ b/src/Communicate/BufferHandler.h @@ -4,9 +4,155 @@ #include #include +#include "Types/IpplTypes.h" +#include "Types/ViewTypes.h" + +#include "Utility/Logging.h" +#include "Utility/TypeUtils.h" + #include "Communicate/Archive.h" -namespace ippl { +namespace ippl::comms { + + // --------------------------------------------------------------------- +#ifdef IPPL_SIMPLE_VIEW_STORAGE + template + using communicator_storage = + ippl::detail::ViewType>::view_type; +#else + template + using communicator_storage = + ippl::detail::ViewType>::view_type; +#endif + +#define DEFAULT_BUFFER_ALIGNMENT 1024 + // Here's a simple class that provides an aligned buffer, by default on the host + // but we can specialize the constructor/destructor for other memory spaces + template + struct AlignedBuffer { + using memory_space = MemorySpace; + void* ptrOriginal{nullptr}; + void* ptrAligned{nullptr}; + detail::size_type space{0}; + // + AlignedBuffer() {} + // + AlignedBuffer& operator=(AlignedBuffer&& other) { + ptrOriginal = other.ptrOriginal; + ptrAligned = other.ptrAligned; + space = other.space; + other.ptrOriginal = nullptr; + other.ptrAligned = nullptr; + other.space = 0; + return *this; + } + // + AlignedBuffer(std::size_t size) { + ptrOriginal = std::aligned_alloc(DEFAULT_BUFFER_ALIGNMENT, size); + ptrAligned = ptrOriginal; + space = size; + SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}", + (void*)(ptrOriginal), (void*)(ptrAligned), size, space); + // sanity check should always be true when std::align used + assert(space >= size); + } + // + ~AlignedBuffer() { + if (ptrOriginal) { + SPDLOG_DEBUG("Destroying host buffer {}", ptrOriginal); + std::free(ptrOriginal); + } + } + }; + + // --------------------------------------------------------------------- +#ifdef KOKKOS_ENABLE_CUDA + // make number a multiple of the alignment + inline std::int64_t to_multiple(std::int64_t num) { + return ((2 * num + (DEFAULT_BUFFER_ALIGNMENT - 1)) & (-DEFAULT_BUFFER_ALIGNMENT)); + } + + // Specialize buffer allocation/free for cuda + template <> + inline AlignedBuffer::AlignedBuffer(std::size_t size) { + void* original; + space = to_multiple(size); + cudaMalloc(&original, space); + if (!original) { + throw std::runtime_error("Error allocating cuda memory in AlignedBuffer"); + } + ptrOriginal = original; + ptrAligned = std::align(DEFAULT_BUFFER_ALIGNMENT, size, original, space); + SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}", + (void*)(ptrOriginal), (void*)(ptrAligned), size, space); + // sanity check should always be true when std::align used + assert(space >= size); + } + // + template <> + inline AlignedBuffer::~AlignedBuffer() { + if (ptrOriginal) { + SPDLOG_DEBUG("Destroying cuda buffer {}", ptrOriginal); + cudaFree(ptrOriginal); + } + } +#endif + + template + struct comm_storage_wrapper { + using memory_space = MemorySpace; + using buffer_type = communicator_storage; + using pointer_type = typename buffer_type::pointer_type; + using size_type = detail::size_type; + // + comm_storage_wrapper(const std::string& /*name*/, size_type size) + : view() // we will construct the view manually + , buffer(size) // + { + SPDLOG_TRACE("Construct: view origin {}, aligned {}", (void*)(view.data()), + (void*)(buffer.ptrAligned)); + view = buffer_type((pointer_type)buffer.ptrAligned, size); + assert(view.data() == buffer.ptrAligned); + } + // + size_type size() const { return buffer.space; } + // + pointer_type data() { return view.data(); } + + // Note that this makes no effort to preserve any existing data + void reallocBuffer(size_type newsize) { + // wipe the old memory, before allocating new, (help prevent out-of-space errors) + buffer = AlignedBuffer(); + // allocate new + buffer = AlignedBuffer(newsize); + view = buffer_type((pointer_type)buffer.ptrAligned, newsize); + SPDLOG_DEBUG("Realloc : view {}, aligned {}, size {}, space {}", (void*)(view.data()), + (void*)(buffer.ptrAligned), newsize, buffer.space); + } + // + buffer_type view; + AlignedBuffer buffer; + }; + + // --------------------------------------------- + // archive wrapper around some arbitrary buffer + template + struct rma_archive { + using type = detail::Archive; + }; + + template + using rma_archive_type = rma_archive::type; + +#ifdef IPPL_SIMPLE_VIEW_STORAGE + template + using archive_buffer = rma_archive_type>; +#else + template + using archive_buffer = rma_archive_type>; +#endif /** * @brief Interface for memory buffer handling. @@ -17,11 +163,11 @@ namespace ippl { * * @tparam MemorySpace The memory space type used for buffer allocation. */ - template + template class BufferHandler { public: - using archive_type = ippl::detail::Archive; - using buffer_type = std::shared_ptr; + using archive_type = Buffer; + using buffer_type = std::shared_ptr; using size_type = ippl::detail::size_type; virtual ~BufferHandler() {} @@ -92,11 +238,12 @@ namespace ippl { * @tparam MemorySpace The memory space type for the buffer (e.g., `Kokkos::HostSpace`). */ template - class DefaultBufferHandler : public BufferHandler { + class DefaultBufferHandler : public BufferHandler, MemorySpace> { public: - using typename BufferHandler::archive_type; - using typename BufferHandler::buffer_type; - using typename BufferHandler::size_type; + using buffer_type = + typename BufferHandler, MemorySpace>::buffer_type; + using typename BufferHandler, MemorySpace>::archive_type; + using typename BufferHandler, MemorySpace>::size_type; ~DefaultBufferHandler() override; @@ -106,8 +253,8 @@ namespace ippl { * Requests a memory buffer of the specified size, with the option * to request a buffer larger than the base size by an overallocation * multiplier. If a sufficiently large buffer is available, it is returned. If not, the - * largest free buffer is reallocated. If there are no free buffers available, only then a - * new buffer is allocated. + * largest free buffer is reallocated. If there are no free buffers available, only then + * a new buffer is allocated. * * @param size The required buffer size. * @param overallocation A multiplier to allocate additional buffer space. @@ -163,7 +310,7 @@ namespace ippl { buffer_set_type free_buffers{ &DefaultBufferHandler::bufferSizeComparator}; ///< Set of free buffers }; -} // namespace ippl +} // namespace ippl::comms #include "Communicate/BufferHandler.hpp" diff --git a/src/Communicate/BufferHandler.hpp b/src/Communicate/BufferHandler.hpp index c6d57f0a3..f654dea7f 100644 --- a/src/Communicate/BufferHandler.hpp +++ b/src/Communicate/BufferHandler.hpp @@ -1,7 +1,7 @@ #ifndef IPPL_BUFFER_HANDLER_HPP #define IPPL_BUFFER_HANDLER_HPP -namespace ippl { +namespace ippl::comms { template DefaultBufferHandler::~DefaultBufferHandler() {} @@ -112,6 +112,7 @@ namespace ippl { freeSize_m -= buffer->getBufferSize(); usedSize_m += buffer->getBufferSize(); + buffer->resetReadWritePos(); free_buffers.erase(buffer); used_buffers.insert(buffer); return buffer; @@ -128,6 +129,7 @@ namespace ippl { free_buffers.erase(buffer); buffer->reallocBuffer(requiredSize); + buffer->resetReadWritePos(); used_buffers.insert(buffer); return buffer; @@ -140,9 +142,10 @@ namespace ippl { usedSize_m += newBuffer->getBufferSize(); used_buffers.insert(newBuffer); + newBuffer->resetReadWritePos(); return newBuffer; } -} // namespace ippl +} // namespace ippl::comms #endif diff --git a/src/Communicate/Buffers.cpp b/src/Communicate/Buffers.cpp index 04cbb1808..026633fec 100644 --- a/src/Communicate/Buffers.cpp +++ b/src/Communicate/Buffers.cpp @@ -32,13 +32,13 @@ namespace ippl { } void Communicator::deleteAllBuffers() { - buffer_handlers_m.forAll([](BufferHandler&& bh) { + buffer_handlers_m->forAll([](BufferHandler&& bh) { bh.deleteAllBuffers(); }); } void Communicator::freeAllBuffers() { - buffer_handlers_m.forAll([](BufferHandler&& bh) { + buffer_handlers_m->forAll([](BufferHandler&& bh) { bh.freeAllBuffers(); }); } diff --git a/src/Communicate/Buffers.hpp b/src/Communicate/Buffers.hpp index c08dd2a69..90b477af3 100644 --- a/src/Communicate/Buffers.hpp +++ b/src/Communicate/Buffers.hpp @@ -20,22 +20,33 @@ // exchanging particle data between ranks. // +#include "Utility/Logging.h" +#include "Utility/TypeUtils.h" + namespace ippl { namespace mpi { - template - Communicator::buffer_type Communicator::getBuffer(size_type size, - double overallocation) { - auto& buffer_handler = buffer_handlers_m.get(); + // ----------------------------------- + template + Communicator::buffer_type Communicator::getBuffer(size_type size, + double overallocation) { + using memory_space = BufferType::memory_space; - return buffer_handler.getBuffer(size * sizeof(T), - std::max(overallocation, defaultOveralloc_m)); - } + auto& buffer_handler = buffer_handlers_m->get(); - template - void Communicator::freeBuffer(Communicator::buffer_type buffer) { - auto& buffer_handler = buffer_handlers_m.get(); + auto b = buffer_handler.getBuffer(size * sizeof(T), + std::max(overallocation, defaultOveralloc_m)); + SPDLOG_TRACE("{}, getBuffer {}, buf, {}, size {}", (void*)this, + ippl::debug::print_type(), (void*)(b->getData()), + size * sizeof(T)); + return b; + } + template + void Communicator::freeBuffer(Communicator::buffer_type buffer) { + using memory_space = BufferType::memory_space; + auto& buffer_handler = buffer_handlers_m->get(); + SPDLOG_TRACE("freeBuffer buf, {}", (void*)(buffer->getData())); buffer_handler.freeBuffer(buffer); } diff --git a/src/Communicate/Collectives.hpp b/src/Communicate/Collectives.hpp index d273242ab..0ed2aa69b 100644 --- a/src/Communicate/Collectives.hpp +++ b/src/Communicate/Collectives.hpp @@ -2,62 +2,60 @@ #include "Communicate/Operations.h" -namespace ippl { - namespace mpi { - template - void Communicator::gather(const T* input, T* output, int count, int root) { - MPI_Datatype type = get_mpi_datatype(*input); +namespace ippl::mpi { + template + void Communicator::gather(const T* input, T* output, int count, int root) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Gather(const_cast(input), count, type, output, count, type, root, *comm_m); - } + MPI_Gather(const_cast(input), count, type, output, count, type, root, *comm_m); + } - template - void Communicator::scatter(const T* input, T* output, int count, int root) { - MPI_Datatype type = get_mpi_datatype(*input); + template + void Communicator::scatter(const T* input, T* output, int count, int root) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Scatter(const_cast(input), count, type, output, count, type, root, *comm_m); - } + MPI_Scatter(const_cast(input), count, type, output, count, type, root, *comm_m); + } - template - void Communicator::reduce(const T* input, T* output, int count, Op, int root) { - MPI_Datatype type = get_mpi_datatype(*input); + template + void Communicator::reduce(const T* input, T* output, int count, Op, int root) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Op mpiOp = get_mpi_op(); + MPI_Op mpiOp = get_mpi_op(); - MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, *comm_m); - } + MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, *comm_m); + } - template - void Communicator::reduce(const T& input, T& output, int count, Op op, int root) { - reduce(&input, &output, count, op, root); - } + template + void Communicator::reduce(const T& input, T& output, int count, Op op, int root) { + reduce(&input, &output, count, op, root); + } - template - void Communicator::allreduce(const T* input, T* output, int count, Op) { - MPI_Datatype type = get_mpi_datatype(*input); + template + void Communicator::allreduce(const T* input, T* output, int count, Op) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Op mpiOp = get_mpi_op(); + MPI_Op mpiOp = get_mpi_op(); - MPI_Allreduce(const_cast(input), output, count, type, mpiOp, *comm_m); - } + MPI_Allreduce(const_cast(input), output, count, type, mpiOp, *comm_m); + } - template - void Communicator::allreduce(const T& input, T& output, int count, Op op) { - allreduce(&input, &output, count, op); - } + template + void Communicator::allreduce(const T& input, T& output, int count, Op op) { + allreduce(&input, &output, count, op); + } - template - void Communicator::allreduce(T* inout, int count, Op) { - MPI_Datatype type = get_mpi_datatype(*inout); + template + void Communicator::allreduce(T* inout, int count, Op) { + MPI_Datatype type = get_mpi_datatype(*inout); - MPI_Op mpiOp = get_mpi_op(); + MPI_Op mpiOp = get_mpi_op(); - MPI_Allreduce(MPI_IN_PLACE, inout, count, type, mpiOp, *comm_m); - } + MPI_Allreduce(MPI_IN_PLACE, inout, count, type, mpiOp, *comm_m); + } - template - void Communicator::allreduce(T& inout, int count, Op op) { - allreduce(&inout, count, op); - } - } // namespace mpi -} // namespace ippl + template + void Communicator::allreduce(T& inout, int count, Op op) { + allreduce(&inout, count, op); + } +} // namespace ippl::mpi diff --git a/src/Communicate/Communicator.cpp b/src/Communicate/Communicator.cpp index 0ade37738..81376de62 100644 --- a/src/Communicate/Communicator.cpp +++ b/src/Communicate/Communicator.cpp @@ -1,42 +1,56 @@ #include "Communicate/Communicator.h" -namespace ippl { - namespace mpi { - - Communicator::Communicator() - : comm_m(new MPI_Comm(MPI_COMM_WORLD)) { - MPI_Comm_rank(*comm_m, &rank_m); - MPI_Comm_size(*comm_m, &size_m); - } - - Communicator::Communicator(MPI_Comm comm) { - comm_m = std::make_shared(comm); - MPI_Comm_rank(*comm_m, &rank_m); - MPI_Comm_size(*comm_m, &size_m); - } - - Communicator& Communicator::operator=(MPI_Comm comm) { - comm_m = std::make_shared(comm); - MPI_Comm_rank(*comm_m, &rank_m); - MPI_Comm_size(*comm_m, &size_m); - return *this; - } - - Communicator Communicator::Communicator::split(int color, int key) const { - MPI_Comm newcomm; - MPI_Comm_split(*comm_m, color, key, &newcomm); - return Communicator(newcomm); - } - - void Communicator::probe(int source, int tag, Status& status) { - MPI_Probe(source, tag, *comm_m, status); - } - - bool Communicator::iprobe(int source, int tag, Status& status) { - int flag = 0; - MPI_Iprobe(source, tag, *comm_m, &flag, status); - return (flag != 0); +namespace ippl::mpi { + + Communicator::Communicator() + : buffer_handlers_m(get_buffer_handler_instance()) + , comm_m(new MPI_Comm(MPI_COMM_WORLD)) { + MPI_Comm_rank(*comm_m, &rank_m); + MPI_Comm_size(*comm_m, &size_m); + } + + Communicator::Communicator(MPI_Comm comm) { + buffer_handlers_m = get_buffer_handler_instance(); + comm_m = std::make_shared(comm); + MPI_Comm_rank(*comm_m, &rank_m); + MPI_Comm_size(*comm_m, &size_m); + } + + Communicator& Communicator::operator=(MPI_Comm comm) { + buffer_handlers_m = get_buffer_handler_instance(); + comm_m = std::make_shared(comm); + MPI_Comm_rank(*comm_m, &rank_m); + MPI_Comm_size(*comm_m, &size_m); + return *this; + } + + Communicator Communicator::Communicator::split(int color, int key) const { + MPI_Comm newcomm; + MPI_Comm_split(*comm_m, color, key, &newcomm); + return Communicator(newcomm); + } + + void Communicator::probe(int source, int tag, Status& status) { + MPI_Probe(source, tag, *comm_m, status); + } + + bool Communicator::iprobe(int source, int tag, Status& status) { + int flag = 0; + MPI_Iprobe(source, tag, *comm_m, &flag, status); + return (flag != 0); + } + + // --------------------------------------- + // singleton access to buffer manager + // --------------------------------------- + std::shared_ptr Communicator::get_buffer_handler_instance() { + static std::shared_ptr comm_buff_handler_ptr{nullptr}; + if (comm_buff_handler_ptr == nullptr) { + comm_buff_handler_ptr = std::make_shared(); + SPDLOG_DEBUG("BufferHandler new: {}", + ippl::debug::print_type()); } - } // namespace mpi -} // namespace ippl + return comm_buff_handler_ptr; + } +} // namespace ippl::mpi diff --git a/src/Communicate/Communicator.h b/src/Communicate/Communicator.h index 63886368a..114b0b4a8 100644 --- a/src/Communicate/Communicator.h +++ b/src/Communicate/Communicator.h @@ -5,215 +5,218 @@ #ifndef IPPL_MPI_COMMUNICATOR_H #define IPPL_MPI_COMMUNICATOR_H -#include -#include - -#include "Communicate/BufferHandler.h" -#include "Communicate/LoggingBufferHandler.h" -#include "Communicate/Request.h" -#include "Communicate/Status.h" - -//////////////////////////////////////////////// -// For message size check; see below #include #include +#include +#include #include "Utility/TypeUtils.h" #include "Communicate/Archive.h" +#include "Communicate/BufferHandler.h" +#include "Communicate/LogEntry.h" +#include "Communicate/Request.h" +#include "Communicate/Status.h" #include "Communicate/TagMaker.h" #include "Communicate/Tags.h" -//////////////////////////////////////////////////// -namespace ippl { - namespace mpi { - - class Communicator : public TagMaker { - public: - Communicator(); +//////////////////////////////////////////////////// - Communicator(MPI_Comm comm); +namespace ippl::mpi { - Communicator& operator=(MPI_Comm comm); + class Communicator : public TagMaker { + public: + Communicator(); - ~Communicator() = default; + Communicator(MPI_Comm comm); - Communicator split(int color, int key) const; + Communicator& operator=(MPI_Comm comm); - operator const MPI_Comm&() const noexcept { return *comm_m; } + ~Communicator() = default; - int size() const noexcept { return size_m; } + Communicator split(int color, int key) const; - int rank() const noexcept { return rank_m; } + operator const MPI_Comm&() const noexcept { return *comm_m; } - void barrier() { MPI_Barrier(*comm_m); } + int size() const noexcept { return size_m; } - void abort(int errorcode = -1) { MPI_Abort(*comm_m, errorcode); } + int rank() const noexcept { return rank_m; } - /* - * Blocking point-to-point communication - * - */ + void barrier() { MPI_Barrier(*comm_m); } - template - void send(const T& buffer, int count, int dest, int tag); + void abort(int errorcode = -1) { MPI_Abort(*comm_m, errorcode); } - template - void send(const T* buffer, int count, int dest, int tag); + /* + * Blocking point-to-point communication + * + */ - template - void recv(T& output, int count, int source, int tag, Status& status); + template + void send(const T& buffer, int count, int dest, int tag); - template - void recv(T* output, int count, int source, int tag, Status& status); + template + void send(const T* buffer, int count, int dest, int tag); - void probe(int source, int tag, Status& status); + template + void recv(T& output, int count, int source, int tag, Status& status); - /* - * Non-blocking point-to-point communication - * - */ + template + void recv(T* output, int count, int source, int tag, Status& status); - template - void isend(const T& buffer, int count, int dest, int tag, Request& request); + void probe(int source, int tag, Status& status); - template - void isend(const T* buffer, int count, int dest, int tag, Request& request); + /* + * Non-blocking point-to-point communication + * + */ - template - void irecv(T& buffer, int count, int source, int tag, Request& request); + template + void isend(const T& buffer, int count, int dest, int tag, Request& request); - template - void irecv(T* buffer, int count, int source, int tag, Request& request); + template + void isend(const T* buffer, int count, int dest, int tag, Request& request); - bool iprobe(int source, int tag, Status& status); + template + void irecv(T& buffer, int count, int source, int tag, Request& request); - /* - * Collective communication - */ + template + void irecv(T* buffer, int count, int source, int tag, Request& request); - /* Gather the data in the given source container from all other nodes to a - * specific node (default: 0). - */ - template - void gather(const T* input, T* output, int count, int root = 0); + bool iprobe(int source, int tag, Status& status); - /* Scatter the data from all other nodes to a - * specific node (default: 0). - */ - template - void scatter(const T* input, T* output, int count, int root = 0); + /* + * Collective communication + */ - /* Reduce data coming from all nodes to a specific node - * (default: 0). Apply certain operation - * - */ - template - void reduce(const T* input, T* output, int count, Op op, int root = 0); + /* Gather the data in the given source container from all other nodes to a + * specific node (default: 0). + */ + template + void gather(const T* input, T* output, int count, int root = 0); - template - void reduce(const T& input, T& output, int count, Op op, int root = 0); + /* Scatter the data from all other nodes to a + * specific node (default: 0). + */ + template + void scatter(const T* input, T* output, int count, int root = 0); - template - void allreduce(const T* input, T* output, int count, Op op); + /* Reduce data coming from all nodes to a specific node + * (default: 0). Apply certain operation + * + */ + template + void reduce(const T* input, T* output, int count, Op op, int root = 0); - template - void allreduce(const T& input, T& output, int count, Op op); + template + void reduce(const T& input, T& output, int count, Op op, int root = 0); - template - void allreduce(T* inout, int count, Op op); + template + void allreduce(const T* input, T* output, int count, Op op); - template - void allreduce(T& inout, int count, Op op); + template + void allreduce(const T& input, T& output, int count, Op op); - ///////////////////////////////////////////////////////////////////////////////////// - template - using archive_type = detail::Archive; + template + void allreduce(T* inout, int count, Op op); - template - using buffer_type = std::shared_ptr>; + template + void allreduce(T& inout, int count, Op op); - private: - template - using buffer_container_type = LoggingBufferHandler; + private: + template + using buffer_container_type = comms::DefaultBufferHandler; - using buffer_handler_type = - typename detail::ContainerForAllSpaces::type; + using buffer_handler_type = + typename detail::ContainerForAllSpaces::type; - public: - using size_type = detail::size_type; - double getDefaultOverallocation() const { return defaultOveralloc_m; } - void setDefaultOverallocation(double factor); + public: + template + using buffer_type = buffer_container_type::buffer_type; - template - buffer_type getBuffer(size_type size, double overallocation = 1.0); + public: + using size_type = detail::size_type; + double getDefaultOverallocation() const { return defaultOveralloc_m; } + void setDefaultOverallocation(double factor); - void deleteAllBuffers(); - void freeAllBuffers(); + template + buffer_type getBuffer(size_type size, double overallocation = 1.0); - template - void freeBuffer(buffer_type buffer); + void deleteAllBuffers(); + void freeAllBuffers(); - const MPI_Comm& getCommunicator() const noexcept { return *comm_m; } + template + void freeBuffer(buffer_type buffer); - template - void recv(int src, int tag, Buffer& buffer, Archive& ar, size_type msize, - size_type nrecvs) { - // Temporary fix. MPI communication seems to have problems when the - // count argument exceeds the range of int, so large messages should - // be split into smaller messages - if (msize > INT_MAX) { - std::cerr << "Message size exceeds range of int" << std::endl; - this->abort(); - } - MPI_Status status; - MPI_Recv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &status); + const MPI_Comm& getCommunicator() const noexcept { return *comm_m; } - buffer.deserialize(ar, nrecvs); + template + void recv(int src, int tag, Buffer& buffer, Archive& ar, size_type msize, + size_type nrecvs) { + // Temporary fix. MPI communication seems to have problems when the + // count argument exceeds the range of int, so large messages should + // be split into smaller messages + if (msize > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + this->abort(); } - - template - void isend(int dest, int tag, Buffer& buffer, Archive& ar, MPI_Request& request, - size_type nsends) { - if (ar.getSize() > INT_MAX) { - std::cerr << "Message size exceeds range of int" << std::endl; - this->abort(); - } - buffer.serialize(ar, nsends); - MPI_Isend(ar.getBuffer(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request); + MPI_Status status; + MPI_Recv(ar.getData(), msize, MPI_BYTE, src, tag, *comm_m, &status); + SPDLOG_DEBUG("Recv buf {}, size {:04}, src {:02}, tag {:04}", (void*)(ar.getData()), + msize, src, tag); + buffer.deserialize(ar, nrecvs); + } + + template + void isend(int dest, int tag, Buffer& buffer, Archive& ar, MPI_Request& request, + size_type nsends) // + { + if (ar.getSize() > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + this->abort(); } - - template - void irecv(int src, int tag, Archive& ar, MPI_Request& request, size_type msize) { - if (msize > INT_MAX) { - std::cerr << "Message size exceeds range of int" << std::endl; - this->abort(); - } - MPI_Irecv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &request); + buffer.serialize(ar, nsends); + MPI_Isend(ar.getData(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request); + SPDLOG_DEBUG("Isend buf {}, size {:04}, dst {:02}, tag {:04}, req {}", + (void*)(ar.getData()), ar.getSize(), dest, tag, + static_cast(request)); + } + + template + void irecv(int src, int tag, Archive& ar, MPI_Request& request, size_type msize) { + if (msize > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + this->abort(); } - void printLogs(const std::string& filename); + MPI_Irecv(ar.getData(), msize, MPI_BYTE, src, tag, *comm_m, &request); + SPDLOG_DEBUG("Irecv buf {}, size {:04}, src {:02}, tag {:04}, req {}", + (void*)(ar.getData()), msize, src, tag, static_cast(request)); + } + + void printLogs(const std::string& filename); + + private: + std::vector gatherLocalLogs(); + void sendLogsToRank0(const std::vector& localLogs); + std::vector gatherLogsFromAllRanks(const std::vector& localLogs); + void writeLogsToFile(const std::vector& allLogs, const std::string& filename); - private: - std::vector gatherLocalLogs(); - void sendLogsToRank0(const std::vector& localLogs); - std::vector gatherLogsFromAllRanks(const std::vector& localLogs); - void writeLogsToFile(const std::vector& allLogs, const std::string& filename); + std::shared_ptr buffer_handlers_m; + double defaultOveralloc_m = 1.0; - buffer_handler_type buffer_handlers_m; + ///////////////////////////////////////////////////////////////////////////////////// - double defaultOveralloc_m = 1.0; + protected: + std::shared_ptr comm_m; + int size_m; + int rank_m; - ///////////////////////////////////////////////////////////////////////////////////// + public: + std::shared_ptr get_buffer_handler_instance(); + }; - protected: - std::shared_ptr comm_m; - int size_m; - int rank_m; - }; - } // namespace mpi -} // namespace ippl +} // namespace ippl::mpi #include "Communicate/Collectives.hpp" #include "Communicate/PointToPoint.hpp" diff --git a/src/Communicate/CommunicatorLogging.cpp b/src/Communicate/CommunicatorLogging.cpp index 92e821ebd..3c3000695 100644 --- a/src/Communicate/CommunicatorLogging.cpp +++ b/src/Communicate/CommunicatorLogging.cpp @@ -6,119 +6,122 @@ #include "Utility/Inform.h" #include "Communicate/Communicator.h" -#include "Communicate/LogEntry.h" - -namespace ippl { - namespace mpi { - void Communicator::printLogs(const std::string& filename) { - std::vector localLogs = gatherLocalLogs(); - - std::vector allLogs; - if (rank() == 0) { - allLogs = gatherLogsFromAllRanks(localLogs); - } else { - sendLogsToRank0(localLogs); - } +#include "Communicate/LoggingBufferHandler.h" - if (rank() == 0) { - writeLogsToFile(allLogs, filename); - } +namespace ippl::mpi { + void Communicator::printLogs(const std::string& filename) { + std::vector localLogs = gatherLocalLogs(); + + std::vector allLogs; + if (rank() == 0) { + allLogs = gatherLogsFromAllRanks(localLogs); + } else { + sendLogsToRank0(localLogs); + } + + if (rank() == 0) { + writeLogsToFile(allLogs, filename); } + } - std::vector Communicator::gatherLocalLogs() { - std::vector localLogs; + template + struct is_a_logger : std::false_type {}; - buffer_handlers_m.forAll([&](auto& loggingHandler) { + template + struct is_a_logger > : std::true_type {}; + + std::vector Communicator::gatherLocalLogs() { + std::vector localLogs; + if constexpr (is_a_logger::value) { + buffer_handlers_m->forAll([&](auto& loggingHandler) { const auto& logs = loggingHandler.getLogs(); localLogs.insert(localLogs.end(), logs.begin(), logs.end()); }); - - return localLogs; } + return localLogs; + } - void Communicator::sendLogsToRank0(const std::vector& localLogs) { - std::vector buffer = serializeLogs(localLogs); + void Communicator::sendLogsToRank0(const std::vector& localLogs) { + std::vector buffer = serializeLogs(localLogs); - int logSize = buffer.size(); - - this->send(logSize, 1, 0, 0); - this->send(buffer.data(), logSize, 0, 0); - } + int logSize = buffer.size(); - std::vector Communicator::gatherLogsFromAllRanks( - const std::vector& localLogs) { - std::vector allLogs = localLogs; + this->send(logSize, 1, 0, 0); + this->send(buffer.data(), logSize, 0, 0); + } - for (int rank = 1; rank < size_m; ++rank) { - int logSize; - Status status; + std::vector Communicator::gatherLogsFromAllRanks( + const std::vector& localLogs) { + std::vector allLogs = localLogs; - this->recv(logSize, 1, rank, 0, status); + for (int rank = 1; rank < size_m; ++rank) { + int logSize; + Status status; - std::vector buffer(logSize); - this->recv(buffer.data(), logSize, rank, 0, status); + this->recv(logSize, 1, rank, 0, status); - std::vector deserializedLogs = deserializeLogs(buffer); - allLogs.insert(allLogs.end(), deserializedLogs.begin(), deserializedLogs.end()); - } + std::vector buffer(logSize); + this->recv(buffer.data(), logSize, rank, 0, status); - return allLogs; + std::vector deserializedLogs = deserializeLogs(buffer); + allLogs.insert(allLogs.end(), deserializedLogs.begin(), deserializedLogs.end()); } - std::vector serializeLogs(const std::vector& logs) { - std::vector buffer; + return allLogs; + } - for (const auto& logEntry : logs) { - std::vector serializedEntry = logEntry.serialize(); - buffer.insert(buffer.end(), serializedEntry.begin(), serializedEntry.end()); - } + std::vector serializeLogs(const std::vector& logs) { + std::vector buffer; - return buffer; + for (const auto& logEntry : logs) { + std::vector serializedEntry = logEntry.serialize(); + buffer.insert(buffer.end(), serializedEntry.begin(), serializedEntry.end()); } - std::vector deserializeLogs(const std::vector& buffer) { - std::vector logs; - size_t offset = 0; + return buffer; + } - while (offset < buffer.size()) { - LogEntry logEntry = LogEntry::deserialize(buffer, offset); + std::vector deserializeLogs(const std::vector& buffer) { + std::vector logs; + size_t offset = 0; - logs.push_back(logEntry); + while (offset < buffer.size()) { + LogEntry logEntry = LogEntry::deserialize(buffer, offset); - offset += logEntry.serialize().size(); - } - return logs; + logs.push_back(logEntry); + + offset += logEntry.serialize().size(); } + return logs; + } + + void Communicator::writeLogsToFile(const std::vector& allLogs, + const std::string& filename) { + Inform logFile(0, filename.c_str(), Inform::OVERWRITE, 0); + logFile.setOutputLevel(1); - void Communicator::writeLogsToFile(const std::vector& allLogs, - const std::string& filename) { - Inform logFile(0, filename.c_str(), Inform::OVERWRITE, 0); - logFile.setOutputLevel(1); - - logFile << "Timestamp,Method,Rank,MemorySpace,usedSize,FreeSize,Parameters" << endl; - - for (const auto& log : allLogs) { - auto timestamp = std::chrono::duration_cast( - log.timestamp.time_since_epoch()) - .count(); - - logFile << timestamp << "," << log.methodName << "," << log.rank << "," - << log.memorySpace << "," << log.usedSize << "," << log.freeSize; - - logFile << ",\""; - bool first = true; - for (const auto& [key, value] : log.parameters) { - if (!first) { - logFile << "; "; - } - logFile << key << ": " << value; - first = false; + logFile << "Timestamp,Method,Rank,MemorySpace,usedSize,FreeSize,Parameters" << endl; + + for (const auto& log : allLogs) { + auto timestamp = std::chrono::duration_cast( + log.timestamp.time_since_epoch()) + .count(); + + logFile << timestamp << "," << log.methodName << "," << log.rank << "," + << log.memorySpace << "," << log.usedSize << "," << log.freeSize; + + logFile << ",\""; + bool first = true; + for (const auto& [key, value] : log.parameters) { + if (!first) { + logFile << "; "; } - logFile << "\"" << endl; + logFile << key << ": " << value; + first = false; } - - logFile.flush(); + logFile << "\"" << endl; } - } // namespace mpi -} // namespace ippl + logFile.flush(); + } +} // namespace ippl::mpi diff --git a/src/Communicate/CommunicatorLogging.hpp b/src/Communicate/CommunicatorLogging.hpp index 5746c3df9..12caaa4d9 100644 --- a/src/Communicate/CommunicatorLogging.hpp +++ b/src/Communicate/CommunicatorLogging.hpp @@ -5,11 +5,9 @@ #include "Communicate/LogEntry.h" -namespace ippl { - namespace mpi { - std::vector serializeLogs(const std::vector& logs); - std::vector deserializeLogs(const std::vector& buffer); - } // namespace mpi -} // namespace ippl +namespace ippl::mpi { + std::vector serializeLogs(const std::vector& logs); + std::vector deserializeLogs(const std::vector& buffer); +} // namespace ippl::mpi #endif diff --git a/src/Communicate/LoggingBufferHandler.h b/src/Communicate/LoggingBufferHandler.h index f6a0bd5d6..fc3ad2eba 100644 --- a/src/Communicate/LoggingBufferHandler.h +++ b/src/Communicate/LoggingBufferHandler.h @@ -10,7 +10,7 @@ #include "Communicate/BufferHandler.h" #include "Communicate/LogEntry.h" -namespace ippl { +namespace ippl::comms { /** * @class LoggingBufferHandler @@ -28,10 +28,12 @@ namespace ippl { * Instead, it adds logging for monitoring purposes. */ template - class LoggingBufferHandler : public BufferHandler { + class LoggingBufferHandler : public BufferHandler, MemorySpace> { public: - using buffer_type = typename BufferHandler::buffer_type; - using size_type = typename BufferHandler::size_type; + using buffer_type = + typename BufferHandler, MemorySpace>::buffer_type; + using size_type = + typename BufferHandler, MemorySpace>::size_type; /** * @brief Constructs a LoggingBufferHandler with an existing buffer handler. @@ -39,7 +41,9 @@ namespace ippl { * operations. * @param rank The MPI rank for logging purposes, used to identify the source of logs. */ - LoggingBufferHandler(std::shared_ptr> handler, int rank); + LoggingBufferHandler( + std::shared_ptr, MemorySpace>> handler, + int rank); /** * @brief Default constructor, creates an internal `BufferHandler` for managing buffers. @@ -104,7 +108,7 @@ namespace ippl { const std::vector& getLogs() const; private: - std::shared_ptr> + std::shared_ptr, MemorySpace>> handler_m; ///< Internal handler for buffer management. std::vector logEntries_m; ///< Log entries for buffer operations. int rank_m; ///< MPI rank for identifying log sources. @@ -122,7 +126,7 @@ namespace ippl { void logMethod(const std::string& methodName, const std::map& parameters); }; -} // namespace ippl +} // namespace ippl::comms #include "Communicate/LoggingBufferHandler.hpp" diff --git a/src/Communicate/LoggingBufferHandler.hpp b/src/Communicate/LoggingBufferHandler.hpp index 33f4269cd..9517e1de9 100644 --- a/src/Communicate/LoggingBufferHandler.hpp +++ b/src/Communicate/LoggingBufferHandler.hpp @@ -4,11 +4,11 @@ #include #include -namespace ippl { +namespace ippl::comms { template LoggingBufferHandler::LoggingBufferHandler( - std::shared_ptr> handler, int rank) + std::shared_ptr, MemorySpace>> handler, int rank) : handler_m(std::move(handler)) , rank_m(rank) {} @@ -70,6 +70,6 @@ namespace ippl { std::chrono::high_resolution_clock::now()}); } -} // namespace ippl +} // namespace ippl::comms #endif diff --git a/src/Field/HaloCells.h b/src/Field/HaloCells.h index c4d87b9bd..c056177dd 100644 --- a/src/Field/HaloCells.h +++ b/src/Field/HaloCells.h @@ -22,7 +22,7 @@ namespace ippl { template struct FieldBufferData { using view_type = typename detail::ViewType::view_type; - using archive_type = Archive; + using archive_type = comms::archive_buffer; void serialize(archive_type& ar, size_type nsends) { ar.serialize(buffer, nsends); } @@ -139,7 +139,8 @@ namespace ippl { * unpack function call */ template - void exchangeBoundaries(view_type& view, Layout_t* layout, SendOrder order, int nghost = 1); + void exchangeBoundaries(view_type& view, Layout_t* layout, SendOrder order, + int nghost = 1); /*! * Extract the subview of the original data. This does not copy. diff --git a/src/Ippl.cpp b/src/Ippl.cpp index 3892e4ae9..fc9563535 100644 --- a/src/Ippl.cpp +++ b/src/Ippl.cpp @@ -71,6 +71,14 @@ namespace ippl { } auto factor = detail::getNumericalOption(argv[nargs]); Comm->setDefaultOverallocation(factor); + } else if (detail::checkOption(argv[nargs], "--debug", "-g")) { + ++nargs; + if (Comm->rank() == 0) { + std::cout << "Please attach debugger and hit return" << std::endl; + char c; + std::cin >> c; + } + Comm->barrier(); } else if (nargs > 0 && std::strstr(argv[nargs], "--kokkos") == nullptr) { notparsed.push_back(argv[nargs]); } diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index e694585f8..e145152c3 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -43,6 +43,7 @@ namespace ippl { using memory_space = typename view_type::memory_space; using execution_space = typename view_type::execution_space; + using archive_type = comms::archive_buffer; using size_type = detail::size_type; @@ -64,24 +65,22 @@ namespace ippl { void unpack(size_type) override; - void serialize(detail::Archive& ar, size_type nsends) override { - ar.serialize(buf_m, nsends); - } + void serialize(archive_type& ar, size_type nsends) override { ar.serialize(buf_m, nsends); } - void deserialize(detail::Archive& ar, size_type nrecvs) override { + void deserialize(archive_type& ar, size_type nrecvs) override { ar.deserialize(buf_m, nrecvs); } virtual ~ParticleAttrib() = default; - + size_type size() const override { return dview_m.extent(0); } - + size_type packedSize(const size_type count) const override { return count * sizeof(value_type); } - + void resize(size_type n) { Kokkos::resize(dview_m, n); } - + void realloc(size_type n) { Kokkos::realloc(dview_m, n); } void print() { @@ -99,8 +98,8 @@ namespace ippl { const view_type& getView() const { return dview_m; } HostMirror getHostMirror() const { return Kokkos::create_mirror(dview_m); } - - void set_name(const std::string & name_) override { this->name_m = name_; } + + void set_name(const std::string& name_) override { this->name_m = name_; } std::string get_name() const override { return this->name_m; } diff --git a/src/Particle/ParticleAttribBase.h b/src/Particle/ParticleAttribBase.h index 0b02a9739..2fc6c45ff 100644 --- a/src/Particle/ParticleAttribBase.h +++ b/src/Particle/ParticleAttribBase.h @@ -30,17 +30,18 @@ namespace ippl { }; public: - using hash_type = ippl::detail::hash_type; using memory_space = MemorySpace; using execution_space = typename memory_space::execution_space; + using hash_type = detail::hash_type; + using archive_type = comms::archive_buffer; template using with_properties = typename WithMemSpace::type; - ParticleAttribBase(){this->name_m = "UNNAMED_attribute";} + ParticleAttribBase() { this->name_m = "UNNAMED_attribute"; } + + virtual void set_name(const std::string& name_) = 0; - virtual void set_name(const std::string & name_) = 0; - virtual std::string get_name() const = 0; virtual void create(size_type) = 0; @@ -52,9 +53,9 @@ namespace ippl { virtual void unpack(size_type) = 0; - virtual void serialize(Archive& ar, size_type nsends) = 0; + virtual void serialize(archive_type& ar, size_type nsends) = 0; - virtual void deserialize(Archive& ar, size_type nrecvs) = 0; + virtual void deserialize(archive_type& ar, size_type nrecvs) = 0; virtual size_type size() const = 0; @@ -64,7 +65,7 @@ namespace ippl { size_type getParticleCount() const { return *localNum_mp; } virtual void applyPermutation(const hash_type&) = 0; - virtual void internalCopy(const hash_type&) = 0; + virtual void internalCopy(const hash_type&) = 0; protected: const size_type* localNum_mp; diff --git a/src/Particle/ParticleSpatialLayout.hpp b/src/Particle/ParticleSpatialLayout.hpp index 11ec30496..d62a4a54c 100644 --- a/src/Particle/ParticleSpatialLayout.hpp +++ b/src/Particle/ParticleSpatialLayout.hpp @@ -27,7 +27,6 @@ #include "Communicate/Window.h" - namespace ippl { /*! @@ -38,7 +37,7 @@ namespace ippl { */ struct increment_type { size_t count[2]; - + KOKKOS_FUNCTION void init() { count[0] = 0; count[1] = 0; @@ -61,8 +60,7 @@ namespace ippl { ParticleSpatialLayout::ParticleSpatialLayout(FieldLayout& fl, Mesh& mesh) : rlayout_m(fl, mesh) - , flayout_m(fl) - { + , flayout_m(fl) { nRecvs_m.resize(Comm->size()); if (Comm->size() > 1) { window_m.create(*Comm, nRecvs_m.begin(), nRecvs_m.end()); @@ -72,14 +70,13 @@ namespace ippl { template void ParticleSpatialLayout::updateLayout(FieldLayout& fl, Mesh& mesh) { - //flayout_m = fl; + // flayout_m = fl; rlayout_m.changeDomain(fl, mesh); } template template void ParticleSpatialLayout::update(ParticleContainer& pc) { - /* Apply Boundary Conditions */ static IpplTimings::TimerRef ParticleBCTimer = IpplTimings::getTimer("particleBC"); IpplTimings::startTimer(ParticleBCTimer); @@ -89,7 +86,7 @@ namespace ippl { /* Update Timer for the rest of the function */ static IpplTimings::TimerRef ParticleUpdateTimer = IpplTimings::getTimer("updateParticle"); IpplTimings::startTimer(ParticleUpdateTimer); - + int nRanks = Comm->size(); if (nRanks < 2) { return; @@ -103,7 +100,7 @@ namespace ippl { */ // 1. figure out which particles need to go where -> locateParticles(...) ============= // - + static IpplTimings::TimerRef locateTimer = IpplTimings::getTimer("locateParticles"); IpplTimings::startTimer(locateTimer); @@ -112,40 +109,38 @@ namespace ippl { /* The indices correspond to the indices of the local particles, * the values correspond to the ranks to which the particles need to be sent - */ + */ locate_type particleRanks("particles' MPI ranks", localnum); /* The indices are the indices of the particles, - * the boolean values describe whether the particle has left the current rank + * the boolean values describe whether the particle has left the current rank * 0 --> particle valid (inside current rank) * 1 --> particle invalid (left rank) */ bool_type invalidParticles("validity of particles", localnum); /* The indices are the MPI ranks, - * the values are the number of particles are sent to that rank from myrank + * the values are the number of particles are sent to that rank from myrank */ locate_type rankSendCount_dview("rankSendCount Device", nRanks); - /* The indices have no particluar meaning, + /* The indices have no particluar meaning, * the values are the MPI ranks to which we need to send */ locate_type destinationRanks_dview("destinationRanks Device", nRanks); /* nInvalid is the number of invalid particles * nDestinationRanks is the number of MPI ranks we need to send to - */ - auto [nInvalid, nDestinationRanks] = - locateParticles(pc, - particleRanks, invalidParticles, - rankSendCount_dview, destinationRanks_dview); + */ + auto [nInvalid, nDestinationRanks] = locateParticles( + pc, particleRanks, invalidParticles, rankSendCount_dview, destinationRanks_dview); /* Host space copy of rankSendCount_dview */ - auto rankSendCount_hview = + auto rankSendCount_hview = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rankSendCount_dview); - + /* Host Space copy of destinationRanks_dview */ - auto destinationRanks_hview = + auto destinationRanks_hview = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), destinationRanks_dview); IpplTimings::stopTimer(locateTimer); @@ -153,18 +148,18 @@ namespace ippl { // 2. fill send buffer and send particles =============================================== // // 2.1 Remote Memory Access window for one-sided communication - + static IpplTimings::TimerRef preprocTimer = IpplTimings::getTimer("sendPreprocess"); IpplTimings::startTimer(preprocTimer); - - std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0); + + std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0); window_m.fence(0); - - // Prepare RMA window for the ranks we need to send to - for(size_t ridx=0; ridx < nDestinationRanks; ridx++){ + + // Prepare RMA window for the ranks we need to send to + for (size_t ridx = 0; ridx < nDestinationRanks; ridx++) { int rank = destinationRanks_hview[ridx]; - if (rank == Comm->rank()){ + if (rank == Comm->rank()) { // we do not need to send to ourselves continue; } @@ -175,30 +170,29 @@ namespace ippl { IpplTimings::stopTimer(preprocTimer); - // 2.2 Particle Sends + // 2.2 Particle Sends static IpplTimings::TimerRef sendTimer = IpplTimings::getTimer("particleSend"); IpplTimings::startTimer(sendTimer); - + std::vector requests(0); int tag = Comm->next_tag(mpi::tag::P_SPATIAL_LAYOUT, mpi::tag::P_LAYOUT_CYCLE); - for(size_t ridx=0; ridx < nDestinationRanks; ridx++){ + for (size_t ridx = 0; ridx < nDestinationRanks; ridx++) { int rank = destinationRanks_hview[ridx]; - if(rank == Comm->rank()){ - continue; - } + if (rank == Comm->rank()) { + continue; + } hash_type hash("hash", rankSendCount_hview(rank)); fillHash(rank, particleRanks, hash); pc.sendToRank(rank, tag, requests, hash); } - - IpplTimings::stopTimer(sendTimer); + IpplTimings::stopTimer(sendTimer); // 3. Internal destruction of invalid particles ======================================= // - + static IpplTimings::TimerRef destroyTimer = IpplTimings::getTimer("particleDestroy"); IpplTimings::startTimer(destroyTimer); @@ -206,9 +200,9 @@ namespace ippl { Kokkos::fence(); IpplTimings::stopTimer(destroyTimer); - - // 4. Receive Particles ================================================================ // - + + // 4. Receive Particles ================================================================ // + static IpplTimings::TimerRef recvTimer = IpplTimings::getTimer("particleRecv"); IpplTimings::startTimer(recvTimer); @@ -219,13 +213,13 @@ namespace ippl { } IpplTimings::stopTimer(recvTimer); - IpplTimings::startTimer(sendTimer); if (requests.size() > 0) { MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); } IpplTimings::stopTimer(sendTimer); + Comm->freeAllBuffers(); IpplTimings::stopTimer(ParticleUpdateTimer); } @@ -238,9 +232,9 @@ namespace ippl { return ((pos[Idx] > region[Idx].min()) && ...) && ((pos[Idx] <= region[Idx].max()) && ...); }; - - /* Helper function that evaluates the total number of neighbors for the current rank in Dim dimensions. - */ + /* Helper function that evaluates the total number of neighbors for the current rank in Dim + * dimensions. + */ template detail::size_type ParticleSpatialLayout::getNeighborSize( const neighbor_list& neighbors) const { @@ -253,29 +247,29 @@ namespace ippl { return totalSize; } - /** - * @brief This function determines to which rank particles need to be sent after the iteration step. - * It starts by first scanning direct rank neighbors, and only does a global scan if there are still - * unfound particles. It then calculates how many particles need to be sent to each rank and how many - * ranks are sent to in total. + * @brief This function determines to which rank particles need to be sent after the iteration + * step. It starts by first scanning direct rank neighbors, and only does a global scan if there + * are still unfound particles. It then calculates how many particles need to be sent to each + * rank and how many ranks are sent to in total. * * @param pc Particle Container - * @param ranks A vector the length of the number of particles on the current rank, where each value refers - * to the new rank of the particle - * @param invalid A vector marking the particles that need to be sent away, and thus locally deleted - * @param nSends_dview Device view the length of number of ranks, where each value determines the number - * of particles sent to that rank from the current rank + * @param ranks A vector the length of the number of particles on the current rank, where + * each value refers to the new rank of the particle + * @param invalid A vector marking the particles that need to be sent away, and thus + * locally deleted + * @param nSends_dview Device view the length of number of ranks, where each value determines + * the number of particles sent to that rank from the current rank * @param sends_dview Device view for the number of ranks that are sent to from current rank * * @return tuple with the number of particles sent away and the number of ranks sent to */ template template - std::pair ParticleSpatialLayout::locateParticles( - const ParticleContainer& pc, locate_type& ranks, bool_type& invalid, + std::pair + ParticleSpatialLayout::locateParticles( + const ParticleContainer& pc, locate_type& ranks, bool_type& invalid, locate_type& nSends_dview, locate_type& sends_dview) const { - auto positions = pc.R.getView(); region_view_type Regions = rlayout_m.getdLocalRegions(); @@ -291,9 +285,9 @@ namespace ippl { locate_type outsideIds("Particles outside of neighborhood", size_type(pc.getLocalNum())); /// outsideCount: Tracks the number of particles that travelled outside of the neighborhood. - size_type outsideCount = 0; + size_type outsideCount = 0; /// invalidCount: Tracks the number of particles that need to be sent to other ranks. - size_type invalidCount = 0; + size_type invalidCount = 0; /// neighborSize: Size of a neighborhood in D dimentions. const size_type neighborSize = getNeighborSize(neighbors); @@ -301,11 +295,11 @@ namespace ippl { /// neighbors_view: Kokkos view with the IDs of the neighboring MPI ranks. locate_type neighbors_view("Nearest neighbors IDs", neighborSize); - /* red_val: Used to reduce both the number of invalid particles and the number of particles - * outside of the neighborhood (Kokkos::parallel_scan doesn't allow multiple reduction values, so we - * use the helper class increment_type). First element updates InvalidCount, second - * one updates outsideCount. - */ + /* red_val: Used to reduce both the number of invalid particles and the number of particles + * outside of the neighborhood (Kokkos::parallel_scan doesn't allow multiple reduction + * values, so we use the helper class increment_type). First element updates InvalidCount, + * second one updates outsideCount. + */ increment_type red_val; red_val.init(); @@ -317,7 +311,7 @@ namespace ippl { for (const auto& componentNeighbors : neighbors) { for (size_t j = 0; j < componentNeighbors.size(); ++j) { neighbors_mirror(k) = componentNeighbors[j]; - //std::cout << "Neighbor: " << neighbors_mirror(k) << std::endl; + // std::cout << "Neighbor: " << neighbors_mirror(k) << std::endl; k++; } } @@ -338,21 +332,21 @@ namespace ippl { Kokkos::RangePolicy(0, ranks.extent(0)), KOKKOS_LAMBDA(const size_type i, increment_type& val, const bool final) { /* Step 1 - * inCurr: True if the particle hasn't left the current MPI rank. - * inNeighbor: True if the particle is found in a neighboring rank. - * found: True either if inCurr = True or inNeighbor = True. - * increment: Helper variable to update red_val. - */ - bool inCurr = false; + * inCurr: True if the particle hasn't left the current MPI rank. + * inNeighbor: True if the particle is found in a neighboring rank. + * found: True either if inCurr = True or inNeighbor = True. + * increment: Helper variable to update red_val. + */ + bool inCurr = false; bool inNeighbor = false; - bool found = false; + bool found = false; bool increment[2]; - inCurr = positionInRegion(is, positions(i), Regions(myRank)); + inCurr = positionInRegion(is, positions(i), Regions(myRank)); - ranks(i) = inCurr * myRank; - invalid(i) = !inCurr; - found = inCurr || found; + ranks(i) = inCurr * myRank; + invalid(i) = !inCurr; + found = inCurr || found; /// Step 2 for (size_t j = 0; j < neighbors_view.extent(0); ++j) { @@ -360,34 +354,34 @@ namespace ippl { inNeighbor = positionInRegion(is, positions(i), Regions(rank)); - ranks(i) = !(inNeighbor) * ranks(i) + inNeighbor * rank; - found = inNeighbor || found; + ranks(i) = !(inNeighbor)*ranks(i) + inNeighbor * rank; + found = inNeighbor || found; } /// Step 3 - /* isOut: When the last thread has finished the search, checks whether the particle has been found - * either in the current rank or in a neighboring one. - * Used to avoid race conditions when updating outsideIds. + /* isOut: When the last thread has finished the search, checks whether the particle + * has been found either in the current rank or in a neighboring one. Used to avoid + * race conditions when updating outsideIds. */ - if(final && !found) { + if (final && !found) { outsideIds(val.count[1]) = i; } - //outsideIds(val.count[1]) = i * isOut; + // outsideIds(val.count[1]) = i * isOut; increment[0] = invalid(i); increment[1] = !found; val += increment; - }, red_val); Kokkos::fence(); - invalidCount = red_val.count[0]; - outsideCount = red_val.count[1]; + invalidCount = red_val.count[0]; + outsideCount = red_val.count[1]; IpplTimings::stopTimer(neighborSearch); - /// Step 4 - static IpplTimings::TimerRef nonNeighboringParticles = IpplTimings::getTimer("nonNeighboringParticles"); + /// Step 4 + static IpplTimings::TimerRef nonNeighboringParticles = + IpplTimings::getTimer("nonNeighboringParticles"); IpplTimings::startTimer(nonNeighboringParticles); if (outsideCount > 0) { Kokkos::parallel_for( @@ -396,37 +390,35 @@ namespace ippl { KOKKOS_LAMBDA(const size_t i, const size_type j) { /// pID: (local) ID of the particle that is currently being searched. size_type pId = outsideIds(i); - + /// inRegion: Checks whether particle pID is inside region j. bool inRegion = positionInRegion(is, positions(pId), Regions(j)); - if(inRegion){ + if (inRegion) { ranks(pId) = j; - } + } }); Kokkos::fence(); } IpplTimings::stopTimer(nonNeighboringParticles); - - Kokkos::parallel_for("Calculate nSends", - Kokkos::RangePolicy(0, ranks.extent(0)), - KOKKOS_LAMBDA(const size_t i){ - size_type rank = ranks(i); - Kokkos::atomic_fetch_add(&nSends_dview(rank),1); - } - ); - - // Number of Ranks we need to send to + + Kokkos::parallel_for( + "Calculate nSends", Kokkos::RangePolicy(0, ranks.extent(0)), + KOKKOS_LAMBDA(const size_t i) { + size_type rank = ranks(i); + Kokkos::atomic_fetch_add(&nSends_dview(rank), 1); + }); + + // Number of Ranks we need to send to Kokkos::View rankSends("Number of Ranks we need to send to"); - - Kokkos::parallel_for("Calculate sends", - Kokkos::RangePolicy(0, nSends_dview.extent(0)), - KOKKOS_LAMBDA(const size_t rank){ - if(nSends_dview(rank) != 0){ - size_type index = Kokkos::atomic_fetch_add(&rankSends(), 1); - sends_dview(index) = rank; - } - } - ); + + Kokkos::parallel_for( + "Calculate sends", Kokkos::RangePolicy(0, nSends_dview.extent(0)), + KOKKOS_LAMBDA(const size_t rank) { + if (nSends_dview(rank) != 0) { + size_type index = Kokkos::atomic_fetch_add(&rankSends(), 1); + sends_dview(index) = rank; + } + }); size_type temp; Kokkos::deep_copy(temp, rankSends); @@ -454,7 +446,6 @@ namespace ippl { } }); Kokkos::fence(); - } template diff --git a/src/Utility/IpplTimings.cpp b/src/Utility/IpplTimings.cpp index 2648fe50f..8d75823fc 100644 --- a/src/Utility/IpplTimings.cpp +++ b/src/Utility/IpplTimings.cpp @@ -35,20 +35,22 @@ #ifdef IPPL_ENABLE_NSYS_PROFILER #include "nvtx3/nvToolsExt.h" -const uint32_t colors[] = { 0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, 0xff00ffff, 0xffff0000, 0xffffffff }; -const int num_colors = sizeof(colors)/sizeof(uint32_t); -#define PUSH_RANGE(name,cid) { \ - int color_id = cid; \ - color_id = color_id%num_colors;\ - nvtxEventAttributes_t eventAttrib = {0}; \ - eventAttrib.version = NVTX_VERSION; \ - eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ - eventAttrib.colorType = NVTX_COLOR_ARGB; \ - eventAttrib.color = colors[color_id]; \ - eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ - eventAttrib.message.ascii = name; \ - nvtxRangePushEx(&eventAttrib); \ -} +const uint32_t colors[] = {0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, + 0xff00ffff, 0xffff0000, 0xffffffff}; +const int num_colors = sizeof(colors) / sizeof(uint32_t); +#define PUSH_RANGE(name, cid) \ + { \ + int color_id = cid; \ + color_id = color_id % num_colors; \ + nvtxEventAttributes_t eventAttrib = {0}; \ + eventAttrib.version = NVTX_VERSION; \ + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ + eventAttrib.colorType = NVTX_COLOR_ARGB; \ + eventAttrib.color = colors[color_id]; \ + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ + eventAttrib.message.ascii = name; \ + nvtxRangePushEx(&eventAttrib); \ + } #endif Timing* IpplTimings::instance = new Timing(); @@ -88,9 +90,9 @@ Timing::TimerRef Timing::getTimer(const char* nm) { void Timing::startTimer(TimerRef t) { if (t >= TimerList.size()) return; - #ifdef IPPL_ENABLE_NSYS_PROFILER +#ifdef IPPL_ENABLE_NSYS_PROFILER PUSH_RANGE(TimerList[t]->name.c_str(), (int)t); - #endif +#endif TimerList[t]->start(); } @@ -98,9 +100,9 @@ void Timing::startTimer(TimerRef t) { void Timing::stopTimer(TimerRef t) { if (t >= TimerList.size()) return; - #ifdef IPPL_ENABLE_NSYS_PROFILER +#ifdef IPPL_ENABLE_NSYS_PROFILER nvtxRangePop(); - #endif +#endif TimerList[t]->stop(); } @@ -111,6 +113,12 @@ void Timing::clearTimer(TimerRef t) { TimerList[t]->clear(); } +const std::string& Timing::timerName(TimerRef t) { + if (t >= TimerList.size()) + return EmptyName; + return TimerList[t]->name; +} + // print out the timing results void Timing::print() { if (TimerList.size() < 1) diff --git a/src/Utility/IpplTimings.h b/src/Utility/IpplTimings.h index aea1332cd..e2ea93c5b 100644 --- a/src/Utility/IpplTimings.h +++ b/src/Utility/IpplTimings.h @@ -31,6 +31,7 @@ #include #include +#include "Utility/Logging.h" #include "Utility/PAssert.h" #include "Utility/Timer.h" #include "Utility/my_auto_ptr.h" @@ -117,6 +118,9 @@ struct Timing { // clear a timer, by turning it off and throwing away its time void clearTimer(TimerRef); + // access the timer's name + const std::string& timerName(TimerRef); + // return a TimerInfo struct by asking for the name TimerInfo* infoTimer(const char* nm) { return TimerMap[std::string(nm)]; } @@ -136,6 +140,8 @@ struct Timing { // a map of timers, keyed by string TimerMap_t TimerMap; + + std::string EmptyName; }; class IpplTimings { @@ -150,10 +156,16 @@ class IpplTimings { static TimerRef getTimer(const char* nm) { return instance->getTimer(nm); } // start a timer - static void startTimer(TimerRef t) { instance->startTimer(t); } + static void startTimer(TimerRef t) { + SPDLOG_TRACE("Starting timer [{}]", instance->timerName(t)); + instance->startTimer(t); + } // stop a timer, and accumulate it's values - static void stopTimer(TimerRef t) { instance->stopTimer(t); } + static void stopTimer(TimerRef t) { + SPDLOG_TRACE("Stopping timer [{}]", instance->timerName(t)); + instance->stopTimer(t); + } // clear a timer, by turning it off and throwing away its time static void clearTimer(TimerRef t) { instance->clearTimer(t); } diff --git a/src/Utility/Logging.h b/src/Utility/Logging.h new file mode 100644 index 000000000..99bc2d08b --- /dev/null +++ b/src/Utility/Logging.h @@ -0,0 +1,39 @@ +#pragma once + +#if defined(IPPL_LOGGING_ENABLED) + +#include +#include +#include +#include +#include +#include "Utility/PrintType.h" + +template +struct scoped_var { + // capture tuple elements by reference - no temp vars in constructor please + std::tuple const message_; + // + explicit scoped_var(Args const&... args) + : message_(args...) // + { + SPDLOG_CRITICAL("SCOPE >> enter << {}", message_); + } + + ~scoped_var() { SPDLOG_CRITICAL("SCOPE << leave >> {}", message_); } +}; +#define SPDLOG_SCOPE(...) scoped_var scope(__VA_ARGS__); + +#else + +// In increasing level +#define SPDLOG_TRACE(...) +#define SPDLOG_DEBUG(...) +#define SPDLOG_INFO(...) +#define SPDLOG_WARN(...) +#define SPDLOG_ERROR(...) +#define SPDLOG_CRITICAL(...) +// +#define SPDLOG_SCOPE(...) + +#endif diff --git a/src/Utility/PrintType.h b/src/Utility/PrintType.h new file mode 100644 index 000000000..c250c3d8a --- /dev/null +++ b/src/Utility/PrintType.h @@ -0,0 +1,70 @@ +#pragma once + +#include +#include +#include +#include +#include + +// gcc and clang both provide this heaader +#if __has_include() +#include +using cxxabi_supported__ = std::true_type; +#else +using cxxabi_supported__ = std::false_type; +// create some dummmy function to make the compiler happy in the true_type instantiation +namespace abi { + template + char* __cxa_demangle(Ts... ts) { + return nullptr; + } +} // namespace abi +#endif + +// -------------------------------------------------------------------- +namespace ippl::debug::detail { + // default : use built-in typeid to get the best info we can + template + struct demangle_helper { + char const* type_id() const { return typeid(T).name(); } + }; + + // if available : demangle an arbitrary c++ type using gnu utility + template + struct demangle_helper { + demangle_helper() + : demangled_{abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr), + std::free} {} + + char const* type_id() const { return demangled_ ? demangled_.get() : typeid(T).name(); } + + private: + std::unique_ptr demangled_; + }; + + template + using cxx_type_id = demangle_helper; +} // namespace ippl::debug::detail + +// -------------------------------------------------------------------- +// print type information +// usage : std::cout << debug::print_type("separator") +// separator is appended if the number of types > 1 +// -------------------------------------------------------------------- +namespace ippl::debug { + template // print a single type + inline std::string print_type(char const* = "") { + return std::string(detail::cxx_type_id().type_id()); + } + + template <> // fallback for an empty type + inline std::string print_type<>(char const*) { + return "<>"; + } + + template // print a list of types + inline std::enable_if_t print_type(char const* delim = "") { + std::string temp(print_type()); + return temp + delim + print_type(delim); + } +} // namespace ippl::debug diff --git a/unit_tests/Communicate/BufferHandler.cpp b/unit_tests/Communicate/BufferHandler.cpp index eef387da9..15b1b6bf2 100644 --- a/unit_tests/Communicate/BufferHandler.cpp +++ b/unit_tests/Communicate/BufferHandler.cpp @@ -14,10 +14,10 @@ class TypedBufferHandlerTest : public ::testing::Test { protected: using memory_space = MemorySpace; - class TestableBufferHandler : public ippl::DefaultBufferHandler { + class TestableBufferHandler : public ippl::comms::DefaultBufferHandler { public: - using ippl::DefaultBufferHandler::deleteAllBuffers; - using ippl::DefaultBufferHandler::freeAllBuffers; + using ippl::comms::DefaultBufferHandler::deleteAllBuffers; + using ippl::comms::DefaultBufferHandler::freeAllBuffers; size_t usedBuffersSize() const { return this->used_buffers.size(); } @@ -109,9 +109,9 @@ TYPED_TEST(TypedBufferHandlerTest, GetBuffer_ExactSizeMatch) { // Test: Freeing a buffer that does not exist in the used pool has no effect TYPED_TEST(TypedBufferHandlerTest, FreeNonExistentBuffer) { - auto buffer = this->handler->getBuffer(100, 1.0); - auto newBuffer = - std::make_shared>(200); + using archive_type = ippl::comms::archive_buffer; + auto buffer = this->handler->getBuffer(100, 1.0); + auto newBuffer = std::make_shared(200); this->handler->freeBuffer(newBuffer); EXPECT_EQ(this->handler->usedBuffersSize(), 1); @@ -182,7 +182,8 @@ TYPED_TEST(TypedBufferHandlerTest, GetAllocatedAndFreeSize_AfterDeleteAllBuffers EXPECT_EQ(this->handler->getFreeSize(), 0); } -// Test: Buffer size is correctly accounted for if a free buffer is available but we request a larger one, thus reallocating this one +// Test: Buffer size is correctly accounted for if a free buffer is available but we request a +// larger one, thus reallocating this one TYPED_TEST(TypedBufferHandlerTest, GetAllocatedAndFreeSize_ResizeBufferLargerThanAvailable) { auto smallBuffer = this->handler->getBuffer(50, 1.0); this->handler->freeBuffer(smallBuffer); diff --git a/unit_tests/Communicate/LoggingBufferHandler.cpp b/unit_tests/Communicate/LoggingBufferHandler.cpp index a0db38366..8a1d9f38e 100644 --- a/unit_tests/Communicate/LoggingBufferHandler.cpp +++ b/unit_tests/Communicate/LoggingBufferHandler.cpp @@ -12,14 +12,14 @@ class TypedLoggingBufferHandlerTest : public ::testing::Test { protected: void SetUp() override { rank = 0; - this->bufferHandler = std::make_shared>(); + this->bufferHandler = std::make_shared>(); this->loggingHandler = - std::make_shared>(bufferHandler, rank); + std::make_shared>(bufferHandler, rank); } int rank; - std::shared_ptr> bufferHandler; - std::shared_ptr> loggingHandler; + std::shared_ptr> bufferHandler; + std::shared_ptr> loggingHandler; }; TYPED_TEST_SUITE(TypedLoggingBufferHandlerTest, MemorySpaces);