diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index e572278..14f9e0a 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -11,3 +11,8 @@ add_subdirectory(
   ${CMAKE_CURRENT_LIST_DIR}/simple-mallocMC
   ${CMAKE_BINARY_DIR}/examples/simple-mallocMC
 )
+
+add_subdirectory(  # multi-setup example, built into its own binary directory
+  ${CMAKE_CURRENT_LIST_DIR}/multi-setup
+  ${CMAKE_BINARY_DIR}/examples/multi-setup
+)
diff --git a/examples/multi-setup/CMakeLists.txt b/examples/multi-setup/CMakeLists.txt
new file mode 100644
index 0000000..0f773df
--- /dev/null
+++ b/examples/multi-setup/CMakeLists.txt
@@ -0,0 +1,50 @@
+cmake_minimum_required(VERSION 3.14...3.22)
+
+if(POLICY CMP0167)  # only touch the policy on CMake versions that know it
+  cmake_policy(SET CMP0167 NEW)
+endif()
+project(KitGenBenchExampleMultiSetup LANGUAGES CXX)
+
+# --- Import tools ----
+
+include(../../cmake/tools.cmake)
+
+# ---- Dependencies ----
+
+include(../../cmake/CPM.cmake)
+
+cpmaddpackage(
+  NAME nlohmann_json
+  GITHUB_REPOSITORY nlohmann/json
+  VERSION 3.11.3 NO_TESTS
+)
+
+cpmaddpackage(
+  NAME alpaka
+  GITHUB_REPOSITORY alpaka-group/alpaka
+  GIT_TAG 1.2.0
+)
+
+cpmaddpackage(NAME KitGenBench SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/../..)  # repository root, two levels up
+
+# ---- Create standalone executable ----
+
+file(GLOB sources CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp)
+
+alpaka_add_executable(${PROJECT_NAME} ${sources})
+
+set_target_properties(
+  ${PROJECT_NAME}
+  PROPERTIES
+    CXX_STANDARD 20
+    OUTPUT_NAME ${PROJECT_NAME}
+    CXX_STANDARD_REQUIRED ON
+    CXX_EXTENSIONS OFF
+)
+
+target_link_libraries(
+  ${PROJECT_NAME}
+  KitGenBench::KitGenBench
+  nlohmann_json::nlohmann_json
+  alpaka::alpaka
+)
diff --git a/examples/multi-setup/source/main.cpp b/examples/multi-setup/source/main.cpp
new file mode 100644
index 0000000..ae3e22d
--- /dev/null
+++ b/examples/multi-setup/source/main.cpp
@@ -0,0 +1,335 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
+#  include
+#endif  // ALPAKA_ACC_GPU_CUDA_ENABLED
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "nlohmann/json_fwd.hpp"
+
+using nlohmann::json;
+using namespace kitgenbench;
+
+using Dim = alpaka::DimInt<1>;
+using Idx = std::uint32_t;
+using AccTag = std::remove_cvref_t(alpaka::EnabledAccTags{}))>;
+using Acc = alpaka::TagToAcc;
+
+// Action identifiers for the allocation and deallocation steps; compared in `SimpleSumLogger::call`.
+namespace kitgenbench::Actions {
+  [[maybe_unused]] static constexpr int MALLOC = 1;
+  [[maybe_unused]] static constexpr int FREE = 2;
+}  // namespace kitgenbench::Actions
+
+// Creates the execution details for this example: device 0 of the accelerator platform plus a
+// one-dimensional work division covering 4 * 256 threads.
+auto makeExecutionDetails() {
+  auto const platformAcc = alpaka::Platform{};
+  auto const dev = alpaka::getDevByIdx(platformAcc, 0);
+#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
+  // Request a 1 GiB device-side malloc heap. The returned status is not inspected.
+  cudaDeviceSetLimit(cudaLimitMallocHeapSize, 1024U * 1024U * 1024U);
+#endif
+  uint32_t const numThreadsPerBlock = 256U;
+  uint32_t const numThreads = 4U * numThreadsPerBlock;
+  // The serial CPU tag gets one block with one thread and `numThreads` elements; every other
+  // accelerator gets `numThreads / numThreadsPerBlock` blocks of `numThreadsPerBlock` threads.
+  auto workdiv = [numThreads, numThreadsPerBlock]() -> alpaka::WorkDivMembers {
+    if constexpr (std::is_same_v, alpaka::TagCpuSerial>) {
+      return {{1U}, {1U}, {numThreads}};
+    } else {
+      return alpaka::WorkDivMembers{
+          {numThreads / numThreadsPerBlock}, {numThreadsPerBlock}, {1U}};
+    }
+  }();
+  return kitgenbench::ExecutionDetails{workdiv, dev};
+}
+
+// Reasons for the check to yield the result it yielded.
+// `completed` means that the check completed. The result can still be true/false depending on
+// whether the obtained value was actually correct. `notApplicable` means that the checks were
+// skipped. `nullpointer` means that a nullpointer was given, so the checks couldn't run at all.
+enum class Reason { completed, notApplicable, nullpointer };
+using Payload = std::variant, std::pair>;
+
+// Logger that sums the measured durations and counts of MALLOC and FREE actions and counts the
+// CHECK results that failed, hit a nullpointer, or whose payload lacks the expected alternative.
+template struct SimpleSumLogger {
+  using Clock = DeviceClock;
+
+  // Value-initialised so `+=` and `atomicAdd` never read an indeterminate value.
+  DeviceClock::DurationType mallocDuration{};
+  std::uint32_t mallocCounter{0U};
+
+  DeviceClock::DurationType freeDuration{};
+  std::uint32_t freeCounter{0U};
+
+  std::uint32_t nullpointersObtained{0U};
+  std::uint32_t failedChecksCounter{0U};
+  std::uint32_t invalidCheckResults{0U};
+
+  // Runs `func(acc)` between two `Clock::clock()` samples, books the result under the action
+  // given by its first element, and returns the result unchanged.
+  template ALPAKA_FN_INLINE ALPAKA_FN_ACC auto call(TAcc const& acc, auto func) {
+    static_assert(
+        std::is_same_v, alpaka::Idx>, TAcc>);
+    auto start = Clock::clock();
+    auto result = func(acc);
+    auto end = Clock::clock();
+
+    if (std::get<0>(result) == Actions::MALLOC) {
+      mallocDuration += Clock::duration(start, end);
+      mallocCounter++;
+    }
+
+    if (std::get<0>(result) == Actions::FREE) {
+      freeDuration += Clock::duration(start, end);
+      freeCounter++;
+    }
+
+    if (std::get<0>(result) == Actions::CHECK) {
+      if (std::holds_alternative>(std::get<1>(result))) {
+        auto [passed, reason] = std::get>(std::get<1>(result));
+        if (not passed) {
+          if (reason == Reason::nullpointer) {
+            nullpointersObtained++;
+          }
+          if (reason == Reason::completed) {
+            failedChecksCounter++;
+          }
+        }
+      } else {
+        invalidCheckResults++;
+      }
+    }
+
+    return result;
+  }
+
+  // Atomically adds every duration and counter of `other` onto this logger.
+  ALPAKA_FN_ACC void accumulate(const auto& acc, const SimpleSumLogger& other) {
+    alpaka::atomicAdd(acc, &mallocDuration, other.mallocDuration);
+    alpaka::atomicAdd(acc, &mallocCounter, other.mallocCounter);
+    alpaka::atomicAdd(acc, &freeDuration, other.freeDuration);
+    alpaka::atomicAdd(acc, &freeCounter, other.freeCounter);
+    alpaka::atomicAdd(acc, &nullpointersObtained, other.nullpointersObtained);
+    alpaka::atomicAdd(acc, &failedChecksCounter, other.failedChecksCounter);
+    alpaka::atomicAdd(acc, &invalidCheckResults, other.invalidCheckResults);
+  }
+
+  // Builds the JSON report. With CUDA enabled the durations are divided by the clock rate of
+  // device 0; otherwise the divisor is 1. Averages divide by the counter, or by 1 if it is zero.
+  nlohmann::json generateReport() {
+#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
+    cudaDeviceProp prop{};
+    // Fall back to 1 if the query fails so the divisions below never see zero or garbage.
+    auto const status = cudaGetDeviceProperties(&prop, 0);
+    auto clockRate = (status == cudaSuccess && prop.clockRate > 0) ? prop.clockRate : 1;
+#else
+    auto clockRate = 1;
+#endif  // ALPAKA_ACC_GPU_CUDA_ENABLED
+    return {
+#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
+        {"clock rate [1/ms]", clockRate},
+#endif
+        {"allocation total time [ms]", mallocDuration / clockRate},
+        {"allocation average time [ms]",
+         mallocDuration / clockRate / (mallocCounter > 0 ? mallocCounter : 1U)},
+        {"allocation count", mallocCounter},
+        {"deallocation total time [ms]", freeDuration / clockRate},
+        {"deallocation average time [ms]",
+         freeDuration / clockRate / (freeCounter > 0 ? freeCounter : 1U)},
+        {"deallocation count", freeCounter},
+        {"failed checks count", failedChecksCounter},
+        {"nullpointers count", nullpointersObtained},
+        {"invalid check results count", invalidCheckResults},
+    };
+  }
+};
+
+template