From 675e173eedde3a6931574990a4b966a829983605 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 14 Oct 2024 12:18:54 +0200 Subject: [PATCH 001/123] Add new Kokkos backend to the backend_types enumeration. --- include/plssvm/backend_types.hpp | 14 +++++++++++++- include/plssvm/detail/cmd/parser_predict.hpp | 2 +- include/plssvm/detail/cmd/parser_train.hpp | 2 +- src/plssvm/backend_types.cpp | 15 +++++++++++---- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/plssvm/backend_types.hpp b/include/plssvm/backend_types.hpp index 779062ab5..76b23914b 100644 --- a/include/plssvm/backend_types.hpp +++ b/include/plssvm/backend_types.hpp @@ -44,7 +44,9 @@ enum class backend_type { /** [OpenCL](https://www.khronos.org/opencl/) to target CPUs and GPUs from different vendors. */ opencl, /** [SYCL](https://www.khronos.org/sycl/) to target CPUs and GPUs from different vendors. Currently tested SYCL implementations are [DPC++](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) (formerly known as hipSYCL). */ - sycl + sycl, + /** [Kokkos](https://github.com/kokkos/kokkos) to target CPUs and GPUs from different vendors. */ + kokkos }; /** @@ -89,6 +91,7 @@ namespace hip { class csvm; } namespace opencl { class csvm; } namespace adaptivecpp { class csvm; } namespace dpcpp { class csvm; } +namespace kokkos { class csvm; } // clang-format on @@ -169,6 +172,15 @@ struct csvm_to_backend_type { constexpr static sycl::implementation_type impl = sycl::implementation_type::dpcpp; }; +/** + * @brief Sets the `value` to `plssvm::backend_type::kokkos` for the Kokkos C-SVM. + */ +template <> +struct csvm_to_backend_type { + /// The enum value representing the Kokkos backend. 
+ constexpr static backend_type value = backend_type::kokkos; +}; + } // namespace detail /// @endcond diff --git a/include/plssvm/detail/cmd/parser_predict.hpp b/include/plssvm/detail/cmd/parser_predict.hpp index 23ba69866..6c0a37c01 100644 --- a/include/plssvm/detail/cmd/parser_predict.hpp +++ b/include/plssvm/detail/cmd/parser_predict.hpp @@ -37,7 +37,7 @@ struct parser_predict { */ parser_predict(int argc, char **argv); - /// The used backend: automatic (depending on the specified target_platforms), OpenMP, CUDA, HIP, OpenCL, or SYCL. + /// The used backend: automatic (depending on the specified target_platforms), OpenMP, CUDA, HIP, OpenCL, SYCL, or Kokkos. backend_type backend{ backend_type::automatic }; /// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel. target_platform target{ target_platform::automatic }; diff --git a/include/plssvm/detail/cmd/parser_train.hpp b/include/plssvm/detail/cmd/parser_train.hpp index 6a48c8d91..008466863 100644 --- a/include/plssvm/detail/cmd/parser_train.hpp +++ b/include/plssvm/detail/cmd/parser_train.hpp @@ -53,7 +53,7 @@ struct parser_train { /// The multi-class classification strategy used. classification_type classification{ classification_type::oaa }; - /// The used backend: automatic (depending on the specified target_platforms), OpenMP, CUDA, HIP, OpenCL, or SYCL. + /// The used backend: automatic (depending on the specified target_platforms), OpenMP, CUDA, HIP, OpenCL, SYCL, or Kokkos. backend_type backend{ backend_type::automatic }; /// The target platform: automatic (depending on the used backend), CPUs or GPUs from NVIDIA, AMD, or Intel. 
target_platform target{ target_platform::automatic }; diff --git a/src/plssvm/backend_types.cpp b/src/plssvm/backend_types.cpp index 0d01bb837..8c7f4095c 100644 --- a/src/plssvm/backend_types.cpp +++ b/src/plssvm/backend_types.cpp @@ -47,6 +47,9 @@ std::vector list_available_backends() { #if defined(PLSSVM_HAS_SYCL_BACKEND) available_backends.push_back(backend_type::sycl); #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + available_backends.push_back(backend_type::kokkos); +#endif // automatic is ALWAYS available but AT LEAST ONE other backend must be available in addition PLSSVM_ASSERT(available_backends.size() > 1, "Besides \"automatic\" at least one other backend must be available!"); @@ -58,10 +61,10 @@ backend_type determine_default_backend(const std::vector &availabl // the decision order based on empiric findings using decision_order_type = std::pair>; const std::array decision_order = { - decision_order_type{ target_platform::gpu_nvidia, { backend_type::cuda, backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } }, - decision_order_type{ target_platform::gpu_amd, { backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::stdpar } }, - decision_order_type{ target_platform::gpu_intel, { backend_type::sycl, backend_type::opencl, backend_type::stdpar } }, - decision_order_type{ target_platform::cpu, { backend_type::sycl, backend_type::opencl, backend_type::openmp, backend_type::stdpar } } + decision_order_type{ target_platform::gpu_nvidia, { backend_type::cuda, backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::kokkos, backend_type::stdpar } }, + decision_order_type{ target_platform::gpu_amd, { backend_type::hip, backend_type::opencl, backend_type::sycl, backend_type::kokkos, backend_type::stdpar } }, + decision_order_type{ target_platform::gpu_intel, { backend_type::sycl, backend_type::opencl, backend_type::kokkos, backend_type::stdpar } }, + decision_order_type{ target_platform::cpu, { 
backend_type::sycl, backend_type::kokkos, backend_type::opencl, backend_type::openmp, backend_type::stdpar } } }; // return the default backend based on the previously defined decision order @@ -95,6 +98,8 @@ std::ostream &operator<<(std::ostream &out, const backend_type backend) { return out << "opencl"; case backend_type::sycl: return out << "sycl"; + case backend_type::kokkos: + return out << "kokkos"; } return out << "unknown"; } @@ -118,6 +123,8 @@ std::istream &operator>>(std::istream &in, backend_type &backend) { backend = backend_type::opencl; } else if (str == "sycl") { backend = backend_type::sycl; + } else if (str == "kokkos") { + backend = backend_type::kokkos; } else { in.setstate(std::ios::failbit); } From 4781224b22743dc8363ba01d302c90b11897fd6c Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 14 Oct 2024 12:20:29 +0200 Subject: [PATCH 002/123] Add specialized exception type. --- include/plssvm/backends/Kokkos/exceptions.hpp | 38 +++++++++++++++++++ src/plssvm/backends/Kokkos/exceptions.cpp | 21 ++++++++++ 2 files changed, 59 insertions(+) create mode 100644 include/plssvm/backends/Kokkos/exceptions.hpp create mode 100644 src/plssvm/backends/Kokkos/exceptions.cpp diff --git a/include/plssvm/backends/Kokkos/exceptions.hpp b/include/plssvm/backends/Kokkos/exceptions.hpp new file mode 100644 index 000000000..047b7cad8 --- /dev/null +++ b/include/plssvm/backends/Kokkos/exceptions.hpp @@ -0,0 +1,38 @@ +/** +* @file +* @author Alexander Van Craen +* @author Marcel Breyer +* @copyright 2018-today The PLSSVM project - All Rights Reserved +* @license This file is part of the PLSSVM project which is released under the MIT license. +* See the LICENSE.md file in the project root for full license information. +* +* @brief Implements custom exception classes specific to the Kokkos backend. 
+*/ + +#ifndef PLSSVM_BACKENDS_KOKKOS_EXCEPTIONS_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_EXCEPTIONS_HPP_ +#pragma once + +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/exceptions/source_location.hpp" // plssvm::source_location + +#include // std::string + +namespace plssvm::kokkos { + +/** +* @brief Exception type thrown if a problem with the Kokkos backend occurs. +*/ +class backend_exception : public exception { + public: + /** + * @brief Construct a new exception forwarding the exception message and source location to plssvm::exception. + * @param[in] msg the exception's `what()` message + * @param[in] loc the exception's call side information + */ + explicit backend_exception(const std::string &msg, source_location loc = source_location::current()); +}; + +} // namespace plssvm::kokkos + +#endif // PLSSVM_BACKENDS_KOKKOS_EXCEPTIONS_HPP_ diff --git a/src/plssvm/backends/Kokkos/exceptions.cpp b/src/plssvm/backends/Kokkos/exceptions.cpp new file mode 100644 index 000000000..4186e4008 --- /dev/null +++ b/src/plssvm/backends/Kokkos/exceptions.cpp @@ -0,0 +1,21 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/backends/Kokkos/exceptions.hpp" + +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/exceptions/source_location.hpp" // plssvm::source_location + +#include // std::string + +namespace plssvm::kokkos { + +backend_exception::backend_exception(const std::string &msg, source_location loc) : + ::plssvm::exception{ msg, "kokkos::backend_exception", loc } { } + +} // namespace plssvm::kokkos From 40ec682995aa16d2a32de6498c8d5cc9b099b564 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 14 Oct 2024 12:20:46 +0200 Subject: [PATCH 003/123] Stub implementation of the pinned_memory class. --- .../backends/Kokkos/detail/pinned_memory.hpp | 93 +++++++++++++++++++ .../backends/Kokkos/detail/pinned_memory.cpp | 46 +++++++++ 2 files changed, 139 insertions(+) create mode 100644 include/plssvm/backends/Kokkos/detail/pinned_memory.hpp create mode 100644 src/plssvm/backends/Kokkos/detail/pinned_memory.cpp diff --git a/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp b/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp new file mode 100644 index 000000000..dffb0d1c7 --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp @@ -0,0 +1,93 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Small RAII wrapper to register and unregister pinned host memory for the Kokkos backend.
+ */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_PINNED_MEMORY_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_PINNED_MEMORY_HPP_ +#pragma once + +#include "plssvm/backends/host_pinned_memory.hpp" // plssvm::detail::host_pinned_memory +#include "plssvm/matrix.hpp" // plssvm::matrix, plssvm::layout_type + +#include // std::size_t +#include // std::vector + +namespace plssvm::kokkos::detail { + +/** + * @brief A small RAII wrapper class to register/unregister pinned memory. + * @tparam T the type of the data array that should be pinned + */ +template +class [[nodiscard]] pinned_memory final : public ::plssvm::detail::host_pinned_memory { + /// The template base type of the Kokkos pinned_memory class. + using base_type = ::plssvm::detail::host_pinned_memory; + + using base_type::is_pinned_; + using base_type::ptr_; + + public: + using typename base_type::value_type; + + /** + * @brief Register the memory managed by the matrix @p matr to use pinned memory. + * @tparam layout the layout type of the matrix + * @param[in] matr the memory to pin + */ + template + explicit pinned_memory(const matrix &matr) : + pinned_memory{ matr.data(), matr.size_padded() } { } + + /** + * @brief Register the memory managed by the vector @p vec to use pinned memory. + * @param[in] vec the memory to pin + */ + explicit pinned_memory(const std::vector &vec); + /** + * @brief Register the memory managed by the pointer @p ptr with @p size to use pinned memory. + * @param[in] ptr the memory to pin + * @param[in] size the number of elements in the memory region to pin (**not** bytes!) + */ + pinned_memory(const T *ptr, std::size_t size); + /** + * @brief Unregister the memory managed by this object. + */ + ~pinned_memory() override; + + /** + * @brief Delete the default constructor: a memory region to pin must always be provided. + */ + pinned_memory() = delete; + /** + * @brief Delete the copy-constructor. + */ + pinned_memory(const pinned_memory &) = delete; + /** + * @brief Delete the move-constructor.
+ */ + pinned_memory(pinned_memory &&) noexcept = delete; + /** + * @brief Delete the copy-assignment operator. + * @return `*this` + */ + pinned_memory &operator=(const pinned_memory &) = delete; + /** + * @brief Delete the move-assignment operator. + * @return `*this` + */ + pinned_memory &operator=(pinned_memory &&) noexcept = delete; +}; + +extern template class pinned_memory; +extern template class pinned_memory; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_PINNED_MEMORY_HPP_ diff --git a/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp b/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp new file mode 100644 index 000000000..919cbdaa1 --- /dev/null +++ b/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp @@ -0,0 +1,46 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information.
+ */ + +#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" + +#include "plssvm/backends/host_pinned_memory.hpp" // plssvm::detail::host_pinned_memory +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception + +#include // std::size_t +#include // std::terminate +#include // std::cerr, std::endl +#include // std::vector + +namespace plssvm::kokkos::detail { + +template +pinned_memory::pinned_memory(const std::vector &vec) : + pinned_memory{ vec.data(), vec.size() } { } + +template +pinned_memory::pinned_memory(const T *ptr, const std::size_t size) : + ::plssvm::detail::host_pinned_memory{ ptr } { + this->pin_memory(size * sizeof(T)); +} + +template +pinned_memory::~pinned_memory() { + try { + if (is_pinned_ && ptr_ != nullptr) { + this->unpin_memory(); + } + } catch (const plssvm::exception &e) { + std::cerr << e.what_with_loc() << std::endl; + std::terminate(); + } +} + +template class pinned_memory; +template class pinned_memory; + +} // namespace plssvm::kokkos::detail From dd6999086844851a382713af213b9297dc042270 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 14 Oct 2024 12:21:05 +0200 Subject: [PATCH 004/123] Stub implementation of the Kokkos csvm class. 
--- include/plssvm/backends/Kokkos/csvm.hpp | 198 ++++++++++++++++++ .../Kokkos/detail/execution_space.hpp | 42 ++++ .../plssvm/backends/Kokkos/detail/utility.hpp | 89 ++++++++ src/plssvm/backends/Kokkos/csvm.cpp | 179 ++++++++++++++++ .../Kokkos/detail/execution_space.cpp | 39 ++++ src/plssvm/backends/Kokkos/detail/utility.cpp | 138 ++++++++++++ 6 files changed, 685 insertions(+) create mode 100644 include/plssvm/backends/Kokkos/csvm.hpp create mode 100644 include/plssvm/backends/Kokkos/detail/execution_space.hpp create mode 100644 include/plssvm/backends/Kokkos/detail/utility.hpp create mode 100644 src/plssvm/backends/Kokkos/csvm.cpp create mode 100644 src/plssvm/backends/Kokkos/detail/execution_space.cpp create mode 100644 src/plssvm/backends/Kokkos/detail/utility.cpp diff --git a/include/plssvm/backends/Kokkos/csvm.hpp b/include/plssvm/backends/Kokkos/csvm.hpp new file mode 100644 index 000000000..524f1bd4b --- /dev/null +++ b/include/plssvm/backends/Kokkos/csvm.hpp @@ -0,0 +1,198 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a C-SVM using the Kokkos backend. 
+ */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_CSVM_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_CSVM_HPP_ +#pragma once + +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::{dim_type, execution_range} +#include "plssvm/backends/gpu_csvm.hpp" // plssvm::detail::gpu_csvm +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr +#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space +#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" // plssvm::kokkos::detail::pinned_memory +#include "plssvm/csvm.hpp" // plssvm::detail::csvm_backend_exists +#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size +#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include // std::size_t +#include // std::true_type +#include // std::forward +#include // std::vector + +namespace plssvm { + +namespace kokkos { + +/** + * @brief A C-SVM implementation using Kokkos as backend. + */ +class csvm : public ::plssvm::detail::gpu_csvm { + protected: + // protected for the test mock class + /// The template base type of the Kokkos C-SVM class. + using base_type = ::plssvm::detail::gpu_csvm; + + using base_type::data_distribution_; + using base_type::devices_; + + public: + using base_type::device_ptr_type; + using typename base_type::pinned_memory_type; + using typename base_type::queue_type; + + /** + * @brief Construct a new C-SVM using the Kokkos backend with the parameters given through @p params. 
+ * @param[in] params struct encapsulating all possible parameters + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + explicit csvm(parameter params = {}); + /** + * @brief Construct a new C-SVM using the Kokkos backend on the @p target platform with the parameters given through @p params. + * @param[in] target the target platform used for this C-SVM + * @param[in] params struct encapsulating all possible SVM parameters + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + explicit csvm(target_platform target, parameter params = {}); + + /** + * @brief Construct a new C-SVM using the Kokkos backend and the optionally provided @p named_args. + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + template )> + explicit csvm(Args &&...named_args) : + csvm{ plssvm::target_platform::automatic, std::forward(named_args)... } { } + + /** + * @brief Construct a new C-SVM using the Kokkos backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] target the target platform used for this C-SVM + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + template )> + explicit csvm(const target_platform target, Args &&...named_args) : + base_type{ std::forward(named_args)... } { + this->init(target); + } + + /** + * @copydoc plssvm::csvm::csvm(const plssvm::csvm &) + */ + csvm(const csvm &) = delete; + /** + * @copydoc plssvm::csvm::csvm(plssvm::csvm &&) noexcept + */ + csvm(csvm &&) noexcept = default; + /** + * @copydoc plssvm::csvm::operator=(const plssvm::csvm &) + */ + csvm &operator=(const csvm &) = delete; + /** + * @copydoc plssvm::csvm::operator=(plssvm::csvm &&) noexcept + */ + csvm &operator=(csvm &&) noexcept = default; + /** + * @brief Wait for all operations on all Kokkos devices to finish. + * @details Terminates the program, if any exception is thrown. + */ + ~csvm() override; + + protected: + /** + * @brief Initialize all important states related to the Kokkos backend. 
+ * @param[in] target the target platform to use + * @throws plssvm::kokkos::backend_exception if the requested target is not available + * @throws plssvm::kokkos::backend_exception if no device for the requested target was found + */ + void init(target_platform target); + + /** + * @copydoc plssvm::csvm::get_device_memory + */ + [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_device_memory() const final; + /** + * @copydoc plssvm::csvm::get_max_mem_alloc_size + */ + [[nodiscard]] std::vector<::plssvm::detail::memory_size> get_max_mem_alloc_size() const final; + /** + * @copydoc plssvm::detail::gpu_csvm::get_max_work_group_size + */ + [[nodiscard]] std::size_t get_max_work_group_size(std::size_t device_id) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::get_max_grid_size + */ + [[nodiscard]] ::plssvm::detail::dim_type get_max_grid_size(std::size_t device_id) const override; + + //***************************************************// + // fit // + //***************************************************// + /** + * @copydoc plssvm::detail::gpu_csvm::run_assemble_kernel_matrix_explicit + */ + [[nodiscard]] device_ptr_type run_assemble_kernel_matrix_explicit(std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter &params, const device_ptr_type &data_d, const device_ptr_type &q_red_d, real_type QA_cost) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::run_blas_level_3_kernel_explicit + */ + void run_blas_level_3_kernel_explicit(std::size_t device_id, const ::plssvm::detail::execution_range &exec, const ::plssvm::detail::execution_range &mirror_exec, real_type alpha, const device_ptr_type &A_d, const device_ptr_type &B_d, real_type beta, device_ptr_type &C_d) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::run_assemble_kernel_matrix_implicit_blas_level_3 + */ + void run_assemble_kernel_matrix_implicit_blas_level_3(std::size_t device_id, const ::plssvm::detail::execution_range &exec, real_type alpha, const
device_ptr_type &A_d, const parameter &params, const device_ptr_type &q_red_d, real_type QA_cost, const device_ptr_type &B_d, device_ptr_type &C_d) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::run_inplace_matrix_addition + */ + void run_inplace_matrix_addition(std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const device_ptr_type &rhs_d) const override; + /** + * @copydoc plssvm::detail::gpu_csvm::run_inplace_matrix_scale + */ + void run_inplace_matrix_scale(std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, real_type scale) const override; + + //***************************************************// + // predict, score // + //***************************************************// + /** + * @copydoc plssvm::detail::gpu_csvm::run_w_kernel + */ + [[nodiscard]] device_ptr_type run_w_kernel(std::size_t device_id, const ::plssvm::detail::execution_range &exec, const device_ptr_type &alpha_d, const device_ptr_type &sv_d) const final; + /** + * @copydoc plssvm::detail::gpu_csvm::run_predict_kernel + */ + [[nodiscard]] device_ptr_type run_predict_kernel(std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter &params, const device_ptr_type &alpha_d, const device_ptr_type &rho_d, const device_ptr_type &sv_or_w_d, const device_ptr_type &predict_points_d) const final; + + /// The used Kokkos execution space. + detail::execution_space space_; +}; + +} // namespace kokkos + +namespace detail { + +/** + * @brief Sets the `value` to `true` since C-SVMs using the Kokkos backend are available.
+ */ +template <> +struct csvm_backend_exists : std::true_type { }; + +} // namespace detail + +} // namespace plssvm + +#endif // PLSSVM_BACKENDS_KOKKOS_CSVM_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/execution_space.hpp b/include/plssvm/backends/Kokkos/detail/execution_space.hpp new file mode 100644 index 000000000..8e89975c3 --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/execution_space.hpp @@ -0,0 +1,42 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Execution space enumeration for the ExecutionSpaces in Kokkos. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_EXECUTION_SPACE_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_EXECUTION_SPACE_HPP_ +#pragma once + +#include "fmt/base.h" // fmt::formatter +#include "fmt/ostream.h" // fmt::ostream_formatter + +#include // std::ostream forward declaration + +namespace plssvm::kokkos::detail { + +enum class execution_space { + cuda, + hip, + sycl, + hpx, + openmp, + openmp_target, + openacc, + threads, + serial +}; + +std::ostream &operator<<(std::ostream &out, execution_space space); + +} // namespace plssvm::kokkos::detail + +template <> +struct fmt::formatter : fmt::ostream_formatter { }; + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_EXECUTION_SPACE_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/utility.hpp b/include/plssvm/backends/Kokkos/detail/utility.hpp new file mode 100644 index 000000000..3b7a9c706 --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/utility.hpp @@ -0,0 +1,89 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT 
license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Utility functions for the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ +#pragma once + +#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "Kokkos_Core.hpp" // TODO: ? + +#include // std::size_t +#include // std::string +#include // std::is_same_v + +namespace plssvm::kokkos::detail { + +template +[[nodiscard]] execution_space determine_execution_space() noexcept { + // determine the execution_space enumeration value based on the provided Kokkos execution space +#if defined(KOKKOS_ENABLE_CUDA) + if constexpr (std::is_same_v) { + return execution_space::cuda; + } +#endif +#if defined(KOKKOS_ENABLE_HIP) + if constexpr (std::is_same_v) { + return execution_space::hip; + } +#endif +#if defined(KOKKOS_ENABLE_SYCL) + if constexpr (std::is_same_v) { + return execution_space::sycl; + } +#endif +#if defined(KOKKOS_ENABLE_HPX) + if constexpr (std::is_same_v) { + return execution_space::hpx; + } +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + if constexpr (std::is_same_v) { + return execution_space::openmp; + } +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + if constexpr (std::is_same_v) { + return execution_space::openmp_target; + } +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + if constexpr (std::is_same_v) { + return execution_space::openacc; + } +#endif +#if defined(KOKKOS_ENABLE_THREADS) + if constexpr (std::is_same_v) { + return execution_space::threads; + } +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + if constexpr (std::is_same_v) { + return execution_space::serial; + } +#endif +} + +[[nodiscard]] target_platform determine_default_target_platform_from_execution_space(execution_space space); + +void 
check_execution_space_target_platform_combination(execution_space space, target_platform target); + +[[nodiscard]] std::string get_device_name(execution_space space, std::size_t device_id); + +void device_synchronize_all(); + +[[nodiscard]] std::string get_kokkos_version(); + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp new file mode 100644 index 000000000..3c3afb022 --- /dev/null +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -0,0 +1,179 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/backends/Kokkos/csvm.hpp" + +#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::{determine_execution_space, get_kokkos_version, determine_default_target_platform_from_execution_space, check_execution_space_target_platform_combination, get_device_name, device_synchronize_all} +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level + +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos::num_devices + +#include "fmt/format.h" // fmt::format + +#include // std::terminate +#include // std::cout, std::endl +#include // std::iota +#include // std::vector + +namespace plssvm::kokkos { + +csvm::csvm(parameter params) : + csvm{ plssvm::target_platform::automatic, params } { } + +csvm::csvm(target_platform target, 
parameter params) : + base_type{ params } { + this->init(target); +} + +void csvm::init(const target_platform target) { + // check whether the requested target platform has been enabled + switch (target) { + case target_platform::automatic: + break; + case target_platform::cpu: +#if !defined(PLSSVM_HAS_CPU_TARGET) + throw backend_exception{ fmt::format("Requested target platform '{}' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!", target) }; +#endif + break; + case target_platform::gpu_nvidia: +#if !defined(PLSSVM_HAS_NVIDIA_TARGET) + throw backend_exception{ fmt::format("Requested target platform '{}' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!", target) }; +#endif + break; + case target_platform::gpu_amd: +#if !defined(PLSSVM_HAS_AMD_TARGET) + throw backend_exception{ fmt::format("Requested target platform '{}' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!", target) }; +#endif + break; + case target_platform::gpu_intel: +#if !defined(PLSSVM_HAS_INTEL_TARGET) + throw backend_exception{ fmt::format("Requested target platform '{}' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!", target) }; +#endif + break; + } + + // TODO: document: we ALWAYS use the default execution space + + // set the execution space -> we always only use the Kokkos::DefaultExecutionSpace + space_ = detail::determine_execution_space(); + + plssvm::detail::log(verbosity_level::full, + "\nUsing Kokkos ({}) as backend with the Kokkos::DefaultExecutionSpace \"{}\".\n", + plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_version", detail::get_kokkos_version() }, + plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_default_execution_space", space_ }); + + // check whether the provided target platform is compatible with the Kokkos execution space + if (target == target_platform::automatic) { + // determine the default target based on the provided Kokkos execution space + target_ = 
detail::determine_default_target_platform_from_execution_space(space_); + plssvm::detail::log(verbosity_level::full, + "Using {} as automatic target platform.\n", + target_); + } else { + // check whether the provided target platform is compatible with the execution space + // throws a backend exception if the combination is invalid + detail::check_execution_space_target_platform_combination(space_, target); + target_ = target; + } + + // enumerate all devices reported by Kokkos as IDs 0, 1, ...; NOTE(review): Kokkos::num_devices() may return -1 for host-only builds -- confirm before the unsigned cast + devices_.resize(static_cast::size_type>(Kokkos::num_devices())); + std::iota(devices_.begin(), devices_.end(), 0); + + // throw exception if no Kokkos devices could be found + if (devices_.empty()) { + throw backend_exception{ fmt::format("No devices found for the Kokkos execution space {} with the target platform {}!", space_, target_) }; + } + + // print found Kokkos devices + plssvm::detail::log(verbosity_level::full, + "Found {} Kokkos device(s) for the target platform {}:\n", + plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }, + plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); + + std::vector device_names; + device_names.reserve(devices_.size()); + for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { + const std::string device_name = detail::get_device_name(space_, device); + plssvm::detail::log(verbosity_level::full, + " [{}, {}]\n", + device, + device_name); + device_names.emplace_back(device_name); + } + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names })); + plssvm::detail::log(verbosity_level::full | verbosity_level::timing, + "\n"); +} + +csvm::~csvm() { + try { + // be sure that all operations on the Kokkos execution spaces have finished before destruction + detail::device_synchronize_all(); + } catch (const plssvm::exception &e) { + std::cout << e.what_with_loc() << 
std::endl; + std::terminate(); + } +} + +std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { + return {}; +} + +std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const { + return {}; +} + +std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { + return {}; +} + +::plssvm::detail::dim_type csvm::get_max_grid_size([[maybe_unused]] const std::size_t device_id) const { + return {}; +} + +//***************************************************// +// fit // +//***************************************************// + +auto csvm::run_assemble_kernel_matrix_explicit(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter ¶ms, const device_ptr_type &data_d, const device_ptr_type &q_red_d, real_type QA_cost) const -> device_ptr_type { + return {}; +} + +void csvm::run_blas_level_3_kernel_explicit(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const ::plssvm::detail::execution_range &mirror_exec, const real_type alpha, const device_ptr_type &A_d, const device_ptr_type &B_d, const real_type beta, device_ptr_type &C_d) const { +} + +void csvm::run_inplace_matrix_addition(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const device_ptr_type &rhs_d) const { +} + +void csvm::run_inplace_matrix_scale(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const real_type scale) const { +} + +void csvm::run_assemble_kernel_matrix_implicit_blas_level_3(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const real_type alpha, const device_ptr_type &A_d, const parameter ¶ms, const device_ptr_type &q_red, const real_type QA_cost, const device_ptr_type &B_d, device_ptr_type &C_d) const { +} + +//***************************************************// +// predict, score // +//***************************************************// + +auto 
csvm::run_w_kernel(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const device_ptr_type &alpha_d, const device_ptr_type &sv_d) const -> device_ptr_type { + return {}; +} + +auto csvm::run_predict_kernel(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter ¶ms, const device_ptr_type &alpha_d, const device_ptr_type &rho_d, const device_ptr_type &sv_or_w_d, const device_ptr_type &predict_points_d) const -> device_ptr_type { + return {}; +} + +} // namespace plssvm::kokkos diff --git a/src/plssvm/backends/Kokkos/detail/execution_space.cpp b/src/plssvm/backends/Kokkos/detail/execution_space.cpp new file mode 100644 index 000000000..65afa72b1 --- /dev/null +++ b/src/plssvm/backends/Kokkos/detail/execution_space.cpp @@ -0,0 +1,39 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/backends/Kokkos/detail/execution_space.hpp" + +#include // std::ostream + +namespace plssvm::kokkos::detail { + +std::ostream &operator<<(std::ostream &out, const execution_space space) { + switch (space) { + case execution_space::cuda: + return out << "Cuda"; + case execution_space::hip: + return out << "HIP"; + case execution_space::sycl: + return out << "SYCL"; + case execution_space::hpx: + return out << "HPX"; + case execution_space::openmp: + return out << "OpenMP"; + case execution_space::openmp_target: + return out << "OpenMPTarget"; + case execution_space::openacc: + return out << "OpenACC"; + case execution_space::threads: + return out << "Threads"; + case execution_space::serial: + return out << "Serial"; + } + return out << "unknown"; +} + +} // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp new file mode 100644 index 000000000..9458bb899 --- /dev/null +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -0,0 +1,138 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "plssvm/backends/Kokkos/detail/utility.hpp" + +#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "Kokkos_Macros.hpp" + +#if defined(KOKKOS_ENABLE_CUDA) + #include "cuda_runtime.h" // cudaDeviceProp, cudaGetDeviceProperties +#endif +#if defined(KOKKOS_ENABLE_HIP) + #include "hip/hip_runtime_api.h" // HIP runtime functions +#endif + +#include "fmt/core.h" // fmt::format + +#include // std::size_t +#include // std::string + +namespace plssvm::kokkos::detail { + +target_platform determine_default_target_platform_from_execution_space(const execution_space space) { + switch (space) { + case execution_space::cuda: + return target_platform::gpu_nvidia; + case execution_space::hip: + return target_platform::gpu_amd; + case execution_space::sycl: + case execution_space::openmp_target: + case execution_space::openacc: + return target_platform::gpu_nvidia; // TODO: what to return here? 
+ case execution_space::openmp: + case execution_space::hpx: + case execution_space::threads: + case execution_space::serial: + return target_platform::cpu; + } + ::plssvm::detail::unreachable(); +} + +void check_execution_space_target_platform_combination(const execution_space space, const target_platform target) { + PLSSVM_ASSERT(target != target_platform::automatic, "The provided target platform may not be the automatic target platform!"); + + switch (space) { + case execution_space::cuda: + if (target != target_platform::gpu_nvidia) { + throw backend_exception{ fmt::format("The target platform {} is not supported for Kokkos {} execution space!", target, space) }; + } + break; + case execution_space::hip: + if (target != target_platform::gpu_amd) { + throw backend_exception{ fmt::format("The target platform {} is not supported for Kokkos {} execution space!", target, space) }; + } + break; + case execution_space::sycl: + // SYCL may support all target platforms! + // TODO: use SYCL specific functions to check? + case execution_space::openmp_target: + // OpenMP Target Offloading may support all target platforms! + // TODO: use OpenMP Target Offloading specific functions to check? + case execution_space::openacc: + // OpenACC may support all target platforms! + // TODO: use OpenACC Target Offloading specific functions to check? + break; + case execution_space::openmp: + case execution_space::hpx: + case execution_space::threads: + case execution_space::serial: + if (target != target_platform::cpu) { + throw backend_exception{ fmt::format("The target platform {} is not supported for Kokkos {} execution space!", target, space) }; + } + break; + } +} + +// TODO: error checks? + +std::string get_device_name(const execution_space space, const std::size_t device_id) { + // TODO: implement for other backends! 
+ switch (space) { + case execution_space::cuda: +#if defined(KOKKOS_ENABLE_CUDA) + { + cudaDeviceProp prop{}; + cudaGetDeviceProperties(&prop, static_cast(device_id)); + return std::string{ prop.name }; + } +#else + throw backend_exception{ fmt::format("Unsupported Kokkos execution space \"{}\"!", space) }; +#endif + case execution_space::hip: +#if defined(KOKKOS_ENABLE_HIP) + { + hipDeviceProp_t prop{}; + hipGetDeviceProperties(&prop, static_cast(device_id)); + return std::string{ prop.name }; + } +#else + throw backend_exception{ fmt::format("Unsupported Kokkos execution space \"{}\"!", space) }; +#endif + case execution_space::openmp: +#if defined(KOKKOS_ENABLE_HIP) + return "CPU host device"; +#else + throw backend_exception{ fmt::format("Unsupported Kokkos execution space \"{}\"!", space) }; +#endif + case execution_space::sycl: + case execution_space::hpx: + case execution_space::openmp_target: + case execution_space::openacc: + case execution_space::threads: + case execution_space::serial: + throw backend_exception{ fmt::format("Unsupported Kokkos execution space \"{}\"!", space) }; + } + return "unknown"; +} + +void device_synchronize_all() { + Kokkos::DefaultExecutionSpace::impl_static_fence("synchronize all"); +} + +std::string get_kokkos_version() { + // get the Kokkos version + return fmt::format("{}.{}.{}", KOKKOS_VERSION_MAJOR, KOKKOS_VERSION_MINOR, KOKKOS_VERSION_PATCH); +} + +} // namespace plssvm::kokkos::detail From b1dcb5a6dcdedddfbe366757942378a62fca1601 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 14 Oct 2024 12:21:28 +0200 Subject: [PATCH 005/123] Add Kokkos to the build. 
--- .clang-format | 2 +- CMakeLists.txt | 11 ++++ include/plssvm/core.hpp | 6 ++ include/plssvm/csvm_factory.hpp | 5 ++ src/main_predict.cpp | 22 ++++++++ src/main_train.cpp | 23 ++++++++ src/plssvm/backends/Kokkos/CMakeLists.txt | 67 +++++++++++++++++++++++ 7 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 src/plssvm/backends/Kokkos/CMakeLists.txt diff --git a/.clang-format b/.clang-format index 30a5ef1db..533b9bcab 100644 --- a/.clang-format +++ b/.clang-format @@ -79,7 +79,7 @@ IncludeBlocks: Regroup IncludeCategories: - Regex: '^"plssvm/' Priority: 1 - - Regex: '^"(cuda|hip|CL|sycl|omp)' + - Regex: '^"(cuda|hip|CL|sycl|omp|Kokkos)' Priority: 2 - Regex: '^"(tests|bindings)/' Priority: 3 diff --git a/CMakeLists.txt b/CMakeLists.txt index f0c01a9ec..e293f8e3e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -380,6 +380,13 @@ if (PLSSVM_ENABLE_SYCL_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_SYCL_BACKEND) add_subdirectory(src/plssvm/backends/SYCL) endif () +## check for Kokkos backend +set(PLSSVM_ENABLE_KOKKOS_BACKEND AUTO CACHE STRING "Enable SYCL Backend") +set_property(CACHE PLSSVM_ENABLE_KOKKOS_BACKEND PROPERTY STRINGS AUTO ON OFF) +if (PLSSVM_ENABLE_KOKKOS_BACKEND MATCHES "AUTO" OR PLSSVM_ENABLE_KOKKOS_BACKEND) + add_subdirectory(src/plssvm/backends/Kokkos) +endif () + ## check if ANY backend is available/has been enabled get_target_property(PLSSVM_LINKED_BACKENDS ${PLSSVM_ALL_LIBRARY_NAME} INTERFACE_LINK_LIBRARIES) if (NOT PLSSVM_LINKED_BACKENDS) @@ -690,6 +697,10 @@ if (TARGET ${PLSSVM_SYCL_BACKEND_LIBRARY_NAME}) endforeach () list(APPEND PLSSVM_BACKEND_NAME_LIST "sycl") endif () +if (TARGET ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + message(STATUS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING}") + list(APPEND PLSSVM_BACKEND_NAME_LIST "kokkos") +endif () message(STATUS "") ######################################################################################################################## diff --git a/include/plssvm/core.hpp 
b/include/plssvm/core.hpp index a569d6915..1edf825c1 100644 --- a/include/plssvm/core.hpp +++ b/include/plssvm/core.hpp @@ -107,4 +107,10 @@ using namespace plssvm::PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION; /// Namespace containing the C-SVM using the SYCL backend with the preferred SYCL implementation. **Should not** directly be used by users. namespace plssvm::sycl::detail { } +/// Namespace containing the C-SVM using the Kokkos backend. +namespace plssvm::kokkos { } + +/// Namespace containing Kokkos backend specific implementation details. **Should not** directly be used by users. +namespace plssvm::kokkos::detail { } + #endif // PLSSVM_CORE_HPP_ diff --git a/include/plssvm/csvm_factory.hpp b/include/plssvm/csvm_factory.hpp index 0b923caa0..190ff8984 100644 --- a/include/plssvm/csvm_factory.hpp +++ b/include/plssvm/csvm_factory.hpp @@ -45,6 +45,9 @@ #include "plssvm/backends/SYCL/AdaptiveCpp/csvm.hpp" // plssvm::adaptivecpp::csvm, plssvm::csvm_backend_exists_v #endif #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + #include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm, plssvm::csvm_backend_exists_v +#endif #include "fmt/format.h" // fmt::format #include "igor/igor.hpp" // igor::parser, igor::has_unnamed_arguments @@ -138,6 +141,8 @@ template return make_csvm_default_impl(std::forward(args)...); case backend_type::sycl: return make_csvm_sycl_impl(std::forward(args)...); + case backend_type::kokkos: + return make_csvm_default_impl(std::forward(args)...); } throw unsupported_backend_exception{ "Unrecognized backend provided!" 
}; } diff --git a/src/main_predict.cpp b/src/main_predict.cpp index 31585a758..b4d47cb05 100644 --- a/src/main_predict.cpp +++ b/src/main_predict.cpp @@ -17,6 +17,10 @@ // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + #include "Kokkos_Core.hpp" // Kokkos::initialize, Kokkos::is_initialized, Kokkos::finalize, Kokkos::is_finalized +#endif + #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) #include "plssvm/detail/tracking/cpu/hardware_sampler.hpp" // plssvm::detail::tracking::cpu_hardware_sampler #include "plssvm/detail/tracking/hardware_sampler.hpp" // plssvm::detail::tracking::hardware_sampler @@ -72,6 +76,16 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; + // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) + const bool use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; + + // initialize Kokkos if necessary +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + if (use_kokkos_as_backend) { + Kokkos::initialize(argc, argv); // TODO: set device? + PLSSVM_ASSERT(Kokkos::is_initialized(), "Something went wrong initializing the Kokkos environment!"); + } +#endif // create default csvm const std::unique_ptr svm = use_sycl_as_backend ? 
plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type) @@ -164,6 +178,14 @@ int main(int argc, char *argv[]) { PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HARDWARE_SAMPLER_ENTRY(*s); }); #endif + + // finalize Kokkos if necessary +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + if (use_kokkos_as_backend) { // TODO: what if an exception occurred? + Kokkos::finalize(); + PLSSVM_ASSERT(Kokkos::is_finalized(), "Something went wrong finalizing the Kokkos environment!"); + } +#endif }; std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser)); diff --git a/src/main_train.cpp b/src/main_train.cpp index 91958bf19..ff4365638 100644 --- a/src/main_train.cpp +++ b/src/main_train.cpp @@ -14,8 +14,13 @@ #include "plssvm/detail/logging.hpp" // plssvm::detail::log #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE, // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HARDWARE_SAMPLER_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + #include "Kokkos_Core.hpp" // Kokkos::initialize, Kokkos::is_initialized, Kokkos::finalize, Kokkos::is_finalized +#endif + #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) #include "plssvm/detail/tracking/cpu/hardware_sampler.hpp" // plssvm::detail::tracking::cpu_hardware_sampler #include "plssvm/detail/tracking/hardware_sampler.hpp" // plssvm::detail::tracking::hardware_sampler @@ -69,6 +74,16 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && 
plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; + // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) + const bool use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; + + // initialize Kokkos if necessary +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + if (use_kokkos_as_backend) { + Kokkos::initialize(argc, argv); // TODO: set device? + PLSSVM_ASSERT(Kokkos::is_initialized(), "Something went wrong initializing the Kokkos environment!"); + } +#endif // create SVM const std::unique_ptr svm = use_sycl_as_backend ? plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type) @@ -105,6 +120,14 @@ int main(int argc, char *argv[]) { PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HARDWARE_SAMPLER_ENTRY(*s); }); #endif + + // finalize Kokkos if necessary +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + if (use_kokkos_as_backend) { // TODO: what if an exception occurred + Kokkos::finalize(); + PLSSVM_ASSERT(Kokkos::is_finalized(), "Something went wrong finalizing the Kokkos environment!"); + } +#endif }; std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser)); diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt new file mode 100644 index 000000000..d7d1037ce --- /dev/null +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -0,0 +1,67 @@ +## Authors: Alexander Van Craen, Marcel Breyer +## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved +## License: This file is part of the PLSSVM project which is released under the MIT license. +## See the LICENSE.md file in the project root for full license information. 
+######################################################################################################################## + +list(APPEND CMAKE_MESSAGE_INDENT "Kokkos: ") + +# check if Kokkos can be enabled +message(CHECK_START "Checking for Kokkos backend") + +find_package(Kokkos) + +if (NOT Kokkos_FOUND) + message(CHECK_FAIL "not found") + if (PLSSVM_ENABLE_KOKKOS_BACKEND MATCHES "ON") + message(SEND_ERROR "Cannot find requested backend: Kokkos!") + endif () + return() +endif () +message(CHECK_PASS "found") + +# explicitly set sources +set(PLSSVM_KOKKOS_SOURCES + ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/execution_space.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp + ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp + ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp +) + +# set target properties +set_local_and_parent(PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME plssvm-Kokkos) +add_library(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_KOKKOS_SOURCES}) +target_link_libraries(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC Kokkos::kokkos) + +# link base library against Kokkos library +target_link_libraries(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC ${PLSSVM_BASE_LIBRARY_NAME}) + +# set whether the kernel source should be compiled with fast math enabled or not # TODO: enable fast-math +#if (PLSSVM_ENABLE_FAST_MATH) +# target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE PLSSVM_ENABLE_FAST_MATH) +#endif () + +# set compile definition that the Kokkos backend is available +target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_HAS_KOKKOS_BACKEND) +target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC PLSSVM_HAS_KOKKOS_BACKEND) + +# link against interface library +target_link_libraries(${PLSSVM_ALL_LIBRARY_NAME} INTERFACE ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + +# mark backend library as install target 
+append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + +# generate summary string +set(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_COMPILER " - Kokkos:") +include(${PROJECT_SOURCE_DIR}/cmake/assemble_summary_string.cmake) +assemble_summary_string(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS) +# do not print any special target architecture information +string(REPLACE " (${PLSSVM_CPU_TARGET_ARCHS})" "" PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}") +string(REPLACE " (${PLSSVM_NVIDIA_TARGET_ARCHS})" "" PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}") +string(REPLACE " (${PLSSVM_AMD_TARGET_ARCHS})" "" PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}") +string(REPLACE " (${PLSSVM_INTEL_TARGET_ARCHS})" "" PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}") +set(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING "${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_COMPILER}${PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS}" PARENT_SCOPE) + +list(POP_BACK CMAKE_MESSAGE_INDENT) \ No newline at end of file From 0cf9e36e9cb15fdaf650c5b9e4900217db355eb5 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 14 Oct 2024 12:21:43 +0200 Subject: [PATCH 006/123] Kokkos device_ptr stub tests. 
--- .../backends/Kokkos/detail/device_ptr.hpp | 147 ++++++++++++++++++ .../backends/Kokkos/detail/device_ptr.cpp | 79 ++++++++++ 2 files changed, 226 insertions(+) create mode 100644 include/plssvm/backends/Kokkos/detail/device_ptr.hpp create mode 100644 src/plssvm/backends/Kokkos/detail/device_ptr.cpp diff --git a/include/plssvm/backends/Kokkos/detail/device_ptr.hpp b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp new file mode 100644 index 000000000..a12021efb --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp @@ -0,0 +1,147 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Small wrapper around a Kokkos view. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_PTR_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_PTR_HPP_ +#pragma once + +#include "plssvm/backends/gpu_device_ptr.hpp" // plssvm::detail::gpu_device_ptr +#include "plssvm/shape.hpp" // plssvm::shape + +#include "Kokkos_Core.hpp" // TODO: + +#include // std::size_t + +namespace plssvm::kokkos::detail { + +template +using device_view_type = Kokkos::View; + +template +using device_subview_type = Kokkos::Subview; + +template +using host_view_type = Kokkos::View; + +/** + * @brief Small wrapper class around a Kokkos view together with commonly used device functions. + * @tparam T the type of the kernel view to wrap + */ +template +class device_ptr : public ::plssvm::detail::gpu_device_ptr, device_ptr> { + /// The template base type of the Kokkos device_ptr class. + using base_type = ::plssvm::detail::gpu_device_ptr, device_ptr>; + + using base_type::data_; + using base_type::queue_; + using base_type::shape_; + + public: + // Be able to use overloaded base class functions. 
+ using base_type::copy_to_device; + using base_type::copy_to_device_strided; + using base_type::copy_to_host; + using base_type::copy_to_other_device; + using base_type::fill; + using base_type::memset; + + using typename base_type::const_host_pointer_type; + using typename base_type::device_pointer_type; + using typename base_type::host_pointer_type; + using typename base_type::queue_type; + using typename base_type::size_type; + using typename base_type::value_type; + + // TODO: DOKU + + /** + * @brief Default construct a CUDA device_ptr with a size of 0. + * @details Always associated with device 0. + */ + device_ptr() = default; + /** + * @brief Allocates `size * sizeof(T)` bytes on the device with ID @p device. + * @param[in] size the number of elements represented by the device_ptr + * @param[in] device the associated CUDA device + * @throws plssvm::cuda::backend_exception if the given device ID is smaller than 0 or greater or equal than the available number of devices + */ + explicit device_ptr(size_type size, int device); + /** + * @brief Allocates `shape.x * shape.y * sizeof(T)` bytes on the device with ID @p device. + * @param[in] shape the number of elements represented by the device_ptr + * @param[in] device the associated CUDA device + * @throws plssvm::cuda::backend_exception if the given device ID is smaller than 0 or greater or equal than the available number of devices + */ + explicit device_ptr(plssvm::shape shape, int device); + /** + * @brief Allocates `(shape.x + padding.x) * (shape.y + padding.y) * sizeof(T)` bytes on the device with ID @p device. 
+ * @param[in] shape the number of elements represented by the device_ptr + * @param[in] padding the number of padding elements added to the extent values + * @param[in] device the associated CUDA device + * @throws plssvm::cuda::backend_exception if the given device ID is smaller than 0 or greater or equal than the available number of devices + */ + device_ptr(plssvm::shape shape, plssvm::shape padding, int device); + + /** + * @copydoc plssvm::detail::gpu_device_ptr::gpu_device_ptr(const plssvm::detail::gpu_device_ptr &) + */ + device_ptr(const device_ptr &) = delete; + /** + * @copydoc plssvm::detail::gpu_device_ptr::gpu_device_ptr(plssvm::detail::gpu_device_ptr &&) + */ + device_ptr(device_ptr &&other) noexcept = default; + + /** + * @copydoc plssvm::detail::gpu_device_ptr::operator=(const plssvm::detail::gpu_device_ptr &) + */ + device_ptr &operator=(const device_ptr &) = delete; + /** + * @copydoc plssvm::detail::gpu_device_ptr::operator=(plssvm::detail::gpu_device_ptr &&) + */ + device_ptr &operator=(device_ptr &&other) noexcept = default; + + /** + * @copydoc plssvm::detail::gpu_device_ptr::~gpu_device_ptr() + */ + ~device_ptr() override; + + /** + * @copydoc plssvm::detail::gpu_device_ptr::memset(int, size_type, size_type) + */ + void memset(int pattern, size_type pos, size_type num_bytes) override; + /** + * @copydoc plssvm::detail::gpu_device_ptr::fill(value_type, size_type, size_type) + */ + void fill(value_type value, size_type pos, size_type count) override; + /** + * @copydoc plssvm::detail::gpu_device_ptr::copy_to_device(const_host_pointer_type, size_type, size_type) + */ + void copy_to_device(const_host_pointer_type data_to_copy, size_type pos, size_type count) override; + /** + * @copydoc plssvm::detail::gpu_device_ptr::copy_to_device_strided(const_host_pointer_type, std::size_t, std::size_t, std::size_t) + */ + void copy_to_device_strided(const_host_pointer_type data_to_copy, std::size_t spitch, std::size_t width, std::size_t height) override; + 
/** + * @copydoc plssvm::detail::gpu_device_ptr::copy_to_host(host_pointer_type, size_type, size_type) const + */ + void copy_to_host(host_pointer_type buffer, size_type pos, size_type count) const override; + /** + * @copydoc plssvm::detail::gpu_device_ptr::copy_to_other_device(derived_gpu_device_ptr &, size_type, size_type) const + */ + void copy_to_other_device(device_ptr &target, size_type pos, size_type count) const override; +}; + +extern template class device_ptr; +extern template class device_ptr; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_PTR_HPP_ diff --git a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp new file mode 100644 index 000000000..70af702b1 --- /dev/null +++ b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp @@ -0,0 +1,79 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" + +namespace plssvm::kokkos::detail { + +template +device_ptr::device_ptr(const size_type size, const int queue) : + device_ptr{ plssvm::shape{ size, 1 }, plssvm::shape{ 0, 0 }, queue } { } + +template +device_ptr::device_ptr(const plssvm::shape shape, const int queue) : + device_ptr{ shape, plssvm::shape{ 0, 0 }, queue } { } + +template +device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding, const int queue) : + base_type{ shape, padding, queue } { + static std::size_t count = 0; + // TODO: queue type, check range? + // TODO: how to assign a view to a GPU in a multi-GPU setting? 
+ data_ = device_view_type{ fmt::format("device_ptr_{}", count++), this->size_padded() }; +} + +template +device_ptr::~device_ptr() { + // avoid compiler warnings + try { + // TODO: + } catch (const plssvm::exception &e) { + std::cout << e.what_with_loc() << std::endl; + std::terminate(); + } +} + +template +void device_ptr::memset(const int pattern, const size_type pos, const size_type num_bytes) { +} + +template +void device_ptr::fill(const value_type value, const size_type pos, const size_type count) { +} + +template +void device_ptr::copy_to_device(const_host_pointer_type data_to_copy, const size_type pos, const size_type count) { + PLSSVM_ASSERT(data_ != view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); + +// detail::set_device(queue_); // TODO: + const size_type rcount = std::min(count, this->size_padded() - pos); + + // create view of the host data + host_view_type host_view{ data_to_copy, rcount }; + // create subview of the device data + device_subview_type data_subview{ data_, Kokkos::RangePolicy(pos, rcount) }; // TODO: view via typedef + Kokkos::deep_copy(data_subview, host_view); +} + +template +void device_ptr::copy_to_device_strided(const_host_pointer_type data_to_copy, const std::size_t spitch, const std::size_t width, const std::size_t height) { +} + +template +void device_ptr::copy_to_host(host_pointer_type buffer, const size_type pos, const size_type count) const { +} + +template +void device_ptr::copy_to_other_device(device_ptr &target, const size_type pos, const size_type count) const { +} + +template class device_ptr; +template class device_ptr; + +} // namespace plssvm::kokkos::detail From 7559525756e22a314dc933679d9e1f1e10bb0dba Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 16 Oct 2024 09:24:18 +0200 Subject: [PATCH 007/123] First (maybe) functional Kokkos device_ptr implementation. 
--- .../backends/Kokkos/detail/device_ptr.cpp | 54 +++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp index 70af702b1..5758a4309 100644 --- a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp @@ -8,6 +8,20 @@ #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/shape.hpp" // plssvm::shape + +#include "Kokkos_Core.hpp" + +#include "fmt/format.h" // fmt::format + +#include // std::size_t +#include // std::terminate +#include // std::cout, std::endl +#include // std::vector + namespace plssvm::kokkos::detail { template @@ -51,26 +65,60 @@ void device_ptr::copy_to_device(const_host_pointer_type data_to_copy, const s PLSSVM_ASSERT(data_ != view_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); -// detail::set_device(queue_); // TODO: + // detail::set_device(queue_); // TODO: const size_type rcount = std::min(count, this->size_padded() - pos); // create view of the host data - host_view_type host_view{ data_to_copy, rcount }; + const host_view_type host_view{ data_to_copy, rcount }; // create subview of the device data - device_subview_type data_subview{ data_, Kokkos::RangePolicy(pos, rcount) }; // TODO: view via typedef + auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + rcount)); + // copy the data to the device subview Kokkos::deep_copy(data_subview, host_view); } template void device_ptr::copy_to_device_strided(const_host_pointer_type data_to_copy, const std::size_t spitch, const std::size_t width, const std::size_t height) { + PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); + + if (width > spitch) { + throw backend_exception{ fmt::format("Invalid width and spitch combination specified (width: {} <= spitch: {})!", width, spitch) }; + } + + Kokkos::View view_2d{ data_.data(), this->shape_padded().x, this->shape_padded().y }; + // TODO: implement } template void device_ptr::copy_to_host(host_pointer_type buffer, const size_type pos, const size_type count) const { + PLSSVM_ASSERT(data_ != view_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); + PLSSVM_ASSERT(buffer != nullptr, "Invalid host pointer for the data to copy!"); + + // detail::set_device(queue_); // TODO: + const size_type rcount = std::min(count, this->size_padded() - pos); + + // create view of the host data + const host_view_type host_view{ buffer, rcount }; + // create subview of the device data + auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + rcount)); + // copy the data to the host + Kokkos::deep_copy(host_view, data_subview); } template void device_ptr::copy_to_other_device(device_ptr &target, const size_type pos, const size_type count) const { + PLSSVM_ASSERT(data_ != view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(target.get() != view_type{}, "Invalid target pointer! Maybe target has been default constructed?"); + + const size_type rcount = std::min(count, this->size_padded() - pos); + if (target.size_padded() < rcount) { + throw backend_exception{ fmt::format("Buffer too small to perform copy (needed: {}, provided: {})!", rcount, target.size_padded()) }; + } + + // TODO: use Kokkos function? + std::vector temp(rcount); + this->copy_to_host(temp, pos, rcount); + target.copy_to_device(temp); } template class device_ptr; From 0a318dccfaa0a9d585859a202465865776786524 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 17 Oct 2024 19:02:28 +0200 Subject: [PATCH 008/123] Add Kokkos CMake preset. 
--- CMakePresets.json | 1 + cmake/presets/kokkos.json | 142 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 cmake/presets/kokkos.json diff --git a/CMakePresets.json b/CMakePresets.json index 8e4925dd0..bd33cac2f 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -13,6 +13,7 @@ "cmake/presets/opencl.json", "cmake/presets/acpp.json", "cmake/presets/dpcpp.json", + "cmake/presets/kokkos.json", "cmake/presets/all.json" ] } \ No newline at end of file diff --git a/cmake/presets/kokkos.json b/cmake/presets/kokkos.json new file mode 100644 index 000000000..620e940e5 --- /dev/null +++ b/cmake/presets/kokkos.json @@ -0,0 +1,142 @@ +{ + "version": 6, + "include": [ + "common.json" + ], + "configurePresets": [ + { + "name": "kokkos", + "displayName": "Kokkos backend", + "inherits": "build", + "cacheVariables": { + "PLSSVM_ENABLE_KOKKOS_BACKEND": "ON" + } + }, + { + "name": "kokkos_python", + "displayName": "Kokkos backend + Python bindings", + "inherits": "build", + "cacheVariables": { + "PLSSVM_ENABLE_KOKKOS_BACKEND": "ON", + "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON", + "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON" + } + }, + { + "name": "kokkos_test", + "displayName": "Kokkos backend tests", + "inherits": "test", + "cacheVariables": { + "PLSSVM_ENABLE_KOKKOS_BACKEND": "ON" + } + } + ], + "buildPresets": [ + { + "name": "kokkos", + "displayName": "Kokkos backend", + "configurePreset": "kokkos", + "configuration": "RelWithDebInfo", + "inherits": "common" + }, + { + "name": "kokkos_python", + "displayName": "Kokkos backend + Python bindings", + "configurePreset": "kokkos_python", + "configuration": "RelWithDebInfo", + "inherits": "common" + }, + { + "name": "kokkos_test", + "displayName": "Kokkos backend tests", + "configurePreset": "kokkos_test", + "configuration": "Debug", + "inherits": "common" + } + ], + "testPresets": [ + { + "name": "kokkos_test", + "displayName": "Kokkos backend all tests", + "configurePreset": "kokkos_test", + 
"inherits": "common" + }, + { + "name": "kokkos_backend_test", + "displayName": "Kokkos backend specific tests", + "configurePreset": "kokkos_test", + "inherits": "common", + "filter": { + "include": { + "name": "Kokkos.*" + } + } + } + ], + "workflowPresets": [ + { + "name": "kokkos", + "displayName": "Kokkos backend workflow", + "steps": [ + { + "name": "kokkos", + "type": "configure" + }, + { + "name": "kokkos", + "type": "build" + } + ] + }, + { + "name": "kokkos_python", + "displayName": "Kokkos backend + Python bindings workflow", + "steps": [ + { + "name": "kokkos_python", + "type": "configure" + }, + { + "name": "kokkos_python", + "type": "build" + } + ] + }, + { + "name": "kokkos_test", + "displayName": "Kokkos test workflow", + "steps": [ + { + "name": "kokkos_test", + "type": "configure" + }, + { + "name": "kokkos_test", + "type": "build" + }, + { + "name": "kokkos_test", + "type": "test" + } + ] + }, + { + "name": "kokkos_backend_test", + "displayName": "Kokkos backend test workflow", + "steps": [ + { + "name": "kokkos_test", + "type": "configure" + }, + { + "name": "kokkos_test", + "type": "build" + }, + { + "name": "kokkos_backend_test", + "type": "test" + } + ] + } + ] +} \ No newline at end of file From d254a5b3eb900beade2a895b053ccdf770a289df Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 17 Oct 2024 19:02:56 +0200 Subject: [PATCH 009/123] Further improve implementation and add more placeholder functions. 
--- include/plssvm/backends/Kokkos/csvm.hpp | 6 +- .../backends/Kokkos/detail/device_ptr.hpp | 31 +- .../Kokkos/kernel/cg_explicit/blas.hpp | 44 +++ .../cg_explicit/kernel_matrix_assembly.hpp | 32 ++ .../kernel_matrix_assembly_blas.hpp | 32 ++ .../Kokkos/kernel/kernel_functions.hpp | 111 ++++++ .../backends/Kokkos/kernel/predict_kernel.hpp | 42 +++ src/plssvm/backends/Kokkos/csvm.cpp | 357 +++++++++++++++++- .../backends/Kokkos/detail/device_ptr.cpp | 30 +- 9 files changed, 625 insertions(+), 60 deletions(-) create mode 100644 include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp create mode 100644 include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp create mode 100644 include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp create mode 100644 include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp create mode 100644 include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp diff --git a/include/plssvm/backends/Kokkos/csvm.hpp b/include/plssvm/backends/Kokkos/csvm.hpp index 524f1bd4b..206d85a81 100644 --- a/include/plssvm/backends/Kokkos/csvm.hpp +++ b/include/plssvm/backends/Kokkos/csvm.hpp @@ -24,6 +24,8 @@ #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "Kokkos_Core.hpp" // TODO: + #include // std::size_t #include // std::true_type #include // std::forward @@ -36,11 +38,11 @@ namespace kokkos { /** * @brief A C-SVM implementation using Kokkos as backend. */ -class csvm : public ::plssvm::detail::gpu_csvm { +class csvm : public ::plssvm::detail::gpu_csvm { protected: // protected for the test mock class /// The template base type of the Kokkos C-SVM class. 
- using base_type = ::plssvm::detail::gpu_csvm; + using base_type = ::plssvm::detail::gpu_csvm; using base_type::data_distribution_; using base_type::devices_; diff --git a/include/plssvm/backends/Kokkos/detail/device_ptr.hpp b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp index a12021efb..953faf3ed 100644 --- a/include/plssvm/backends/Kokkos/detail/device_ptr.hpp +++ b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp @@ -16,7 +16,7 @@ #include "plssvm/backends/gpu_device_ptr.hpp" // plssvm::detail::gpu_device_ptr #include "plssvm/shape.hpp" // plssvm::shape -#include "Kokkos_Core.hpp" // TODO: +#include "Kokkos_Core.hpp" // TODO: Kokkos::DefaultExecutionSpace #include // std::size_t @@ -36,9 +36,9 @@ using host_view_type = Kokkos::View -class device_ptr : public ::plssvm::detail::gpu_device_ptr, device_ptr> { +class device_ptr : public ::plssvm::detail::gpu_device_ptr, device_ptr> { /// The template base type of the Kokkos device_ptr class. - using base_type = ::plssvm::detail::gpu_device_ptr, device_ptr>; + using base_type = ::plssvm::detail::gpu_device_ptr, device_ptr>; using base_type::data_; using base_type::queue_; @@ -60,35 +60,30 @@ class device_ptr : public ::plssvm::detail::gpu_device_ptr +class device_kernel_assembly { + public: + private: +}; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp new file mode 100644 index 000000000..2d9e855b2 --- /dev/null +++ b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp @@ -0,0 +1,32 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. 
+ * See the LICENSE.md file in the project root for full license information. + * + * @brief Functions for implicitly assembling the kernel matrix using the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_SYCL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ +#define PLSSVM_BACKENDS_SYCL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ +#pragma once + +#include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type + +#include "Kokkos_Core.hpp" // TODO: Kokkos::atomic_add + +namespace plssvm::kokkos::detail { + +template +class device_kernel_assembly_symm { + public: + private: +}; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_SYCL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp new file mode 100644 index 000000000..f7f422659 --- /dev/null +++ b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp @@ -0,0 +1,111 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Implement the different kernel functions on the GPU using Kokkos. 
+ */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_KERNEL_KERNEL_FUNCTIONS_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_KERNEL_KERNEL_FUNCTIONS_HPP_ + +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/detail/utility.hpp" // plssvm::detail::always_false_v +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type + +#include "Kokkos_MathematicalFunctions.hpp" // Kokkos::pow, Kokkos::exp, Kokkos::tanh, Kokkos::abs + +#include // std::numeric_limits +#include // std::tuple, std::get + +namespace plssvm::kokkos::detail { + +//***************************************************// +// feature reductions // +//***************************************************// + +/** + * @brief Compute the default feature reduction, i.e., a simple dot-product. + * @tparam kernel_function the kernel function type + * @param[in] val1 the first feature value + * @param[in] val2 the second feature value + * @return the reduced value (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { + return val1 * val2; +} + +/** + * @brief Compute the feature reduction for the radial basis function kernel function, i.e., the squared euclidean distance. + * @param[in] val1 the first feature value + * @param[in] val2 the second feature value + * @return the reduced value (`[[nodiscard]]`) + */ +template <> +[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { + const real_type d = val1 - val2; + return d * d; +} + +/** + * @brief Compute the feature reduction for the laplacian kernel function, i.e., the Manhattan distance. 
+ * @param[in] val1 the first feature value + * @param[in] val2 the second feature value + * @return the reduced value (`[[nodiscard]]`) + */ +template <> +[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { + return ::Kokkos::fabs(val1 - val2); +} + +/** + * @brief Compute the feature reduction for the chi-squared kernel function. + * @note Be sure that the denominator isn't 0.0 which may be the case for padding values. + * @param[in] val1 the first feature value + * @param[in] val2 the second feature value + * @return the reduced value (`[[nodiscard]]`) + */ +template <> +[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { + const real_type d = val1 - val2; + return (real_type{ 1.0 } / (val1 + val2 + std::numeric_limits::min())) * d * d; +} + +//***************************************************// +// kernel functions // +//***************************************************// + +/** + * @brief Compute the @p kernel_function using @p value and the @p params. 
+ * @tparam kernel_function the kernel function type + * @tparam Args the types of the potential kernel function parameters + * @param[in] value the value to apply the kernel function to + * @param[in] params the potential kernel function parameters + * @return the result value (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline real_type apply_kernel_function(const real_type value, const std::tuple params) { + if constexpr (kernel_function == kernel_function_type::linear) { + return value; + } else if constexpr (kernel_function == kernel_function_type::polynomial) { + return ::Kokkos::pow(std::get<1>(params) * value + std::get<2>(params), std::get<0>(params)); + } else if constexpr (kernel_function == kernel_function_type::rbf) { + return ::Kokkos::exp(-std::get<0>(params) * value); + } else if constexpr (kernel_function == kernel_function_type::sigmoid) { + return ::Kokkos::tanh(std::get<0>(params) * value + std::get<1>(params)); + } else if constexpr (kernel_function == kernel_function_type::laplacian) { + return ::Kokkos::exp(-std::get<0>(params) * value); + } else if constexpr (kernel_function == kernel_function_type::chi_squared) { + return ::Kokkos::exp(-std::get<0>(params) * value); + } else { + static_assert(::plssvm::detail::always_false_v, "Unsupported kernel function!"); + } +} + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_KERNEL_KERNEL_FUNCTIONS_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp new file mode 100644 index 000000000..a203cb7e9 --- /dev/null +++ b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp @@ -0,0 +1,42 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. 
+ * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines the functions used for prediction for the C-SVM using the Kokkos backend. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_PREDICT_KERNEL_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_PREDICT_KERNEL_HPP_ +#pragma once + +#include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type + +#include "Kokkos_Core.hpp" // TODO: Kokkos::atomic_add + +namespace plssvm::kokkos::detail { + +class device_kernel_w_linear { + public: + private: +}; + +class device_kernel_predict_linear { + public: + private: +}; + +template +class device_kernel_predict { + public: + private: +}; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_PREDICT_KERNEL_HPP_ diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 3c3afb022..f459d55e5 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -8,23 +8,34 @@ #include "plssvm/backends/Kokkos/csvm.hpp" -#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space -#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::get_runtime_version -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include "plssvm/detail/logging.hpp" // plssvm::detail::log -#include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry -#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/parameter.hpp" // plssvm::parameter -#include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "plssvm/verbosity_levels.hpp" // 
plssvm::verbosity_level +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::execution_range +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr +#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::get_runtime_version +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp" // plssvm::kokkos::detail::{device_kernel_symm, device_kernel_symm_mirror, device_kernel_inplace_matrix_add, device_kernel_inplace_matrix_scale} +#include "plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::kokkos::detail::device_kernel_assembly +#include "plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::kokkos::detail::device_kernel_assembly_symm +#include "plssvm/backends/Kokkos/kernel/predict_kernel.hpp" // plssvm::kokkos::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} +#include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution +#include "plssvm/detail/logging.hpp" // plssvm::detail::log +#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size +#include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level #include "Kokkos_Core.hpp" // TODO: +#include "fmt/core.h" // fmt::format #include "fmt/format.h" // fmt::format +#include // std::size_t #include // std::terminate #include // std::cout, 
std::endl -#include // std::iota +#include // std::string #include // std::vector namespace plssvm::kokkos { @@ -89,8 +100,18 @@ void csvm::init(const target_platform target) { } // get all available devices wrt the requested target platform - devices_.resize(static_cast::size_type>(Kokkos::num_devices())); - std::iota(devices_.begin(), devices_.end(), 0); +// TODO: HOW CAN ONE USE MULTIPLE KOKKOS DEVICES +// TODO: implement for other Kokkos execution spaces +#if defined(KOKKOS_ENABLE_CUDA) + for (int device = 0; device < Kokkos::num_devices(); ++device) { + // create CUDA stream using the CUDA specific functions + cudaSetDevice(device); + cudaStream_t stream{}; + cudaStreamCreate(&stream); + // create Kokkos execution space for the specific device + devices_.emplace_back(Kokkos::Cuda(stream, true)); + } +#endif // throw exception if no CUDA devices could be found if (devices_.empty()) { @@ -103,7 +124,7 @@ void csvm::init(const target_platform target) { plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }, plssvm::detail::tracking::tracking_entry{ "backend", "target_platform", target_ }); - std::vector device_names; + std::vector device_names{}; device_names.reserve(devices_.size()); for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { const std::string device_name = detail::get_device_name(space_, device); @@ -129,19 +150,89 @@ csvm::~csvm() { } std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { - return {}; + // TODO: implement for other execution spaces, guard behind ifdef + std::vector<::plssvm::detail::memory_size> res(this->num_available_devices()); + switch (space_) { + case detail::execution_space::cuda: + { + cudaDeviceProp prop{}; + for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { + cudaGetDeviceProperties(&prop, devices_[device_id].cuda_device()); + res[device_id] = ::plssvm::detail::memory_size{ 
static_cast(prop.totalGlobalMem) }; + } + } + break; + case detail::execution_space::hip: + case detail::execution_space::sycl: + case detail::execution_space::openmp_target: + case detail::execution_space::openacc: + case detail::execution_space::openmp: + case detail::execution_space::hpx: + case detail::execution_space::threads: + case detail::execution_space::serial: + throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; + } + + return res; } std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const { - return {}; + // TODO: implement for other execution spaces, guard behind ifdef + switch (space_) { + case detail::execution_space::cuda: + return this->get_device_memory(); + case detail::execution_space::hip: + case detail::execution_space::sycl: + case detail::execution_space::openmp_target: + case detail::execution_space::openacc: + case detail::execution_space::openmp: + case detail::execution_space::hpx: + case detail::execution_space::threads: + case detail::execution_space::serial: + throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; + } } std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { - return {}; + // TODO: implement for other execution spaces, guard behind ifdef + switch (space_) { + case detail::execution_space::cuda: + { + cudaDeviceProp prop{}; + cudaGetDeviceProperties(&prop, devices_[device_id].cuda_device()); + return static_cast(prop.maxThreadsPerBlock); + } + case detail::execution_space::hip: + case detail::execution_space::sycl: + case detail::execution_space::openmp_target: + case detail::execution_space::openacc: + case detail::execution_space::openmp: + case detail::execution_space::hpx: + case detail::execution_space::threads: + case detail::execution_space::serial: + throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; + } } 
::plssvm::detail::dim_type csvm::get_max_grid_size([[maybe_unused]] const std::size_t device_id) const { - return {}; + // TODO: implement for other execution spaces, guard behind ifdef + switch (space_) { + case detail::execution_space::cuda: + { + cudaDeviceProp prop{}; + cudaGetDeviceProperties(&prop, devices_[device_id].cuda_device()); + return { static_cast(prop.maxGridSize[0]), static_cast(prop.maxGridSize[1]), static_cast(prop.maxGridSize[2]) }; + } + case detail::execution_space::hip: + case detail::execution_space::sycl: + case detail::execution_space::openmp_target: + case detail::execution_space::openacc: + case detail::execution_space::openmp: + case detail::execution_space::hpx: + case detail::execution_space::threads: + case detail::execution_space::serial: + throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; + } } //***************************************************// @@ -149,19 +240,179 @@ ::plssvm::detail::dim_type csvm::get_max_grid_size([[maybe_unused]] const std::s //***************************************************// auto csvm::run_assemble_kernel_matrix_explicit(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter ¶ms, const device_ptr_type &data_d, const device_ptr_type &q_red_d, real_type QA_cost) const -> device_ptr_type { - return {}; + const unsigned long long num_rows_reduced = data_d.shape().x - 1; + const unsigned long long num_features = data_d.shape().y; + const queue_type &device = devices_[device_id]; + + // calculate the number of data points this device is responsible for + const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); + + // get the offset of the data points this device is responsible for + const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); + + // calculate the number of matrix entries + const 
::plssvm::detail::triangular_data_distribution &dist = dynamic_cast<::plssvm::detail::triangular_data_distribution &>(*data_distribution_); + const std::size_t num_entries_padded = dist.calculate_explicit_kernel_matrix_num_entries_padded(device_id); + + device_ptr_type kernel_matrix_d{ num_entries_padded, device }; // only explicitly store the upper triangular matrix + const real_type cost_factor = real_type{ 1.0 } / params.cost; + + // TODO: implement + // // convert execution range block to CUDA's native dim3 + // const dim3 native_block = detail::dim_type_to_native(exec.block); + // + // for (const auto &[partial_grid, offsets] : exec.grids) { + // // convert execution range partial_grid to CUDA's native dim3 + // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); + // + // switch (params.kernel_type) { + // case kernel_function_type::linear: + // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y); + // break; + // case kernel_function_type::polynomial: + // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, params.degree, std::get(params.gamma), params.coef0); + // break; + // case kernel_function_type::rbf: + // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, std::get(params.gamma)); + // break; + // case kernel_function_type::sigmoid: + // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, std::get(params.gamma), params.coef0); + // break; + // 
case kernel_function_type::laplacian: + // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, std::get(params.gamma)); + // break; + // case kernel_function_type::chi_squared: + // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, std::get(params.gamma)); + // break; + // } + // } + detail::device_synchronize_all(); + + return kernel_matrix_d; } void csvm::run_blas_level_3_kernel_explicit(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const ::plssvm::detail::execution_range &mirror_exec, const real_type alpha, const device_ptr_type &A_d, const device_ptr_type &B_d, const real_type beta, device_ptr_type &C_d) const { + const unsigned long long num_rhs = B_d.shape().x; + const unsigned long long num_rows = B_d.shape().y; + const queue_type &device = devices_[device_id]; + + // calculate the number of data points this device is responsible for + const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); + // get the offset of the data points this device is responsible for + const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); + + // TODO: implement + // // convert execution range block to CUDA's native dim3 + // const dim3 native_block = detail::dim_type_to_native(exec.block); + // + // detail::set_device(device); + // for (const auto &[partial_grid, offsets] : exec.grids) { + // // convert execution range partial_grid to CUDA's native dim3 + // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); + // + // detail::device_kernel_symm<<>>(num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, A_d.get(), B_d.get(), beta, C_d.get(), 
offsets.x, offsets.y); + // } + // + // // convert execution range block to CUDA's native dim3 + // const dim3 native_mirror_block = detail::dim_type_to_native(mirror_exec.block); + // + // for (const auto &[partial_grid, offsets] : mirror_exec.grids) { + // const unsigned long long num_mirror_rows = num_rows - row_offset - device_specific_num_rows; + // + // if (num_mirror_rows > 0) { + // // convert execution range partial_grid to CUDA's native dim3 + // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); + // + // detail::device_kernel_symm_mirror<<>>(num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, A_d.get(), B_d.get(), beta, C_d.get(), offsets.x, offsets.y); + // } + // } + // detail::peek_at_last_error(); + detail::device_synchronize_all(); } void csvm::run_inplace_matrix_addition(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const device_ptr_type &rhs_d) const { + const unsigned long long num_rhs = lhs_d.shape().x; + const queue_type &device = devices_[device_id]; + + // // TODO: implement + // // convert execution range block to CUDA's native dim3 + // const dim3 native_block = detail::dim_type_to_native(exec.block); + // + // detail::set_device(device); + // for (const auto &[partial_grid, offsets] : exec.grids) { + // // convert execution range partial_grid to CUDA's native dim3 + // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); + // + // detail::device_kernel_inplace_matrix_add<<>>(num_rhs, lhs_d.get(), rhs_d.get(), offsets.x, offsets.y); + // } + // detail::peek_at_last_error(); + detail::device_synchronize_all(); } void csvm::run_inplace_matrix_scale(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const real_type scale) const { + const unsigned long long num_rhs = lhs_d.shape().x; + const queue_type &device = devices_[device_id]; + + // TODO: implement + // // 
convert execution range block to CUDA's native dim3 + // const dim3 native_block = detail::dim_type_to_native(exec.block); + // + // detail::set_device(device); + // for (const auto &[partial_grid, offsets] : exec.grids) { + // // convert execution range partial_grid to CUDA's native dim3 + // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); + // + // detail::device_kernel_inplace_matrix_scale<<>>(num_rhs, lhs_d.get(), scale, offsets.x, offsets.y); + // } + // detail::peek_at_last_error(); + detail::device_synchronize_all(); } void csvm::run_assemble_kernel_matrix_implicit_blas_level_3(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const real_type alpha, const device_ptr_type &A_d, const parameter &params, const device_ptr_type &q_red, const real_type QA_cost, const device_ptr_type &B_d, device_ptr_type &C_d) const { + const unsigned long long num_rows_reduced = A_d.shape().x - 1; + const unsigned long long num_features = A_d.shape().y; + const unsigned long long num_classes = B_d.shape().x; + const queue_type &device = devices_[device_id]; + + // calculate the number of data points this device is responsible for + const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); + // get the offset of the data points this device is responsible for + const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); + + const real_type cost_factor = real_type{ 1.0 } / params.cost; + + // TODO: implement + // // convert general execution range's block to CUDA specific block + // const dim3 native_block = detail::dim_type_to_native(exec.block); + // + // detail::set_device(device); + // for (const auto &[partial_grid, offsets] : exec.grids) { + // // convert execution range partial_grid to CUDA's native dim3 + // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); + // + // switch (params.kernel_type) { + // case
kernel_function_type::linear: + // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y); + // break; + // case kernel_function_type::polynomial: + // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, params.degree, std::get(params.gamma), params.coef0); + // break; + // case kernel_function_type::rbf: + // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, std::get(params.gamma)); + // break; + // case kernel_function_type::sigmoid: + // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, std::get(params.gamma), params.coef0); + // break; + // case kernel_function_type::laplacian: + // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, std::get(params.gamma)); + // break; + // case kernel_function_type::chi_squared: + // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, std::get(params.gamma)); + // break; + // } + // } + // detail::peek_at_last_error(); + detail::device_synchronize_all(); } //***************************************************// @@ -169,11 +420,77 @@ 
void csvm::run_assemble_kernel_matrix_implicit_blas_level_3(const std::size_t de //***************************************************// auto csvm::run_w_kernel(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const device_ptr_type &alpha_d, const device_ptr_type &sv_d) const -> device_ptr_type { - return {}; + const unsigned long long num_classes = alpha_d.shape().x; + const unsigned long long num_sv = alpha_d.shape().y; + const unsigned long long device_specific_num_sv = sv_d.shape().x; + const unsigned long long num_features = sv_d.shape().y; + const queue_type &device = devices_[device_id]; + + // get the offset of the data points this device is responsible for + const unsigned long long sv_offset = data_distribution_->place_row_offset(device_id); + + device_ptr_type w_d{ shape{ num_classes, num_features }, shape{ PADDING_SIZE, PADDING_SIZE }, device }; + + // TODO: implement + // // convert execution range block to CUDA's native dim3 + // const dim3 native_block = detail::dim_type_to_native(exec.block); + // + // detail::set_device(device); + // for (const auto &[partial_grid, offsets] : exec.grids) { + // // convert execution range partial_grid to CUDA's native dim3 + // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); + // + // detail::device_kernel_w_linear<<>>(w_d.get(), alpha_d.get(), sv_d.get(), num_classes, num_sv, device_specific_num_sv, sv_offset, offsets.x, offsets.y); + // } + // detail::peek_at_last_error(); + detail::device_synchronize_all(); + + return w_d; } auto csvm::run_predict_kernel(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter &params, const device_ptr_type &alpha_d, const device_ptr_type &rho_d, const device_ptr_type &sv_or_w_d, const device_ptr_type &predict_points_d) const -> device_ptr_type { - return {}; + const unsigned long long num_classes = alpha_d.shape().x; + const unsigned long long num_predict_points = predict_points_d.shape().x; //
= device_specific_num_rows + const unsigned long long num_features = predict_points_d.shape().y; + const unsigned long long num_sv = sv_or_w_d.shape().x; + const queue_type &device = devices_[device_id]; + + device_ptr_type out_d{ shape{ num_predict_points, num_classes }, shape{ PADDING_SIZE, PADDING_SIZE }, device }; + + // TODO: implement + // // convert execution range block to CUDA's native dim3 + // const dim3 native_block = detail::dim_type_to_native(exec.block); + // + // detail::set_device(device); + // for (const auto &[partial_grid, offsets] : exec.grids) { + // // convert execution range partial_grid to CUDA's native dim3 + // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); + // + // switch (params.kernel_type) { + // case kernel_function_type::linear: + // detail::device_kernel_predict_linear<<>>(out_d.get(), sv_or_w_d.get(), rho_d.get(), predict_points_d.get(), num_classes, num_predict_points, num_features, offsets.x, offsets.y); + // break; + // case kernel_function_type::polynomial: + // detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, params.degree, std::get(params.gamma), params.coef0); + // break; + // case kernel_function_type::rbf: + // detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, std::get(params.gamma)); + // break; + // case kernel_function_type::sigmoid: + // detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, std::get(params.gamma), params.coef0); + // break; + // case kernel_function_type::laplacian: + // detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), 
sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, std::get(params.gamma)); + // break; + // case kernel_function_type::chi_squared: + // detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, std::get(params.gamma)); + // break; + // } + // } + // detail::peek_at_last_error(); + detail::device_synchronize_all(); + + return out_d; } } // namespace plssvm::kokkos diff --git a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp index 5758a4309..22719482c 100644 --- a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp @@ -15,7 +15,7 @@ #include "Kokkos_Core.hpp" -#include "fmt/format.h" // fmt::format +#include "fmt/core.h" // fmt::format #include // std::size_t #include // std::terminate @@ -25,31 +25,23 @@ namespace plssvm::kokkos::detail { template -device_ptr::device_ptr(const size_type size, const int queue) : - device_ptr{ plssvm::shape{ size, 1 }, plssvm::shape{ 0, 0 }, queue } { } +device_ptr::device_ptr(const size_type size, const Kokkos::DefaultExecutionSpace exec) : + device_ptr{ plssvm::shape{ size, 1 }, plssvm::shape{ 0, 0 }, exec } { } template -device_ptr::device_ptr(const plssvm::shape shape, const int queue) : - device_ptr{ shape, plssvm::shape{ 0, 0 }, queue } { } +device_ptr::device_ptr(const plssvm::shape shape, const Kokkos::DefaultExecutionSpace exec) : + device_ptr{ shape, plssvm::shape{ 0, 0 }, exec } { } template -device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding, const int queue) : - base_type{ shape, padding, queue } { - static std::size_t count = 0; - // TODO: queue type, check range? - // TODO: how to assign a view to a GPU in a multi-GPU setting? 
- data_ = device_view_type{ fmt::format("device_ptr_{}", count++), this->size_padded() }; +device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding, const Kokkos::DefaultExecutionSpace exec) : + base_type{ shape, padding, exec } { + // TODO: GUARD behind ifdef! + data_ = device_view_type{ fmt::format("device_{}_view", exec.cuda_device()), this->size_padded() }; } template device_ptr::~device_ptr() { - // avoid compiler warnings - try { - // TODO: - } catch (const plssvm::exception &e) { - std::cout << e.what_with_loc() << std::endl; - std::terminate(); - } + // Kokkos automatically frees the memory of a Kokkos::View if the View goes out of scope } template @@ -65,7 +57,6 @@ void device_ptr::copy_to_device(const_host_pointer_type data_to_copy, const s PLSSVM_ASSERT(data_ != view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); - // detail::set_device(queue_); // TODO: const size_type rcount = std::min(count, this->size_padded() - pos); // create view of the host data @@ -94,7 +85,6 @@ void device_ptr::copy_to_host(host_pointer_type buffer, const size_type pos, PLSSVM_ASSERT(data_ != view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); PLSSVM_ASSERT(buffer != nullptr, "Invalid host pointer for the data to copy!"); - // detail::set_device(queue_); // TODO: const size_type rcount = std::min(count, this->size_padded() - pos); // create view of the host data From 752afd48b8d87aa2a6aff1d6538e482eec214371 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 18 Oct 2024 15:05:52 +0200 Subject: [PATCH 010/123] Don't hardcode the test against nullptr, but against the default constructed device_pointer_type.
--- include/plssvm/backends/gpu_device_ptr.hpp | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/plssvm/backends/gpu_device_ptr.hpp b/include/plssvm/backends/gpu_device_ptr.hpp index 7d364253b..55a3e18a9 100644 --- a/include/plssvm/backends/gpu_device_ptr.hpp +++ b/include/plssvm/backends/gpu_device_ptr.hpp @@ -416,14 +416,14 @@ void gpu_device_ptr::swap( template void gpu_device_ptr::memset(const int pattern, const size_type pos) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); this->memset(pattern, pos, this->size_padded() * sizeof(value_type)); } template void gpu_device_ptr::fill(const value_type value, const size_type pos) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); this->fill(value, pos, this->size_padded()); } @@ -431,7 +431,7 @@ void gpu_device_ptr::fill( template template void gpu_device_ptr::copy_to_device(const matrix &data_to_copy) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); if (data_to_copy.size_padded() < this->size_padded()) { throw gpu_device_ptr_exception{ fmt::format("Too few data to perform copy (needed: {}, provided: {})!", this->size_padded(), data_to_copy.size_padded()) }; @@ -441,14 +441,14 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_device(const std::vector &data_to_copy) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); this->copy_to_device(data_to_copy, 0, this->size_padded()); } template void gpu_device_ptr::copy_to_device(const std::vector &data_to_copy, const size_type pos, const size_type count) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); const size_type rcount = std::min(count, this->size_padded() - pos); if (data_to_copy.size() < rcount) { @@ -459,7 +459,7 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_device(const_host_pointer_type data_to_copy) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); this->copy_to_device(data_to_copy, 0, this->size_padded()); @@ -468,7 +468,7 @@ void gpu_device_ptr::copy_ template template void gpu_device_ptr::copy_to_device_strided(const matrix &data_to_copy, const std::size_t start_row, const std::size_t num_rows) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); if (start_row + num_rows > data_to_copy.num_rows()) { throw gpu_device_ptr_exception{ fmt::format("Tried to copy lines {}-{} (zero-based index) to the device, but the matrix has only {} lines!", start_row, start_row + num_rows - 1, data_to_copy.num_rows()) }; @@ -494,7 +494,7 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_device_strided(const std::vector &data_to_copy, std::size_t spitch, std::size_t width, std::size_t height) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! 
Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); if (width > spitch) { throw gpu_device_ptr_exception{ fmt::format("Invalid width and spitch combination specified (width: {} <= spitch: {})!", width, spitch) }; @@ -509,7 +509,7 @@ void gpu_device_ptr::copy_ template template void gpu_device_ptr::copy_to_host(matrix &buffer) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); if (buffer.size_padded() < this->size_padded()) { throw gpu_device_ptr_exception{ fmt::format("Buffer too small to perform copy (needed: {}, provided: {})!", this->size_padded(), buffer.size_padded()) }; @@ -519,14 +519,14 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_host(std::vector &buffer) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); this->copy_to_host(buffer, 0, this->size_padded()); } template void gpu_device_ptr::copy_to_host(std::vector &buffer, const size_type pos, const size_type count) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); const size_type rcount = std::min(count, this->size_padded() - pos); if (buffer.size() < rcount) { @@ -537,7 +537,7 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_host(host_pointer_type buffer) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); PLSSVM_ASSERT(buffer != nullptr, "Invalid host pointer for the data to copy!"); this->copy_to_host(buffer, 0, this->size_padded()); @@ -545,8 +545,8 @@ void gpu_device_ptr::copy_ template void gpu_device_ptr::copy_to_other_device(derived_gpu_device_ptr &target) const { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); - PLSSVM_ASSERT(target.get() != nullptr, "Invalid target pointer! Maybe target has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(target.get() != device_pointer_type{}, "Invalid target pointer! Maybe target has been default constructed?"); this->copy_to_other_device(target, 0, this->size_padded()); } From 820c431778b42cfad2c2f289288f9402d62bbce3 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 21 Oct 2024 15:09:55 +0200 Subject: [PATCH 011/123] Implement missing device_ptr functionality. 
--- .../backends/Kokkos/detail/device_ptr.hpp | 3 +- .../backends/Kokkos/detail/device_ptr.cpp | 75 +++++++++++++++---- tests/backends/generic_device_ptr_tests.hpp | 40 +++++----- 3 files changed, 81 insertions(+), 37 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/device_ptr.hpp b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp index 953faf3ed..8f587b667 100644 --- a/include/plssvm/backends/Kokkos/detail/device_ptr.hpp +++ b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp @@ -105,8 +105,9 @@ class device_ptr : public ::plssvm::detail::gpu_device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding data_ = device_view_type{ fmt::format("device_{}_view", exec.cuda_device()), this->size_padded() }; } -template -device_ptr::~device_ptr() { - // Kokkos automatically frees the memory of a Kokkos::View if the View goes out of scope -} - template void device_ptr::memset(const int pattern, const size_type pos, const size_type num_bytes) { + PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + + if (pos >= this->size_padded()) { + throw backend_exception{ fmt::format("Illegal access in memset!: {} >= {}", pos, this->size_padded()) }; + } + const size_type rnum_bytes = std::min(num_bytes, (this->size_padded() - pos) * sizeof(value_type)); + + // create subview of the device data + auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + (rnum_bytes / sizeof(value_type)))); + // fill subview with constant data + Kokkos::parallel_for("device_ptr_memset", num_bytes, KOKKOS_LAMBDA(const std::size_t idx) { + // Cast the view's data pointer to unsigned char* (byte access) + reinterpret_cast(data_subview.data())[idx] = pattern; }); + + detail::device_synchronize(queue_); } template void device_ptr::fill(const value_type value, const size_type pos, const size_type count) { + PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); + + if (pos >= this->size_padded()) { + throw backend_exception{ fmt::format("Illegal access in fill!: {} >= {}", pos, this->size_padded()) }; + } + const size_type rcount = std::min(count, this->size_padded() - pos); + + // create subview of the device data + auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + rcount)); + // fill subview with constant data + Kokkos::deep_copy(data_subview, value); + + detail::device_synchronize(queue_); } template void device_ptr::copy_to_device(const_host_pointer_type data_to_copy, const size_type pos, const size_type count) { - PLSSVM_ASSERT(data_ != view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); const size_type rcount = std::min(count, this->size_padded() - pos); @@ -65,24 +89,39 @@ void device_ptr::copy_to_device(const_host_pointer_type data_to_copy, const s auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + rcount)); // copy the data to the device subview Kokkos::deep_copy(data_subview, host_view); + + detail::device_synchronize(queue_); } template void device_ptr::copy_to_device_strided(const_host_pointer_type data_to_copy, const std::size_t spitch, const std::size_t width, const std::size_t height) { - PLSSVM_ASSERT(data_ != nullptr, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); if (width > spitch) { throw backend_exception{ fmt::format("Invalid width and spitch combination specified (width: {} <= spitch: {})!", width, spitch) }; } - Kokkos::View view_2d{ data_.data(), this->shape_padded().x, this->shape_padded().y }; - // TODO: implement + // TODO: strided copy to device in Kokkos currently not possible + if (spitch == width) { + // can use normal copy since we have no line strides + this->copy_to_device(data_to_copy, 0, width * height); + } else { + std::vector temp(this->shape_padded().x * height, value_type{ 0.0 }); + value_type *pos = temp.data(); + for (std::size_t row = 0; row < height; ++row) { + std::memcpy(pos, data_to_copy + row * spitch, width * sizeof(value_type)); + pos += this->shape_padded().x; + } + this->copy_to_device(temp); + } + + detail::device_synchronize(queue_); } template void device_ptr::copy_to_host(host_pointer_type buffer, const size_type pos, const size_type count) const { - PLSSVM_ASSERT(data_ != view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); PLSSVM_ASSERT(buffer != nullptr, "Invalid host pointer for the data to copy!"); const size_type rcount = std::min(count, this->size_padded() - pos); @@ -93,12 +132,14 @@ void device_ptr::copy_to_host(host_pointer_type buffer, const size_type pos, auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + rcount)); // copy the data to the host Kokkos::deep_copy(host_view, data_subview); + + detail::device_synchronize(queue_); } template void device_ptr::copy_to_other_device(device_ptr &target, const size_type pos, const size_type count) const { - PLSSVM_ASSERT(data_ != view_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); - PLSSVM_ASSERT(target.get() != view_type{}, "Invalid target pointer! Maybe target has been default constructed?"); + PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(target.get() != device_view_type{}, "Invalid target pointer! Maybe target has been default constructed?"); const size_type rcount = std::min(count, this->size_padded() - pos); if (target.size_padded() < rcount) { @@ -109,6 +150,8 @@ void device_ptr::copy_to_other_device(device_ptr &target, const size_type pos std::vector temp(rcount); this->copy_to_host(temp, pos, rcount); target.copy_to_device(temp); + + detail::device_synchronize(queue_); } template class device_ptr; diff --git a/tests/backends/generic_device_ptr_tests.hpp b/tests/backends/generic_device_ptr_tests.hpp index 6a8713dc7..3f2407005 100644 --- a/tests/backends/generic_device_ptr_tests.hpp +++ b/tests/backends/generic_device_ptr_tests.hpp @@ -46,7 +46,7 @@ TYPED_TEST_P(DevicePtr, default_construct) { // empty data EXPECT_FALSE(static_cast(ptr)); - EXPECT_EQ(ptr.get(), nullptr); + EXPECT_EQ(ptr.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(ptr.size(), 0); EXPECT_EQ(ptr.shape(), (plssvm::shape{ 0, 0 })); EXPECT_TRUE(ptr.empty()); @@ -63,7 +63,7 @@ TYPED_TEST_P(DevicePtr, construct_size) { // check data EXPECT_TRUE(static_cast(ptr)); - EXPECT_NE(ptr.get(), nullptr); + EXPECT_NE(ptr.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(ptr.shape(), (plssvm::shape{ 42, 1 })); // check padding EXPECT_EQ(ptr.padding(), (plssvm::shape{ 0, 0 })); @@ -81,7 +81,7 @@ TYPED_TEST_P(DevicePtr, construct_shape) { // check data EXPECT_TRUE(static_cast(ptr)); - EXPECT_NE(ptr.get(), nullptr); + EXPECT_NE(ptr.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(ptr.shape(), (plssvm::shape{ 42, 16 })); // check padding EXPECT_EQ(ptr.padding(), (plssvm::shape{ 0, 0 })); @@ -99,7 +99,7 @@ 
TYPED_TEST_P(DevicePtr, construct_shape_and_padding) { // check data EXPECT_TRUE(static_cast(ptr)); - EXPECT_NE(ptr.get(), nullptr); + EXPECT_NE(ptr.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(ptr.shape(), (plssvm::shape{ 42, 16 })); // check padding EXPECT_EQ(ptr.padding(), (plssvm::shape{ 4, 4 })); @@ -119,7 +119,7 @@ TYPED_TEST_P(DevicePtr, move_construct) { // check data EXPECT_TRUE(static_cast(second)); // EXPECT_EQ(second.queue(), queue); - EXPECT_NE(second.get(), nullptr); + EXPECT_NE(second.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(second.shape(), (plssvm::shape{ 42, 1 })); // check padding EXPECT_EQ(second.padding(), (plssvm::shape{ 0, 0 })); @@ -127,7 +127,7 @@ TYPED_TEST_P(DevicePtr, move_construct) { // check moved-from data EXPECT_FALSE(static_cast(first)); - EXPECT_EQ(first.get(), nullptr); + EXPECT_EQ(first.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(first.shape(), (plssvm::shape{ 0, 0 })); // check padding EXPECT_EQ(first.padding(), (plssvm::shape{ 0, 0 })); @@ -147,7 +147,7 @@ TYPED_TEST_P(DevicePtr, move_construct_with_padding) { // check data EXPECT_TRUE(static_cast(second)); // EXPECT_EQ(second.queue(), queue); - EXPECT_NE(second.get(), nullptr); + EXPECT_NE(second.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(second.shape(), (plssvm::shape{ 42, 10 })); // check padding EXPECT_EQ(second.padding(), (plssvm::shape{ 4, 5 })); @@ -155,7 +155,7 @@ TYPED_TEST_P(DevicePtr, move_construct_with_padding) { // check moved-from data EXPECT_FALSE(static_cast(first)); - EXPECT_EQ(first.get(), nullptr); + EXPECT_EQ(first.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(first.shape(), (plssvm::shape{ 0, 0 })); // check padding EXPECT_EQ(first.padding(), (plssvm::shape{ 0, 0 })); @@ -177,7 +177,7 @@ TYPED_TEST_P(DevicePtr, move_assign) { // check data EXPECT_TRUE(static_cast(second)); - EXPECT_NE(second.get(), nullptr); + EXPECT_NE(second.get(), typename 
device_ptr_type::device_pointer_type{}); EXPECT_EQ(second.shape(), (plssvm::shape{ 42, 1 })); // check padding EXPECT_EQ(second.padding(), (plssvm::shape{ 0, 0 })); @@ -185,7 +185,7 @@ TYPED_TEST_P(DevicePtr, move_assign) { // check moved-from data EXPECT_FALSE(static_cast(first)); - EXPECT_EQ(first.get(), nullptr); + EXPECT_EQ(first.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(first.shape(), (plssvm::shape{ 0, 0 })); // check padding EXPECT_EQ(first.padding(), (plssvm::shape{ 0, 0 })); @@ -207,7 +207,7 @@ TYPED_TEST_P(DevicePtr, move_assign_with_padding) { // check data EXPECT_TRUE(static_cast(second)); - EXPECT_NE(second.get(), nullptr); + EXPECT_NE(second.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(second.shape(), (plssvm::shape{ 42, 10 })); // check padding EXPECT_EQ(second.padding(), (plssvm::shape{ 4, 5 })); @@ -215,7 +215,7 @@ TYPED_TEST_P(DevicePtr, move_assign_with_padding) { // check moved-from data EXPECT_FALSE(static_cast(first)); - EXPECT_EQ(first.get(), nullptr); + EXPECT_EQ(first.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(first.shape(), (plssvm::shape{ 0, 0 })); // check padding EXPECT_EQ(first.padding(), (plssvm::shape{ 0, 0 })); @@ -237,14 +237,14 @@ TYPED_TEST_P(DevicePtr, swap_member_function) { // check data EXPECT_TRUE(static_cast(second)); - EXPECT_NE(second.get(), nullptr); + EXPECT_NE(second.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(second.shape(), (plssvm::shape{ 42, 1 })); // check padding EXPECT_EQ(second.padding(), (plssvm::shape{ 0, 0 })); EXPECT_EQ(second.shape_padded(), (plssvm::shape{ 42, 1 })); EXPECT_FALSE(static_cast(first)); - EXPECT_EQ(first.get(), nullptr); + EXPECT_EQ(first.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(first.shape(), (plssvm::shape{ 0, 0 })); // check padding EXPECT_EQ(first.padding(), (plssvm::shape{ 0, 0 })); @@ -266,14 +266,14 @@ TYPED_TEST_P(DevicePtr, swap_member_function_with_padding) { // 
check data EXPECT_TRUE(static_cast(second)); - EXPECT_NE(second.get(), nullptr); + EXPECT_NE(second.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(second.shape(), (plssvm::shape{ 42, 10 })); // check padding EXPECT_EQ(second.padding(), (plssvm::shape{ 4, 5 })); EXPECT_EQ(second.shape_padded(), (plssvm::shape{ 46, 15 })); EXPECT_FALSE(static_cast(first)); - EXPECT_EQ(first.get(), nullptr); + EXPECT_EQ(first.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(first.shape(), (plssvm::shape{ 0, 0 })); // check padding EXPECT_EQ(first.padding(), (plssvm::shape{ 0, 0 })); @@ -296,14 +296,14 @@ TYPED_TEST_P(DevicePtr, swap_free_function) { // check data EXPECT_TRUE(static_cast(second)); - EXPECT_NE(second.get(), nullptr); + EXPECT_NE(second.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(second.shape(), (plssvm::shape{ 42, 1 })); // check padding EXPECT_EQ(second.padding(), (plssvm::shape{ 0, 0 })); EXPECT_EQ(second.shape_padded(), (plssvm::shape{ 42, 1 })); EXPECT_FALSE(static_cast(first)); - EXPECT_EQ(first.get(), nullptr); + EXPECT_EQ(first.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(first.shape(), (plssvm::shape{ 0, 0 })); // check padding EXPECT_EQ(first.padding(), (plssvm::shape{ 0, 0 })); @@ -326,14 +326,14 @@ TYPED_TEST_P(DevicePtr, swap_free_function_with_padding) { // check data EXPECT_TRUE(static_cast(second)); - EXPECT_NE(second.get(), nullptr); + EXPECT_NE(second.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(second.shape(), (plssvm::shape{ 42, 10 })); // check padding EXPECT_EQ(second.padding(), (plssvm::shape{ 4, 5 })); EXPECT_EQ(second.shape_padded(), (plssvm::shape{ 46, 15 })); EXPECT_FALSE(static_cast(first)); - EXPECT_EQ(first.get(), nullptr); + EXPECT_EQ(first.get(), typename device_ptr_type::device_pointer_type{}); EXPECT_EQ(first.shape(), (plssvm::shape{ 0, 0 })); // check padding EXPECT_EQ(first.padding(), (plssvm::shape{ 0, 0 })); From 
2fc2616318b39d56ada99f533450ce78a9076983 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 21 Oct 2024 15:18:48 +0200 Subject: [PATCH 012/123] Add total_size function to dim_type struct. --- include/plssvm/backends/execution_range.hpp | 9 +++++++++ tests/backends/execution_range.cpp | 14 ++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/plssvm/backends/execution_range.hpp b/include/plssvm/backends/execution_range.hpp index 52c78f8e1..c44aab7b6 100644 --- a/include/plssvm/backends/execution_range.hpp +++ b/include/plssvm/backends/execution_range.hpp @@ -77,6 +77,15 @@ struct [[nodiscard]] dim_type { swap_ull(z, other.z); } + /** + * @brief Return the total number of elements in the dimensional type. + * @details Equal to: `x * y * z`. + * @return the total number of elements (`[[nodiscard]]`) + */ + [[nodiscard]] constexpr unsigned long long total_size() const noexcept { + return x * y * z; + } + /// The dimensional size in x direction. unsigned long long x{ 1 }; /// The dimensional size in y direction. 
diff --git a/tests/backends/execution_range.cpp b/tests/backends/execution_range.cpp index 75fe16ef2..866dae83a 100644 --- a/tests/backends/execution_range.cpp +++ b/tests/backends/execution_range.cpp @@ -94,6 +94,20 @@ TEST(DimType, swap_free_function) { EXPECT_EQ(dim2.z, 1ull); } +TEST(DimType, total_size) { + // create dim types + constexpr plssvm::detail::dim_type dim1{}; + constexpr plssvm::detail::dim_type dim2{ 64ull }; + constexpr plssvm::detail::dim_type dim3{ 64ull, 32ull }; + constexpr plssvm::detail::dim_type dim4{ 64ull, 32ull, 16ull }; + + // test total_size function + EXPECT_EQ(dim1.total_size(), 1ull); + EXPECT_EQ(dim2.total_size(), 64ull); + EXPECT_EQ(dim3.total_size(), 2048ull); + EXPECT_EQ(dim4.total_size(), 32768ull); +} + TEST(DimType, equality) { // create dim types constexpr plssvm::detail::dim_type dim1{}; From 83dd160ad005039406658b08a67d6d191e231cfc Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 21 Oct 2024 15:20:33 +0200 Subject: [PATCH 013/123] Add first working (but WIP) Kokkos backend implementation. 
--- .../Kokkos/detail/standard_layout_tuple.hpp | 129 ++++++ .../plssvm/backends/Kokkos/detail/utility.hpp | 3 +- .../Kokkos/kernel/cg_explicit/blas.hpp | 359 +++++++++++++++- .../cg_explicit/kernel_matrix_assembly.hpp | 145 ++++++- .../kernel_matrix_assembly_blas.hpp | 253 +++++++++++- .../Kokkos/kernel/kernel_functions.hpp | 36 +- .../backends/Kokkos/kernel/predict_kernel.hpp | 371 +++++++++++++++++ src/main_predict.cpp | 4 +- src/main_train.cpp | 5 +- src/plssvm/backends/Kokkos/csvm.cpp | 384 ++++++++++-------- src/plssvm/backends/Kokkos/detail/utility.cpp | 4 +- 11 files changed, 1496 insertions(+), 197 deletions(-) create mode 100644 include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp diff --git a/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp b/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp new file mode 100644 index 000000000..3f5fddddd --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp @@ -0,0 +1,129 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Implementation of a basic and minimalistic tuple class which is standard-layout conform. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_STANDARD_LAYOUT_TUPLE_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_STANDARD_LAYOUT_TUPLE_HPP_ +#pragma once + +#include "plssvm/constants.hpp" // plssvm::real_type + +#include // std::size_t +#include // std::is_standard_layout +#include // std::forward + +namespace plssvm::kokkos::detail { + +/* + * Empty base implementation. + */ +template +struct standard_layout_tuple; + +/** + * @brief Save the value of type @p T as scalar and the remaining values of type @p Rest recursively in another standard layout tuple. 
+ * @tparam T the type of the value to save in this tuple + * @tparam Rest the remaining types saved in a recursive tuple + */ +template +struct standard_layout_tuple { + /// The stored value. + T value; + /// The remaining values stored in their own tuple. + standard_layout_tuple remaining; +}; + +/** + * @brief Special case for an empty tuple (recursion termination criterion). + */ +template <> +struct standard_layout_tuple<> { }; + +namespace impl { + +/** + * @brief Recursively traverse (at compile time) the tuple @p t and retrieve the value at position @p I. + * @tparam I the index of the tuple value to get + */ +template +struct get_impl { + /** + * @brief Recursively traverse (at compile time) the tuple @p t and retrieve the value at position @p I. + * @tparam Types the types in the tuple + * @param[in] t the tuple to traverse + * @return the requested value (`[[nodiscard]]`) + */ + template + KOKKOS_INLINE_FUNCTION constexpr static auto get(const standard_layout_tuple &t) { + return get_impl::get(t.remaining); + } +}; + +/** + * @brief Special case to retrieve the currently held value (recursion termination criterion). + */ +template <> +struct get_impl<0> { + /** + * @brief Get the held value from @p t. + * @tparam Types the types in the tuple + * @param[in] t the tuple to get the value from + * @return the requested value (`[[nodiscard]]`) + */ + template + KOKKOS_INLINE_FUNCTION constexpr static auto get(const standard_layout_tuple &t) { + return t.value; + } +}; + +} // namespace impl + +/** + * @brief Get the value at position @p I of the tuple @p t holding the @p Types. 
+ * @tparam I the position of the element in the tuple to get + * @tparam Types the types stored in the tuple + * @param[in] t the tuple + * @return the value of the tuple @p t at position @p I (`[[nodiscard]]`) + */ +template +KOKKOS_INLINE_FUNCTION constexpr auto get(const standard_layout_tuple &t) { + static_assert(I < sizeof...(Types), "Invalid standard_layout_tuple index!"); + return impl::get_impl::get(t); +} + +/** + * @brief Special case: return an empty tuple if no values have been provided. + * @return an empty tuple (`[[nodiscard]]`) + */ +[[nodiscard]] inline constexpr standard_layout_tuple<> make_standard_layout_tuple() { + return standard_layout_tuple<>{}; +} + +/** + * @brief Create a new tuple storing the values @p arg and @p remaining. + * @tparam T the type of the first value + * @tparam Rest the types of the remaining values (if any) + * @param[in,out] arg the first value + * @param[in,out] remaining the remaining values (if any) + * @return the constructed tuple (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline constexpr standard_layout_tuple make_standard_layout_tuple(T &&arg, Rest &&...remaining) { + return standard_layout_tuple{ std::forward(arg), make_standard_layout_tuple(std::forward(remaining)...) }; +} + +// sanity checks: be sure that the important use cases are indeed standard layout types! 
+static_assert(std::is_standard_layout_v>, "standard_layout_tuple<> has no standard layout!"); +static_assert(std::is_standard_layout_v>, "standard_layout_tuple has no standard layout!"); +static_assert(std::is_standard_layout_v>, "standard_layout_tuple has no standard layout!"); + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_STANDARD_LAYOUT_TUPLE_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/utility.hpp b/include/plssvm/backends/Kokkos/detail/utility.hpp index 3b7a9c706..523900aa9 100644 --- a/include/plssvm/backends/Kokkos/detail/utility.hpp +++ b/include/plssvm/backends/Kokkos/detail/utility.hpp @@ -80,7 +80,8 @@ void check_execution_space_target_platform_combination(execution_space space, ta [[nodiscard]] std::string get_device_name(execution_space space, std::size_t device_id); -void device_synchronize_all(); +void device_synchronize(const Kokkos::DefaultExecutionSpace& exec); + [[nodiscard]] std::string get_kokkos_version(); diff --git a/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp index 79f96283e..c12220b0b 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp @@ -13,30 +13,387 @@ #define PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_BLAS_HPP_ #pragma once -#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // TODO: view type aliases +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} #include "Kokkos_Core.hpp" // TODO: namespace plssvm::kokkos::detail { +/** + * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p 
alpha and @p beta are scalars. + */ class device_kernel_symm { public: + /** + * @brief Initialize the Kokkos kernel function object. + * @param[in] num_rows the number of rows in @p A and @p C + * @param[in] num_rhs the number of columns in @p B and @p C + * @param[in] device_specific_num_rows the number of rows in @p A and number of rows in @p B; the rows in @p A are potentially distributed across multiple devices + * @param[in] row_offset the first row this device is responsible for + * @param[in] alpha the scalar alpha value + * @param[in] A the matrix @p A + * @param[in] B the matrix @p B + * @param[in] beta the scalar beta value + * @param[in,out] C the matrix @p C, also used as result matrix + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + */ + device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, device_view_type A, device_view_type B, const real_type beta, device_view_type C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + num_rows_{ num_rows }, + num_rhs_{ num_rhs }, + device_specific_num_rows_{ device_specific_num_rows }, + row_offset_{ row_offset }, + alpha_{ alpha }, + A_{ A }, + B_{ B }, + beta_{ beta }, + C_{ C }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + KOKKOS_INLINE_FUNCTION + void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = 
static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // # rhs -> num_rhs + const auto i_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // # rows -> num_mirror_rows + const auto j_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size * sizeof(real_type))); + Kokkos::mdspan> A_cache{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> B_cache{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over all features using blocking to be able 
to cache them for faster memory accesses + for (std::size_t dim = 0; dim < (num_rows_ - row_offset_); dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_j = j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // determine on which side of the diagonal we are located + if (dim + threadIdx_y < global_j) { + A_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[(dim + threadIdx_y) * (num_rows_ - row_offset_ + PADDING_SIZE_sz) + global_j - (dim + threadIdx_y) * (dim + threadIdx_y + std::size_t{ 1 }) / std::size_t{ 2 }]; + } else { + A_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) + dim + threadIdx_y - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }]; + } + // determine on which side of the diagonal we are located + if (dim + threadIdx_y + THREAD_BLOCK_SIZE < global_j) { + A_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ - row_offset_ + PADDING_SIZE_sz) + global_j - (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz + std::size_t{ 1 }) / std::size_t{ 2 }]; + } else { + A_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz - global_j * (global_j + std::size_t{ 1 }) / std::size_t{ 2 }]; + } + + B_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = B_[(dim + row_offset_ + threadIdx_y) * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + B_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = B_[(dim + row_offset_ + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rhs_ + 
PADDING_SIZE_sz) + global_i]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the dot product calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + temp[internal_i][internal_j] += A_cache(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j) * B_cache(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // apply the (partial) BLAS operation and update C + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = i + static_cast(internal_i); + const auto device_global_j = j + static_cast(internal_j); + const auto global_j = row_offset_ + j + static_cast(internal_j); + + // be sure to not perform out of bounds accesses + if (global_i < num_rhs_ && device_global_j < device_specific_num_rows_) { + C_[global_j * (num_rhs_ + PADDING_SIZE_sz) + global_i] = alpha_ * temp[internal_i][internal_j] + beta_ * C_[global_j * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + } + } + } + } + private: + /// @cond Doxygen_suppress + const std::size_t num_rows_; + const std::size_t num_rhs_; + const std::size_t device_specific_num_rows_; + const std::size_t row_offset_; + const real_type alpha_; + device_view_type A_; + device_view_type B_; + const real_type beta_; + device_view_type C_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond }; +/** + * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` 
matrix, and @p alpha and @p beta are scalars. + * @details In a multi-GPU setting, this function is responsible for mirroring down the columns this device is responsible for! + */ class device_kernel_symm_mirror { public: + /** + * @brief Initialize the Kokkos kernel function object. + * @param[in] num_rows the number of rows in @p A and @p C + * @param[in] num_rhs the number of columns in @p B and @p C + * @param[in] num_mirror_rows the number of rows to mirror down + * @param[in] device_specific_num_rows the number of rows in @p A and number of rows in @p B; the rows in @p A are potentially distributed across multiple devices + * @param[in] row_offset the first row this device is responsible for + * @param[in] alpha the scalar alpha value + * @param[in] A the matrix @p A + * @param[in] B the matrix @p B + * @param[in] beta the scalar beta value + * @param[in,out] C the matrix @p C, also used as result matrix + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + */ + device_kernel_symm_mirror(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t num_mirror_rows, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, device_view_type A, device_view_type B, const real_type beta, device_view_type C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + num_rows_{ num_rows }, + num_rhs_{ num_rhs }, + num_mirror_rows_{ num_mirror_rows }, + device_specific_num_rows_{ device_specific_num_rows }, + row_offset_{ row_offset }, + alpha_{ alpha }, + A_{ A }, + B_{ B }, + beta_{ beta }, + C_{ C }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + KOKKOS_INLINE_FUNCTION + void operator()(const 
Kokkos::TeamPolicy<>::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // # rhs -> num_rhs + const auto i_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // # rows -> num_mirror_rows + const auto j_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size * sizeof(real_type))); + Kokkos::mdspan> A_cache{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE 
}; + Kokkos::mdspan> B_cache{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over the remaining features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < device_specific_num_rows_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_j = j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + A_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[(dim + threadIdx_y) * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - (dim + threadIdx_y - std::size_t{ 1 }) * (dim + threadIdx_y) / std::size_t{ 2 } + device_specific_num_rows_ - (dim + threadIdx_y) + global_j]; + A_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = A_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz - std::size_t{ 1 }) * (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) / std::size_t{ 2 } + device_specific_num_rows_ - (dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) + global_j]; + B_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = B_[(row_offset_ + dim + threadIdx_y) * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + B_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = B_[(row_offset_ + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the feature reduction calculation + for (unsigned 
block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + temp[internal_i][internal_j] += A_cache(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j) * B_cache(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // apply the (remaining) BLAS operation and update C + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = i + static_cast(internal_i); + const auto partial_global_j = j + static_cast(internal_j); + const auto global_j = row_offset_ + device_specific_num_rows_ + j + static_cast(internal_j); + + // be sure to not perform out of bounds accesses + if (global_i < num_rhs_ && partial_global_j < num_mirror_rows_) { + C_[global_j * (num_rhs_ + PADDING_SIZE_sz) + global_i] = alpha_ * temp[internal_i][internal_j] + beta_ * C_[global_j * (num_rhs_ + PADDING_SIZE_sz) + global_i]; + } + } + } + } + private: + /// @cond Doxygen_suppress + const std::size_t num_rows_; + const std::size_t num_rhs_; + const std::size_t num_mirror_rows_; + const std::size_t device_specific_num_rows_; + const std::size_t row_offset_; + const real_type alpha_; + device_view_type A_; + device_view_type B_; + const real_type beta_; + device_view_type C_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond }; +/** + * @brief Perform a simple inplace matrix addition: lhs += rhs. + */ class device_kernel_inplace_matrix_add { public: + /** + * @brief Initialize the Kokkos kernel function object. 
+ * @param[in] num_cols the number of columns in both matrices + * @param[in,out] lhs the first matrix (updated inplace) + * @param[in] rhs the second matrix + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + */ + device_kernel_inplace_matrix_add(const std::size_t num_cols, device_view_type lhs, device_view_type rhs, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + num_cols_{ num_cols }, + lhs_{ lhs }, + rhs_{ rhs }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + KOKKOS_INLINE_FUNCTION + void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // Calculate the indices used in the current thread + const auto i = 
(blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // num_rows + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // num_rhs + + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = i + static_cast(internal_i); + const auto global_j = j + static_cast(internal_j); + + // if (global_i < lhs_.extent(0) && global_j < rhs_.extent(0)) { // TODO: + lhs_[global_i * (num_cols_ + PADDING_SIZE_sz) + global_j] += rhs_[global_i * (num_cols_ + PADDING_SIZE_sz) + global_j]; + // } + } + } + } + private: + /// @cond Doxygen_suppress + const std::size_t num_cols_; + device_view_type lhs_; + device_view_type rhs_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond }; +/** + * @brief Perform a simple inplace matrix scale: lhs *= scalar. + */ class device_kernel_inplace_matrix_scale { public: + /** + * @brief Initialize the Kokkos kernel function object. 
+ * @param[in] num_cols the number of columns in the matrix + * @param[in,out] lhs the first matrix (updated inplace) + * @param[in] scale the value to scale + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + */ + device_kernel_inplace_matrix_scale(const std::size_t num_cols, device_view_type lhs, const real_type scale, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + num_cols_{ num_cols }, + lhs_{ lhs }, + scale_{ scale }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + KOKKOS_INLINE_FUNCTION + void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // Calculate the indices used in the current thread + const auto i 
= (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // num_rows + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // num_rhs + + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = i + static_cast(internal_i); + const auto global_j = j + static_cast(internal_j); + + // if (global_i < lhs_.extent(0)) { // TODO: + lhs_[global_i * (num_cols_ + PADDING_SIZE_sz) + global_j] *= scale_; + // } + } + } + } + private: + /// @cond Doxygen_suppress + const std::size_t num_cols_; + device_view_type lhs_; + const real_type scale_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond }; } // namespace plssvm::kokkos::detail diff --git a/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp index ff74257b9..ad9397377 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp @@ -13,18 +13,157 @@ #define PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_ #pragma once -#include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} -#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // TODO: view type aliases +#include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple +#include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, 
apply_kernel_function} +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "Kokkos_Core.hpp" // TODO: +#include // std::size_t + namespace plssvm::kokkos::detail { +/** + * @brief Create the explicit kernel matrix using the @p kernel_function. + * @tparam kernel_function the type of the used kernel function + * @tparam Args the types of the parameters necessary for the specific kernel function; stored in a `standard_layout_tuple` + */ template class device_kernel_assembly { public: + /** + * @brief Initialize the Kokkos kernel function object. + * @param[in] cgh the SYCL handler used to allocate the local memory + * @param[out] kernel_matrix_d the calculated kernel matrix + * @param[in] data_d the data points to calculate the kernel matrix from + * @param[in] num_rows the number of data points + * @param[in] device_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data_d the current device is responsible for + * @param[in] num_features the number of features per data point + * @param[in] q the vector used in the dimensional reduction + * @param[in] QA_cost the scalar used in the dimensional reduction + * @param[in] cost the cost factor the diagonal is scaled with + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function + */ + device_kernel_assembly(device_view_type kernel_matrix_d, device_view_type data_d, const std::size_t num_rows, const std::size_t device_num_rows, const std::size_t row_offset, const std::size_t num_features, device_view_type q, const real_type 
QA_cost, const real_type cost, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x, Args... kernel_function_parameter) : + kernel_matrix_d_{ kernel_matrix_d }, + data_d_{ data_d }, + num_rows_{ num_rows }, + device_num_rows_{ device_num_rows }, + row_offset_{ row_offset }, + num_features_{ num_features }, + q_{ q }, + QA_cost_{ QA_cost }, + cost_{ cost }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x }, + kernel_function_parameter_{ detail::make_standard_layout_tuple(std::forward(kernel_function_parameter)...) } { + } + + KOKKOS_INLINE_FUNCTION + void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // # rhs -> num_rhs + 
const auto i_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto j = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // # rows -> num_mirror_rows + const auto j_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + Kokkos::mdspan> data_cache_i{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_j{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // only calculate the upper triangular matrix -> can't use threadIdx since all threads in a warp must progress further + if (blockIdx_x >= blockIdx_y) { + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_features_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = row_offset_ + i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_j = row_offset_ + j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + data_cache_i(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + 1ull + PADDING_SIZE_sz) + global_i]; + data_cache_i(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + 1ull + PADDING_SIZE_sz) + global_i]; + 
data_cache_j(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + 1ull + PADDING_SIZE_sz) + global_j]; + data_cache_j(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + 1ull + PADDING_SIZE_sz) + global_j]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the feature reduction calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + temp[internal_i][internal_j] += detail::feature_reduce(data_cache_i(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i), + data_cache_j(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j)); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // apply the remaining part of the kernel function and store the value in the output kernel matrix + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + // calculate the indices to access the kernel matrix (the part stored on the current device) + const auto device_global_i = i + static_cast(internal_i); + const auto global_i = row_offset_ + i + static_cast(internal_i); + const auto device_global_j = j + static_cast(internal_j); + const auto global_j = row_offset_ + j + static_cast(internal_j); + + // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) + if (device_global_i < (num_rows_ - row_offset_) && device_global_j < device_num_rows_ && global_i >= global_j) { + real_type temp_ij = temp[internal_i][internal_j]; + temp_ij = detail::apply_kernel_function(temp_ij, 
kernel_function_parameter_) + QA_cost_ - q_[global_i] - q_[global_j]; + // apply the cost on the diagonal + if (global_i == global_j) { + temp_ij += cost_; + } + // update the kernel matrix + kernel_matrix_d_[device_global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - device_global_j * (device_global_j + 1ull) / 2ull + device_global_i] = temp_ij; + } + } + } + } + } + private: + /// @cond Doxygen_suppress + device_view_type kernel_matrix_d_; + device_view_type data_d_; + const std::size_t num_rows_; + const std::size_t device_num_rows_; + const std::size_t row_offset_; + const std::size_t num_features_; + device_view_type q_; + const real_type QA_cost_; + const real_type cost_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + const detail::standard_layout_tuple kernel_function_parameter_; + /// @endcond }; } // namespace plssvm::kokkos::detail diff --git a/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp index 2d9e855b2..2f0f6619c 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp @@ -9,24 +9,265 @@ * @brief Functions for implicitly assembling the kernel matrix using the Kokkos backend. 
*/ -#ifndef PLSSVM_BACKENDS_SYCL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ -#define PLSSVM_BACKENDS_SYCL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ +#ifndef PLSSVM_BACKENDS_KOKKOS_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ #pragma once -#include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} -#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // TODO: view type aliases +#include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple +#include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "Kokkos_Core.hpp" // TODO: Kokkos::atomic_add namespace plssvm::kokkos::detail { +/** + * @brief Perform an implicit BLAS SYMM-like operation: `C = alpha * A * B + C` where `A` is the implicitly calculated kernel matrix using the @p kernel_function (never actually stored, reducing the amount of needed global memory), @p B and @p C are matrices, and @p alpha is a scalar. + * @tparam kernel_function the type of the used kernel function + * @tparam Args the types of the parameters necessary for the specific kernel function + */ template class device_kernel_assembly_symm { public: + /** + * @brief Initialize the Kokkos kernel function object. 
+ * @param[in] alpha the scalar alpha value + * @param[in] q the vector used in the dimensional reduction + * @param[in] data_d the data points to calculate the implicit kernel matrix from + * @param[in] num_rows the total number of data points (= total number of rows) + * @param[in] device_num_rows the number of rows the current device is responsible for + * @param[in] row_offset the first row in @p data_d the current device is responsible for + * @param[in] num_features the number of features per data point + * @param[in] QA_cost the scalar used in the dimensional reduction + * @param[in] cost the cost factor the diagonal is scaled with + * @param[in] B the matrix @p B + * @param[in,out] C the matrix @p C + * @param[in] num_classes the number of classes in the data set + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function + */ + device_kernel_assembly_symm(const real_type alpha, device_view_type q, device_view_type data_d, const std::size_t num_rows, const std::size_t device_num_rows, const std::size_t row_offset, const std::size_t num_features, const real_type QA_cost, const real_type cost, device_view_type B, device_view_type C, const std::size_t num_classes, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x, Args... 
kernel_function_parameter) : + alpha_{ alpha }, + q_{ q }, + data_d_{ data_d }, + num_rows_{ num_rows }, + device_num_rows_{ device_num_rows }, + row_offset_{ row_offset }, + num_features_{ num_features }, + QA_cost_{ QA_cost }, + cost_{ cost }, + B_{ B }, + C_{ C }, + num_classes_{ num_classes }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x }, + kernel_function_parameter_{ detail::make_standard_layout_tuple(std::forward(kernel_function_parameter)...) } { } + + KOKKOS_INLINE_FUNCTION + void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto i = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; // # rhs -> num_rhs + const auto i_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto j = 
(blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; // # rows -> num_mirror_rows + const auto j_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // only calculate the upper triangular matrix -> can't use threadIdx since all threads in a warp must progress further + if (blockIdx_x >= blockIdx_y) { + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // create the shared memory arrays used for caching data point features; NOTE(review): get_shmem() is documented to take a size in bytes -- confirm whether a sizeof(real_type) factor is missing + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + + { + // create the shared memory arrays used for caching data point features + Kokkos::mdspan> data_cache_i{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_j{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_features_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = row_offset_ + i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_j = row_offset_ + j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + data_cache_i(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_i]; + data_cache_i(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_i]; + 
data_cache_j(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_j]; + data_cache_j(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_j]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the feature reduction calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + temp[internal_i][internal_j] += detail::feature_reduce(data_cache_i(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i), + data_cache_j(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j)); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + } + + // apply the remaining part of the kernel function and store the value in the output kernel matrix + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = row_offset_ + i + static_cast(internal_i); + const auto device_global_i = i + static_cast(internal_i); + const auto global_j = row_offset_ + j + static_cast(internal_j); + const auto device_global_j = j + static_cast(internal_j); + + // be sure to not perform out of bounds accesses for the kernel matrix (only using the upper triangular matrix) + if ((device_global_i < (num_rows_ - row_offset_) && device_global_j < device_num_rows_ && global_i >= global_j)) { + temp[internal_i][internal_j] = detail::apply_kernel_function(temp[internal_i][internal_j], kernel_function_parameter_) + QA_cost_ - q_[global_i] - q_[global_j]; + // apply the cost 
on the diagonal + if (global_i == global_j) { + temp[internal_i][internal_j] += cost_; + } + } else { + // be sure to set the value to zero otherwise + temp[internal_i][internal_j] = real_type{ 0.0 }; + } + } + } + + // calculate C += alpha * temp * B for the UPPER triangular matrix + { + // same shared memory size but with different dimensions + Kokkos::mdspan> B_cache{ data_cache_ptr, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE, FEATURE_BLOCK_SIZE }; + Kokkos::mdspan> C_out_cache{ data_cache_ptr + shmem_size, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE, FEATURE_BLOCK_SIZE }; + + // iterate over all classes using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_classes_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = row_offset_ + i_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + B_cache(internal * THREAD_BLOCK_SIZE + threadIdx_x, threadIdx_y) = alpha_ * B_[global_i * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y]; + B_cache(internal * THREAD_BLOCK_SIZE + threadIdx_x, threadIdx_y + THREAD_BLOCK_SIZE) = alpha_ * B_[global_i * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz]; + C_out_cache(internal * THREAD_BLOCK_SIZE + threadIdx_x, threadIdx_y) = real_type{ 0.0 }; + C_out_cache(internal * THREAD_BLOCK_SIZE + threadIdx_x, threadIdx_y + THREAD_BLOCK_SIZE) = real_type{ 0.0 }; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // calculate intermediate results and store them in shared memory + for (unsigned class_idx = 0; class_idx < FEATURE_BLOCK_SIZE; ++class_idx) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + 
C_out_cache(threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j, (class_idx + threadIdx_x) % FEATURE_BLOCK_SIZE) += + temp[internal_i][internal_j] * B_cache(threadIdx_x * INTERNAL_BLOCK_SIZE + internal_i, (class_idx + threadIdx_x) % FEATURE_BLOCK_SIZE); + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // add intermediate cached results to C + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_j = row_offset_ + j + static_cast(internal); + Kokkos::atomic_add(&C_[global_j * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_x], C_out_cache(threadIdx_y * INTERNAL_BLOCK_SIZE + internal, threadIdx_x)); + Kokkos::atomic_add(&C_[global_j * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_x + THREAD_BLOCK_SIZE_sz], C_out_cache(threadIdx_y * INTERNAL_BLOCK_SIZE + internal, threadIdx_x + THREAD_BLOCK_SIZE)); + } + team.team_barrier(); // wait until all threads updated C with their values + } + } + + // set potential diagonal entries in temp to 0.0 such that we don't apply the main diagonal twice to C + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + const auto global_i = row_offset_ + i + static_cast(internal_i); + const auto global_j = row_offset_ + j + static_cast(internal_j); + + if (global_i == global_j) { + temp[internal_i][internal_j] = real_type{ 0.0 }; + } + } + } + + // calculate C += alpha * temp * B for the LOWER triangular matrix + { + // same shared memory size but with different dimensions + Kokkos::mdspan> B_cache{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> C_out_cache{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // iterate over all classes using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < 
num_classes_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_j = row_offset_ + j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + B_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_ * B_[global_j * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y]; + B_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_ * B_[global_j * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz]; + C_out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = real_type{ 0.0 }; + C_out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = real_type{ 0.0 }; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // calculate intermediate results and store them in shared memory + for (unsigned class_idx = 0; class_idx < FEATURE_BLOCK_SIZE; ++class_idx) { + for (unsigned internal_i = 0; internal_i < INTERNAL_BLOCK_SIZE; ++internal_i) { + for (unsigned internal_j = 0; internal_j < INTERNAL_BLOCK_SIZE; ++internal_j) { + C_out_cache((class_idx + threadIdx_y) % FEATURE_BLOCK_SIZE, internal_i * THREAD_BLOCK_SIZE + threadIdx_x) += + temp[internal_i][internal_j] * B_cache((class_idx + threadIdx_y) % FEATURE_BLOCK_SIZE, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_j); + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // add intermediate cached results to C + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_i = row_offset_ + i + static_cast(internal); + Kokkos::atomic_add(&C_[global_i * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y], C_out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x)); + 
Kokkos::atomic_add(&C_[global_i * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz], C_out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x)); + } + team.team_barrier(); // wait until all threads updated C with their values + } + } + } + } + private: + /// @cond Doxygen_suppress + const real_type alpha_; + device_view_type q_; + device_view_type data_d_; + const std::size_t num_rows_; + const std::size_t device_num_rows_; + const std::size_t row_offset_; + const std::size_t num_features_; + const real_type QA_cost_; + const real_type cost_; + device_view_type B_; + device_view_type C_; + const std::size_t num_classes_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + const detail::standard_layout_tuple kernel_function_parameter_; + /// @endcond }; } // namespace plssvm::kokkos::detail -#endif // PLSSVM_BACKENDS_SYCL_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ +#endif // PLSSVM_BACKENDS_KOKKOS_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp index f7f422659..952b1e99f 100644 --- a/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp +++ b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp @@ -12,14 +12,14 @@ #ifndef PLSSVM_BACKENDS_KOKKOS_KERNEL_KERNEL_FUNCTIONS_HPP_ #define PLSSVM_BACKENDS_KOKKOS_KERNEL_KERNEL_FUNCTIONS_HPP_ -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/detail/utility.hpp" // plssvm::detail::always_false_v -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/detail/utility.hpp" // plssvm::detail::always_false_v +#include "plssvm/kernel_function_types.hpp" 
// plssvm::kernel_function_type #include "Kokkos_MathematicalFunctions.hpp" // Kokkos::pow, Kokkos::exp, Kokkos::tanh, Kokkos::abs -#include // std::numeric_limits -#include // std::tuple, std::get +#include // std::is_same_v namespace plssvm::kokkos::detail { @@ -35,7 +35,7 @@ namespace plssvm::kokkos::detail { * @return the reduced value (`[[nodiscard]]`) */ template -[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { +KOKKOS_INLINE_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { return val1 * val2; } @@ -46,7 +46,7 @@ template * @return the reduced value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { +KOKKOS_INLINE_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { const real_type d = val1 - val2; return d * d; } @@ -58,7 +58,7 @@ template <> * @return the reduced value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { +KOKKOS_INLINE_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { return ::Kokkos::fabs(val1 - val2); } @@ -70,9 +70,13 @@ template <> * @return the reduced value (`[[nodiscard]]`) */ template <> -[[nodiscard]] inline real_type feature_reduce(const real_type val1, const real_type val2) { +KOKKOS_INLINE_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { const real_type d = val1 - val2; - return (real_type{ 1.0 } / (val1 + val2 + std::numeric_limits::min())) * d * d; + if constexpr (std::is_same_v) { + return (real_type{ 1.0 } / (val1 + val2 + FLT_MIN)) * d * d; // TODO: std::numeric_limits::min + } else { + return (real_type{ 1.0 } / (val1 + val2 + DBL_MIN)) * d * d; // TODO: std::numeric_limits::min + } } //***************************************************// @@ -88,19 +92,19 @@ template <> * @return the result value (`[[nodiscard]]`) */ 
template -[[nodiscard]] inline real_type apply_kernel_function(const real_type value, const std::tuple params) { +KOKKOS_INLINE_FUNCTION real_type apply_kernel_function(const real_type value, const detail::standard_layout_tuple params) { if constexpr (kernel_function == kernel_function_type::linear) { return value; } else if constexpr (kernel_function == kernel_function_type::polynomial) { - return ::Kokkos::pow(std::get<1>(params) * value + std::get<2>(params), std::get<0>(params)); + return ::Kokkos::pow(detail::get<1>(params) * value + detail::get<2>(params), detail::get<0>(params)); } else if constexpr (kernel_function == kernel_function_type::rbf) { - return ::Kokkos::exp(-std::get<0>(params) * value); + return ::Kokkos::exp(-detail::get<0>(params) * value); } else if constexpr (kernel_function == kernel_function_type::sigmoid) { - return ::Kokkos::tanh(std::get<0>(params) * value + std::get<1>(params)); + return ::Kokkos::tanh(detail::get<0>(params) * value + detail::get<1>(params)); } else if constexpr (kernel_function == kernel_function_type::laplacian) { - return ::Kokkos::exp(-std::get<0>(params) * value); + return ::Kokkos::exp(-detail::get<0>(params) * value); } else if constexpr (kernel_function == kernel_function_type::chi_squared) { - return ::Kokkos::exp(-std::get<0>(params) * value); + return ::Kokkos::exp(-detail::get<0>(params) * value); } else { static_assert(::plssvm::detail::always_false_v, "Unsupported kernel function!"); } diff --git a/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp index a203cb7e9..629a0901f 100644 --- a/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp +++ b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp @@ -13,6 +13,7 @@ #define PLSSVM_BACKENDS_KOKKOS_PREDICT_KERNEL_HPP_ #pragma once +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // TODO: view type aliases #include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // 
plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type @@ -21,20 +22,390 @@ namespace plssvm::kokkos::detail { +/** + * @brief Calculate the `w` vector used to speedup the prediction using the linear kernel function. + */ class device_kernel_w_linear { public: + /** + * @brief Initialize the Kokkos kernel function object. + * @param[in,out] w_d the vector to speedup the linear prediction + * @param[in] alpha_d the previously learned weights + * @param[in] sv_d the support vectors + * @param[in] num_classes the number of classes + * @param[in] num_sv the number of support vectors + * @param[in] device_specific_num_sv the number of support vectors the current device is responsible for + * @param[in] sv_offset the first support vector (row in @p alpha_d) the current device is responsible for + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + */ + device_kernel_w_linear(device_view_type w_d, device_view_type alpha_d, device_view_type sv_d, const std::size_t num_classes, const std::size_t num_sv, const std::size_t device_specific_num_sv, const std::size_t sv_offset, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + w_d_{ w_d }, + alpha_d_{ alpha_d }, + sv_d_{ sv_d }, + num_classes_{ num_classes }, + num_sv_{ num_sv }, + device_specific_num_sv_{ device_specific_num_sv }, + sv_offset_{ sv_offset }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + KOKKOS_INLINE_FUNCTION + void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + // cast all values 
to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto feature_idx = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; + const auto feature_idx_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto class_idx = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; + const auto class_idx_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features; NOTE(review): get_shmem() is documented to take a size in bytes -- confirm whether a sizeof(real_type) factor is missing + constexpr std::size_t shmem_size = THREAD_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + Kokkos::mdspan> data_cache_feature{ data_cache_ptr, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_alpha{ data_cache_ptr + shmem_size, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // create a thread private 
array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over all support vectors using blocking to be able to cache them for faster memory accesses + for (std::size_t sv = 0; sv < device_specific_num_sv_; sv += THREAD_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_feature_idx = feature_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_class_idx = class_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + data_cache_feature(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = sv_d_[global_feature_idx * (device_specific_num_sv_ + PADDING_SIZE_sz) + sv + threadIdx_y]; // SoA + data_cache_alpha(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_d_[global_class_idx * (num_sv_ + PADDING_SIZE_sz) + sv + sv_offset_ + threadIdx_y]; // AoS + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the dot product calculation + for (unsigned block_dim = 0; block_dim < THREAD_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) { + for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { + temp[internal_feature][internal_class] += data_cache_alpha(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_class) * data_cache_feature(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_feature); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // update global array with local one + for (unsigned internal_feature = 0; internal_feature < INTERNAL_BLOCK_SIZE; ++internal_feature) { + for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { + const auto global_feature_idx = feature_idx + static_cast(internal_feature); + const auto 
global_class_idx = class_idx + static_cast(internal_class); + + w_d_[global_feature_idx * (num_classes_ + PADDING_SIZE_sz) + global_class_idx] = temp[internal_feature][internal_class]; + } + } + } + private: + /// @cond Doxygen_suppress + device_view_type w_d_; + device_view_type alpha_d_; + device_view_type sv_d_; + const std::size_t num_classes_; + const std::size_t num_sv_; + const std::size_t device_specific_num_sv_; + const std::size_t sv_offset_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond }; +/** + * @brief Predict the @p predict_points_d using the linear kernel speeding up the calculation using the @p w_d vector. + */ class device_kernel_predict_linear { public: + /** + * @brief Initialize the Kokkos kernel function object. + * @param[out] prediction_d the predicted values + * @param[in] w_d the vector to speedup the calculations + * @param[in] rho_d the previously learned bias + * @param[in] predict_points_d the data points to predict + * @param[in] num_classes the number of classes + * @param[in] num_predict_points the number of data points to predict + * @param[in] num_features the number of features per data point + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + */ + device_kernel_predict_linear(device_view_type prediction_d, device_view_type w_d, device_view_type rho_d, device_view_type predict_points_d, const std::size_t num_classes, const std::size_t num_predict_points, const std::size_t num_features, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : + prediction_d_{ prediction_d }, + w_d_{ w_d }, + rho_d_{ rho_d }, + predict_points_d_{ predict_points_d }, + num_classes_{ num_classes }, + num_predict_points_{ num_predict_points }, 
+ num_features_{ num_features }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x } { } + + KOKKOS_INLINE_FUNCTION + void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto pp_idx = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; + const auto pp_idx_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto class_idx = (blockIdx_y * blockDim_y + threadIdx_y) * INTERNAL_BLOCK_SIZE_sz; + const auto class_idx_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + // create the shared memory arrays used for caching data point features + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = 
static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + Kokkos::mdspan> data_cache_pp{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_w{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_features_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_pp_idx = pp_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + const auto global_class_idx = class_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + data_cache_pp(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = predict_points_d_[(dim + threadIdx_y) * (num_predict_points_ + PADDING_SIZE_sz) + global_pp_idx]; + data_cache_pp(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = predict_points_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_predict_points_ + PADDING_SIZE_sz) + global_pp_idx]; + data_cache_w(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = w_d_[(dim + threadIdx_y) * (num_classes_ + PADDING_SIZE_sz) + global_class_idx]; + data_cache_w(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = w_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_classes_ + PADDING_SIZE_sz) + global_class_idx]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the dot product calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_pd = 0; internal_pd < 
INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { + temp[internal_pd][internal_class] += data_cache_w(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_class) * data_cache_pp(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_pd); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // update global array with local one + for (unsigned internal_pd = 0; internal_pd < INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_class = 0; internal_class < INTERNAL_BLOCK_SIZE; ++internal_class) { + const auto global_pp_idx = pp_idx + static_cast(internal_pd); + const auto global_class_idx = class_idx + static_cast(internal_class); + + prediction_d_[global_pp_idx * (num_classes_ + PADDING_SIZE_sz) + global_class_idx] = temp[internal_pd][internal_class] - rho_d_[global_class_idx]; + } + } + } + private: + /// @cond Doxygen_suppress + device_view_type prediction_d_; + device_view_type w_d_; + device_view_type rho_d_; + device_view_type predict_points_d_; + const std::size_t num_classes_; + const std::size_t num_predict_points_; + const std::size_t num_features_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + /// @endcond }; +/** + * @brief Predict the @p predict_points_d using the @p kernel_function. + * @tparam kernel_function the type of the used kernel function + * @tparam Args the types of the parameters necessary for the specific kernel function + */ template class device_kernel_predict { public: + /** + * @brief Initialize the Kokkos kernel function object.
+ * @param[in] prediction_d the predicted values + * @param[in] alpha_d the previously learned weights + * @param[in] rho_d the previously learned biases + * @param[in] sv_d the support vectors + * @param[in] predict_points_d the data points to predict + * @param[in] num_classes the number of classes + * @param[in] num_sv the number of support vectors + * @param[in] num_predict_points the number of data points to predict + * @param[in] num_features the number of features per data point + * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function + */ + device_kernel_predict(device_view_type prediction_d, device_view_type alpha_d, device_view_type rho_d, device_view_type sv_d, device_view_type predict_points_d, const std::size_t num_classes, const std::size_t num_sv, const std::size_t num_predict_points, const std::size_t num_features, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x, Args... kernel_function_parameter) : + prediction_d_{ prediction_d }, + alpha_d_{ alpha_d }, + rho_d_{ rho_d }, + sv_d_{ sv_d }, + predict_points_d_{ predict_points_d }, + num_classes_{ num_classes }, + num_sv_{ num_sv }, + num_predict_points_{ num_predict_points }, + num_features_{ num_features }, + grid_x_offset_{ grid_x_offset }, + grid_y_offset_{ grid_y_offset }, + grid_size_x_{ grid_size_x }, + kernel_function_parameter_{ detail::make_standard_layout_tuple(std::forward(kernel_function_parameter)...) 
} { } + + KOKKOS_INLINE_FUNCTION + void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows + const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); + const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); + const auto FEATURE_BLOCK_SIZE_sz = static_cast(FEATURE_BLOCK_SIZE); + const auto PADDING_SIZE_sz = static_cast(PADDING_SIZE); + const auto threadIdx_x = static_cast(team.team_rank()) / THREAD_BLOCK_SIZE_sz; // current thread in block x-dimension + const auto threadIdx_y = static_cast(team.team_rank()) % THREAD_BLOCK_SIZE_sz; // current thread in block y-dimension + const auto blockDim_x = THREAD_BLOCK_SIZE_sz; // number of threads in block x-dimension + const auto blockDim_y = THREAD_BLOCK_SIZE_sz; // number of threads in block y-dimension + const auto blockIdx_x = static_cast(team.league_rank()) % grid_size_x_ + grid_x_offset_; // current block in grid x-dimension + offsets if the grid size would be too large + const auto blockIdx_y = static_cast(team.league_rank()) / grid_size_x_ + grid_y_offset_; // current block in grid y-dimension + offsets if the grid size would be too large + + // calculate the indices used in the current thread + const auto pp_idx = (blockIdx_x * blockDim_x + threadIdx_x) * INTERNAL_BLOCK_SIZE_sz; + const auto pp_idx_linear = blockIdx_x * blockDim_x * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + const auto sv_idx_linear = blockIdx_y * blockDim_y * INTERNAL_BLOCK_SIZE_sz + threadIdx_x; + + constexpr std::size_t shmem_size = FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE; + real_type *data_cache_ptr = static_cast(team.team_shmem().get_shmem(2 * shmem_size)); + + // create a thread private array used for internal caching + real_type temp[INTERNAL_BLOCK_SIZE][INTERNAL_BLOCK_SIZE]{}; + + { + // create the shared memory arrays used for caching data point features + Kokkos::mdspan> data_cache_pp{ data_cache_ptr, 
FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> data_cache_sv{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_features_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_pp_idx = pp_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE; + const auto global_sv_idx = sv_idx_linear + static_cast(internal) * THREAD_BLOCK_SIZE; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + data_cache_pp(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = predict_points_d_[(dim + threadIdx_y) * (num_predict_points_ + PADDING_SIZE_sz) + global_pp_idx]; + data_cache_pp(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = predict_points_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_predict_points_ + PADDING_SIZE_sz) + global_pp_idx]; + data_cache_sv(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = sv_d_[(dim + threadIdx_y) * (num_sv_ + PADDING_SIZE_sz) + global_sv_idx]; + data_cache_sv(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = sv_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_sv_ + PADDING_SIZE_sz) + global_sv_idx]; + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // perform the feature reduction calculation + for (unsigned block_dim = 0; block_dim < FEATURE_BLOCK_SIZE; ++block_dim) { + for (unsigned internal_pd = 0; internal_pd < INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { + temp[internal_pd][internal_sv] += detail::feature_reduce(data_cache_sv(block_dim, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_sv), 
+ data_cache_pp(block_dim, threadIdx_x * INTERNAL_BLOCK_SIZE + internal_pd)); + } + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + } + + // update temp using the respective kernel function + for (unsigned internal_pd = 0; internal_pd < INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { + temp[internal_pd][internal_sv] = detail::apply_kernel_function(temp[internal_pd][internal_sv], kernel_function_parameter_); + } + } + + { + // create the shared memory arrays used for caching data point features + Kokkos::mdspan> alpha_cache{ data_cache_ptr, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + Kokkos::mdspan> out_cache{ data_cache_ptr + shmem_size, FEATURE_BLOCK_SIZE, INTERNAL_BLOCK_SIZE * THREAD_BLOCK_SIZE }; + + // iterate over all features using blocking to be able to cache them for faster memory accesses + for (std::size_t dim = 0; dim < num_classes_; dim += FEATURE_BLOCK_SIZE_sz) { + // load data into shared memory + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const std::size_t global_sv_idx = sv_idx_linear + internal * THREAD_BLOCK_SIZE; + + // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory + alpha_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_d_[(dim + threadIdx_y) * (num_sv_ + PADDING_SIZE_sz) + global_sv_idx]; + alpha_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_sv_ + PADDING_SIZE_sz) + global_sv_idx]; + + // the bias (rho) must only be applied once for all support vectors + if (blockIdx_y == 0ull) { + out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = -rho_d_[dim + threadIdx_y]; + out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = -rho_d_[dim + threadIdx_y + 
THREAD_BLOCK_SIZE_sz]; + } else { + out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = real_type{ 0.0 }; + out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = real_type{ 0.0 }; + } + } + team.team_barrier(); // wait until all threads loaded their part of the data + + // calculate intermediate results and store them in shared memory + for (unsigned class_idx = 0; class_idx < FEATURE_BLOCK_SIZE; ++class_idx) { + for (unsigned internal_pd = 0; internal_pd < INTERNAL_BLOCK_SIZE; ++internal_pd) { + for (unsigned internal_sv = 0; internal_sv < INTERNAL_BLOCK_SIZE; ++internal_sv) { + out_cache((class_idx + threadIdx_y) % FEATURE_BLOCK_SIZE, internal_pd * THREAD_BLOCK_SIZE + threadIdx_x) += + temp[internal_pd][internal_sv] * alpha_cache((class_idx + threadIdx_y) % FEATURE_BLOCK_SIZE, threadIdx_y * INTERNAL_BLOCK_SIZE + internal_sv); + } + } + team.team_barrier(); // wait until all threads performed their part of the calculations + } + + // add intermediate cached results to prediction_d + for (unsigned internal = 0; internal < INTERNAL_BLOCK_SIZE; ++internal) { + const auto global_pp_idx = pp_idx + static_cast(internal); + + Kokkos::atomic_add(&prediction_d_[global_pp_idx * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y], out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x)); + Kokkos::atomic_add(&prediction_d_[global_pp_idx * (num_classes_ + PADDING_SIZE_sz) + dim + threadIdx_y + THREAD_BLOCK_SIZE_sz], out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x)); + } + team.team_barrier(); // wait until all threads updated their part of the prediction + } + } + } + private: + /// @cond Doxygen_suppress + device_view_type prediction_d_; + device_view_type alpha_d_; + device_view_type rho_d_; + device_view_type sv_d_; + device_view_type predict_points_d_; + const std::size_t num_classes_; + const std::size_t num_sv_; + const std::size_t num_predict_points_; + const 
std::size_t num_features_; + const std::size_t grid_x_offset_; + const std::size_t grid_y_offset_; + const std::size_t grid_size_x_; + const detail::standard_layout_tuple kernel_function_parameter_; + /// @endcond }; } // namespace plssvm::kokkos::detail diff --git a/src/main_predict.cpp b/src/main_predict.cpp index f9e70e1d5..1fe40d102 100644 --- a/src/main_predict.cpp +++ b/src/main_predict.cpp @@ -15,6 +15,7 @@ #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE, // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED #if defined(PLSSVM_HAS_KOKKOS_BACKEND) @@ -74,11 +75,12 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; + +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) const bool use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; // initialize Kokkos if necessary -#if defined(PLSSVM_HAS_KOKKOS_BACKEND) if (use_kokkos_as_backend) { Kokkos::initialize(argc, argv); // TODO: set device? 
PLSSVM_ASSERT(Kokkos::is_initialized(), "Something went wrong initializing the Kokkos environment!"); diff --git a/src/main_train.cpp b/src/main_train.cpp index f7ed20d9c..1d18d2744 100644 --- a/src/main_train.cpp +++ b/src/main_train.cpp @@ -14,7 +14,7 @@ #include "plssvm/detail/logging.hpp" // plssvm::detail::log #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE, // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME -#include "plssvm/detail/assert.hpp" +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED #if defined(PLSSVM_HAS_KOKKOS_BACKEND) @@ -72,11 +72,12 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; + +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) const bool use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; // initialize Kokkos if necessary -#if defined(PLSSVM_HAS_KOKKOS_BACKEND) if (use_kokkos_as_backend) { Kokkos::initialize(argc, argv); // TODO: set device? 
PLSSVM_ASSERT(Kokkos::is_initialized(), "Something went wrong initializing the Kokkos environment!"); diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index f459d55e5..f113c7e2c 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -8,8 +8,7 @@ #include "plssvm/backends/Kokkos/csvm.hpp" -#include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type -#include "plssvm/backends/execution_range.hpp" // plssvm::detail::execution_range +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::{execution_range, dim_type} #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr #include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space #include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::get_runtime_version @@ -18,11 +17,14 @@ #include "plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::kokkos::detail::device_kernel_assembly #include "plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::kokkos::detail::device_kernel_assembly_symm #include "plssvm/backends/Kokkos/kernel/predict_kernel.hpp" // plssvm::kokkos::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} +#include "plssvm/constants.hpp" // plssvm::THREAD_BLOCK_SIZE, plssvm::INTERNAL_BLOCK_SIZE, plssvm::FEATURE_BLOCK_SIZE #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution #include "plssvm/detail/logging.hpp" // plssvm::detail::log #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry +#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable // TODO: remove #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include 
"plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -141,8 +143,10 @@ void csvm::init(const target_platform target) { csvm::~csvm() { try { - // be sure that all operations on the Kokkos execution spaces have finished before destruction - detail::device_synchronize_all(); + // be sure that all operations on the Kokkos execution spaces have finished before destruction + for (const queue_type &device : devices_) { + detail::device_synchronize(device); + } } catch (const plssvm::exception &e) { std::cout << e.what_with_loc() << std::endl; std::terminate(); @@ -191,9 +195,12 @@ std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const case detail::execution_space::serial: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } + ::plssvm::detail::unreachable(); } std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { + PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); + // TODO: implement for other execution spaces, guard behind ifdef switch (space_) { case detail::execution_space::cuda: @@ -212,9 +219,12 @@ std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { case detail::execution_space::serial: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } + ::plssvm::detail::unreachable(); } -::plssvm::detail::dim_type csvm::get_max_grid_size([[maybe_unused]] const std::size_t device_id) const { +::plssvm::detail::dim_type csvm::get_max_grid_size(const std::size_t device_id) const { + PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); + // TODO: implement for other execution spaces, guard behind ifdef switch
(space_) { case detail::execution_space::cuda: @@ -233,6 +243,7 @@ ::plssvm::detail::dim_type csvm::get_max_grid_size([[maybe_unused]] const std::s case detail::execution_space::serial: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } + ::plssvm::detail::unreachable(); } //***************************************************// @@ -256,37 +267,55 @@ auto csvm::run_assemble_kernel_matrix_explicit(const std::size_t device_id, cons device_ptr_type kernel_matrix_d{ num_entries_padded, device }; // only explicitly store the upper triangular matrix const real_type cost_factor = real_type{ 1.0 } / params.cost; + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; - // TODO: implement - // // convert execution range block to CUDA's native dim3 - // const dim3 native_block = detail::dim_type_to_native(exec.block); - // - // for (const auto &[partial_grid, offsets] : exec.grids) { - // // convert execution range partial_grid to CUDA's native dim3 - // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); - // - // switch (params.kernel_type) { - // case kernel_function_type::linear: - // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y); - // break; - // case kernel_function_type::polynomial: - // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, params.degree, std::get(params.gamma), params.coef0); - // break; - // case kernel_function_type::rbf: - // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), 
num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, std::get(params.gamma)); - // break; - // case kernel_function_type::sigmoid: - // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, std::get(params.gamma), params.coef0); - // break; - // case kernel_function_type::laplacian: - // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, std::get(params.gamma)); - // break; - // case kernel_function_type::chi_squared: - // detail::device_kernel_assembly<<>>(kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, std::get(params.gamma)); - // break; - // } - // } - detail::device_synchronize_all(); + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy<> team_policy(device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO); + + switch (params.kernel_type) { + case kernel_function_type::linear: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x }); + } + break; + case kernel_function_type::polynomial: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_polynomial", team_policy.set_scratch_size(0, 
Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::rbf: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::sigmoid: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::laplacian: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::chi_squared: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ 
kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + } + } + detail::device_synchronize(device); return kernel_matrix_d; } @@ -300,72 +329,65 @@ void csvm::run_blas_level_3_kernel_explicit(const std::size_t device_id, const : const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); // get the offset of the data points this device is responsible for const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); + // the necessary amount of scratch memory for the kernels + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; - // TODO: implement - // // convert execution range block to CUDA's native dim3 - // const dim3 native_block = detail::dim_type_to_native(exec.block); - // - // detail::set_device(device); - // for (const auto &[partial_grid, offsets] : exec.grids) { - // // convert execution range partial_grid to CUDA's native dim3 - // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); - // - // detail::device_kernel_symm<<>>(num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, A_d.get(), B_d.get(), beta, C_d.get(), offsets.x, offsets.y); - // } - // - // // convert execution range block to CUDA's native dim3 - // const dim3 native_mirror_block = detail::dim_type_to_native(mirror_exec.block); - // - // for (const auto &[partial_grid, offsets] : mirror_exec.grids) { - // const unsigned long long num_mirror_rows = num_rows - row_offset - device_specific_num_rows; - // - // if (num_mirror_rows > 0) { - // // convert execution range partial_grid to CUDA's native dim3 - // const dim3 native_partial_grid = 
detail::dim_type_to_native(partial_grid); - // - // detail::device_kernel_symm_mirror<<>>(num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, A_d.get(), B_d.get(), beta, C_d.get(), offsets.x, offsets.y); - // } - // } - // detail::peek_at_last_error(); - detail::device_synchronize_all(); + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy<> team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + Kokkos::parallel_for("blas_level_3_kernel_explicit", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm(num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, A_d.get(), B_d.get(), beta, C_d.get(), offsets.x, offsets.y, partial_grid.x)); + } + + // save the mirror team sizes + const ::plssvm::detail::dim_type mirror_team_sizes = mirror_exec.block; + + for (const auto &[partial_grid, offsets] : mirror_exec.grids) { + const unsigned long long num_mirror_rows = num_rows - row_offset - device_specific_num_rows; + + if (num_mirror_rows > 0) { + // create a Kokkos TeamPolicy bound to this device's execution space instance (same instance as the non-mirror launch above and the device_synchronize(device) below) + Kokkos::TeamPolicy<> team_policy{ device, static_cast(partial_grid.total_size()), static_cast(mirror_team_sizes.total_size()), Kokkos::AUTO }; + + Kokkos::parallel_for("blas_level_3_kernel_explicit_mirror", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, A_d.get(), B_d.get(), beta, C_d.get(), offsets.x, offsets.y, partial_grid.x)); + } + } + detail::device_synchronize(device); } void csvm::run_inplace_matrix_addition(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const device_ptr_type &rhs_d) const { const unsigned long long num_rhs = lhs_d.shape().x; const queue_type &device = devices_[device_id]; - // // TODO: implement - 
// // convert execution range block to CUDA's native dim3 - // const dim3 native_block = detail::dim_type_to_native(exec.block); - // - // detail::set_device(device); - // for (const auto &[partial_grid, offsets] : exec.grids) { - // // convert execution range partial_grid to CUDA's native dim3 - // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); - // - // detail::device_kernel_inplace_matrix_add<<>>(num_rhs, lhs_d.get(), rhs_d.get(), offsets.x, offsets.y); - // } - // detail::peek_at_last_error(); - detail::device_synchronize_all(); + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; + + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy<> team_policy{ static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + Kokkos::parallel_for("inplace_matrix_addition", team_policy, detail::device_kernel_inplace_matrix_add(num_rhs, lhs_d.get(), rhs_d.get(), offsets.x, offsets.y, partial_grid.x)); + } + detail::device_synchronize(device); } void csvm::run_inplace_matrix_scale(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const real_type scale) const { const unsigned long long num_rhs = lhs_d.shape().x; const queue_type &device = devices_[device_id]; - // TODO: implement - // // convert execution range block to CUDA's native dim3 - // const dim3 native_block = detail::dim_type_to_native(exec.block); - // - // detail::set_device(device); - // for (const auto &[partial_grid, offsets] : exec.grids) { - // // convert execution range partial_grid to CUDA's native dim3 - // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); - // - // detail::device_kernel_inplace_matrix_scale<<>>(num_rhs, lhs_d.get(), scale, offsets.x, offsets.y); - // } - // detail::peek_at_last_error(); - detail::device_synchronize_all(); + // save the team sizes + const 
::plssvm::detail::dim_type team_sizes = exec.block; + + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy<> team_policy{ static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + Kokkos::parallel_for("inplace_matrix_scale", team_policy, detail::device_kernel_inplace_matrix_scale(num_rhs, lhs_d.get(), scale, offsets.x, offsets.y, partial_grid.x)); + } + detail::device_synchronize(device); } void csvm::run_assemble_kernel_matrix_implicit_blas_level_3(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const real_type alpha, const device_ptr_type &A_d, const parameter &params, const device_ptr_type &q_red, const real_type QA_cost, const device_ptr_type &B_d, device_ptr_type &C_d) const { @@ -380,39 +402,55 @@ void csvm::run_assemble_kernel_matrix_implicit_blas_level_3(const std::size_t de const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); const real_type cost_factor = real_type{ 1.0 } / params.cost; + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; + + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy<> team_policy(device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO); - // TODO: implement - // // convert general execution range's block to CUDA specific block - // const dim3 native_block = detail::dim_type_to_native(exec.block); - // - // detail::set_device(device); - // for (const auto &[partial_grid, offsets] : exec.grids) { - // // convert execution range partial_grid to CUDA's native dim3 - // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); - // - // switch (params.kernel_type) { - // case
kernel_function_type::linear: - // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y); - // break; - // case kernel_function_type::polynomial: - // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, params.degree, std::get(params.gamma), params.coef0); - // break; - // case kernel_function_type::rbf: - // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, std::get(params.gamma)); - // break; - // case kernel_function_type::sigmoid: - // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, std::get(params.gamma), params.coef0); - // break; - // case kernel_function_type::laplacian: - // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, std::get(params.gamma)); - // break; - // case kernel_function_type::chi_squared: - // detail::device_kernel_assembly_symm<<>>(alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, std::get(params.gamma)); - // break; - // } - // } - // detail::peek_at_last_error(); - detail::device_synchronize_all(); + switch (params.kernel_type) { + case kernel_function_type::linear: + { + using 
functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x }); + } + break; + case kernel_function_type::polynomial: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::rbf: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::sigmoid: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); + } + break; + case 
kernel_function_type::laplacian: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::chi_squared: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + } + } + detail::device_synchronize(device); } //***************************************************// @@ -431,19 +469,18 @@ auto csvm::run_w_kernel(const std::size_t device_id, const ::plssvm::detail::exe device_ptr_type w_d{ shape{ num_classes, num_features }, shape{ PADDING_SIZE, PADDING_SIZE }, device }; - // TODO: implement - // // convert execution range block to CUDA's native dim3 - // const dim3 native_block = detail::dim_type_to_native(exec.block); - // - // detail::set_device(device); - // for (const auto &[partial_grid, offsets] : exec.grids) { - // // convert execution range partial_grid to CUDA's native dim3 - // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); - // - // detail::device_kernel_w_linear<<>>(w_d.get(), alpha_d.get(), sv_d.get(), num_classes, num_sv, device_specific_num_sv, sv_offset, offsets.x, offsets.y); - // } - // detail::peek_at_last_error(); - detail::device_synchronize_all(); + const std::size_t scratch_memory_size 
= static_cast(2u * THREAD_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; + + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy<> team_policy{ static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + Kokkos::parallel_for("w_kernel", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_w_linear(w_d.get(), alpha_d.get(), sv_d.get(), num_classes, num_sv, device_specific_num_sv, sv_offset, offsets.x, offsets.y, partial_grid.x)); + } + detail::device_synchronize(device); return w_d; } @@ -457,38 +494,55 @@ auto csvm::run_predict_kernel(const std::size_t device_id, const ::plssvm::detai device_ptr_type out_d{ shape{ num_predict_points, num_classes }, shape{ PADDING_SIZE, PADDING_SIZE }, device }; - // TODO: implement - // // convert execution range block to CUDA's native dim3 - // const dim3 native_block = detail::dim_type_to_native(exec.block); - // - // detail::set_device(device); - // for (const auto &[partial_grid, offsets] : exec.grids) { - // // convert execution range partial_grid to CUDA's native dim3 - // const dim3 native_partial_grid = detail::dim_type_to_native(partial_grid); - // - // switch (params.kernel_type) { - // case kernel_function_type::linear: - // detail::device_kernel_predict_linear<<>>(out_d.get(), sv_or_w_d.get(), rho_d.get(), predict_points_d.get(), num_classes, num_predict_points, num_features, offsets.x, offsets.y); - // break; - // case kernel_function_type::polynomial: - // detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, params.degree, std::get(params.gamma), params.coef0); - // break; - // case kernel_function_type::rbf: - // 
detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, std::get(params.gamma)); - // break; - // case kernel_function_type::sigmoid: - // detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, std::get(params.gamma), params.coef0); - // break; - // case kernel_function_type::laplacian: - // detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, std::get(params.gamma)); - // break; - // case kernel_function_type::chi_squared: - // detail::device_kernel_predict<<>>(out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, std::get(params.gamma)); - // break; - // } - // } - // detail::peek_at_last_error(); - detail::device_synchronize_all(); + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; + + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy<> team_policy{ static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + switch (params.kernel_type) { + case kernel_function_type::linear: + { + using functor_type = detail::device_kernel_predict_linear; + Kokkos::parallel_for("predict_kernel_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), sv_or_w_d.get(), rho_d.get(), predict_points_d.get(), num_classes, num_predict_points, num_features, 
offsets.x, offsets.y, partial_grid.x }); + } + break; + case kernel_function_type::polynomial: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::rbf: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::sigmoid: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::laplacian: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::chi_squared: + { + using functor_type = detail::device_kernel_predict; + 
Kokkos::parallel_for("predict_kernel_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + } + } + detail::device_synchronize(device); return out_d; } diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index 9458bb899..4505c0515 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -126,8 +126,8 @@ std::string get_device_name(const execution_space space, const std::size_t devic return "unknown"; } -void device_synchronize_all() { - Kokkos::DefaultExecutionSpace::impl_static_fence("synchronize all"); +void device_synchronize(const Kokkos::DefaultExecutionSpace& exec) { + exec.fence(); } std::string get_kokkos_version() { From bc47002ab1a48993b6a16318759ef07dc1f09f05 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 21 Oct 2024 15:21:04 +0200 Subject: [PATCH 014/123] Add missing parameterized test suites. 
--- tests/main.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/main.cpp b/tests/main.cpp index 69570930a..d27eddd7d 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -13,18 +13,27 @@ // silence GTest warnings/test errors GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVM); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolver); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunction); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolver); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunction); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionClassification); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionClassification); + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMDeathTest); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverDeathTest); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionDeathTest); + GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVM); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMKernelFunction); + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMDeathTest); + GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtr); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrLayout); + GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest); + GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception); int main(int argc, char **argv) { From dfbb0cc44db0117231056bc4d18d2aef0ffc7e58 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 21 Oct 2024 15:21:18 +0200 Subject: [PATCH 015/123] Add first Kokkos backend tests. 
--- tests/backends/CMakeLists.txt | 5 + tests/backends/Kokkos/CMakeLists.txt | 35 +++++ tests/backends/Kokkos/detail/device_ptr.cpp | 42 +++++ tests/backends/Kokkos/exceptions.cpp | 25 +++ tests/backends/Kokkos/kokkos_csvm.cpp | 162 ++++++++++++++++++++ tests/backends/Kokkos/mock_kokkos_csvm.hpp | 85 ++++++++++ tests/kokkos_main.cpp | 63 ++++++++ 7 files changed, 417 insertions(+) create mode 100644 tests/backends/Kokkos/CMakeLists.txt create mode 100644 tests/backends/Kokkos/detail/device_ptr.cpp create mode 100644 tests/backends/Kokkos/exceptions.cpp create mode 100644 tests/backends/Kokkos/kokkos_csvm.cpp create mode 100644 tests/backends/Kokkos/mock_kokkos_csvm.hpp create mode 100644 tests/kokkos_main.cpp diff --git a/tests/backends/CMakeLists.txt b/tests/backends/CMakeLists.txt index 805e8bc1b..6acf4f638 100644 --- a/tests/backends/CMakeLists.txt +++ b/tests/backends/CMakeLists.txt @@ -32,4 +32,9 @@ endif () # create SYCL tests if the SYCL backend is available if (TARGET ${PLSSVM_SYCL_BACKEND_LIBRARY_NAME}) add_subdirectory(SYCL) +endif () + +# create Kokkos tests if the Kokkos backend is available +if (TARGET ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + add_subdirectory(Kokkos) endif () \ No newline at end of file diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt new file mode 100644 index 000000000..1a4d3d089 --- /dev/null +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -0,0 +1,35 @@ +## Authors: Alexander Van Craen, Marcel Breyer +## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved +## License: This file is part of the PLSSVM project which is released under the MIT license. +## See the LICENSE.md file in the project root for full license information. 
+######################################################################################################################## + +## create Kokkos tests +set(PLSSVM_KOKKOS_TEST_NAME Kokkos_tests) + +# list all necessary sources +set(PLSSVM_KOKKOS_TEST_SOURCES + ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp +# ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp +# ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cu + ${CMAKE_CURRENT_LIST_DIR}/kokkos_csvm.cpp + ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp +) + +find_package(Kokkos REQUIRED) + +# add test executable +add_executable(${PLSSVM_KOKKOS_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../kokkos_main.cpp ${PLSSVM_KOKKOS_TEST_SOURCES}) + +# link against test library +target_link_libraries(${PLSSVM_KOKKOS_TEST_NAME} PRIVATE ${PLSSVM_BASE_TEST_LIBRARY_NAME}) + +# add tests to google test +include(GoogleTest) +include(${PROJECT_SOURCE_DIR}/cmake/discover_tests_with_death_test_filter.cmake) +discover_tests_with_death_test_filter(${PLSSVM_KOKKOS_TEST_NAME}) + +# add test as coverage dependency +if (TARGET coverage) + add_dependencies(coverage ${PLSSVM_KOKKOS_TEST_NAME}) +endif () \ No newline at end of file diff --git a/tests/backends/Kokkos/detail/device_ptr.cpp b/tests/backends/Kokkos/detail/device_ptr.cpp new file mode 100644 index 000000000..797bfef78 --- /dev/null +++ b/tests/backends/Kokkos/detail/device_ptr.cpp @@ -0,0 +1,42 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the Kokkos backend device pointer. 
+ */ + +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr + +#include "tests/backends/generic_device_ptr_tests.hpp" // generic device pointer tests to instantiate +#include "tests/naming.hpp" // naming::test_parameter_to_name +#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list} + +#include "gtest/gtest.h" // INSTANTIATE_TYPED_TEST_SUITE_P + +#include <tuple> // std::tuple + +template +struct kokkos_device_ptr_test_type { + using device_ptr_type = plssvm::kokkos::detail::device_ptr; + using queue_type = Kokkos::DefaultExecutionSpace; + + static const queue_type &default_queue() { + static const queue_type queue{}; + return queue; + } +}; + +using kokkos_device_ptr_tuple = std::tuple, kokkos_device_ptr_test_type>; + +// the tests used in the instantiated GTest test suites +using kokkos_device_ptr_type_gtest = util::combine_test_parameters_gtest_t>; +using kokkos_device_ptr_layout_type_gtest = util::combine_test_parameters_gtest_t, util::layout_type_list>; + +// instantiate type-parameterized tests +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosDevicePtr, DevicePtr, kokkos_device_ptr_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosDevicePtr, DevicePtrLayout, kokkos_device_ptr_layout_type_gtest, naming::test_parameter_to_name); + +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosDevicePtrDeathTest, DevicePtrDeathTest, kokkos_device_ptr_type_gtest, naming::test_parameter_to_name); diff --git a/tests/backends/Kokkos/exceptions.cpp b/tests/backends/Kokkos/exceptions.cpp new file mode 100644 index 000000000..d78ac7801 --- /dev/null +++ b/tests/backends/Kokkos/exceptions.cpp @@ -0,0 +1,25 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license.
+ * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the custom exception classes related to the Kokkos backend. + */ + +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception + +#include "tests/backends/generic_exceptions_tests.hpp" // generic exception tests to instantiate + +#include "gtest/gtest.h" // INSTANTIATE_TYPED_TEST_SUITE_P + +#include <string_view> // std::string_view + +struct exception_test_type { + using exception_type = plssvm::kokkos::backend_exception; + constexpr static std::string_view name = "kokkos::backend_exception"; +}; + +// instantiate type-parameterized tests +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosExceptions, Exception, exception_test_type); diff --git a/tests/backends/Kokkos/kokkos_csvm.cpp b/tests/backends/Kokkos/kokkos_csvm.cpp new file mode 100644 index 000000000..e7af88d5b --- /dev/null +++ b/tests/backends/Kokkos/kokkos_csvm.cpp @@ -0,0 +1,162 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the functionality related to the Kokkos backend.
+ */ + +#include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/detail/type_list.hpp" // plssvm::detail::label_type_list +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "tests/backends/Kokkos/mock_kokkos_csvm.hpp" +#include "tests/backends/generic_csvm_tests.hpp" // generic CSVM tests to instantiate +#include "tests/backends/generic_gpu_csvm_tests.hpp" // generic GPU CSVM tests to instantiate +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT +#include "tests/naming.hpp" // naming::test_parameter_to_name +#include "tests/types_to_test.hpp" // util::{cartesian_type_product_t, combine_test_parameters_gtest_t} +#include "tests/utility.hpp" // util::redirect_output + +#include "gtest/gtest.h" // TEST_F, EXPECT_NO_THROW, INSTANTIATE_TYPED_TEST_SUITE_P, ::testing::Test + +#include <tuple> // std::make_tuple, std::tuple + +class KokkosCSVM : public ::testing::Test, + private util::redirect_output<> { }; + +//// check whether the constructor correctly fails when using an incompatible target platform +//TEST_F(CUDACSVM, construct_parameter) { +//#if defined(PLSSVM_HAS_NVIDIA_TARGET) +// // the automatic target platform must always be available +// EXPECT_NO_THROW(plssvm::cuda::csvm{ plssvm::parameter{} }); +//#else +// EXPECT_THROW_WHAT(plssvm::cuda::csvm{ plssvm::parameter{} }, +// plssvm::cuda::backend_exception, +// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +//#endif +//} +// +//TEST_F(CUDACSVM, construct_target_and_parameter) { +// // create parameter struct +// const plssvm::parameter params{}; +// +//#if defined(PLSSVM_HAS_NVIDIA_TARGET) +// // only automatic or gpu_nvidia are allowed as target platform for the CUDA backend +//
EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::target_platform::automatic, params })); +// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::target_platform::gpu_nvidia, params })); +//#else +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::automatic, params }), +// plssvm::cuda::backend_exception, +// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_nvidia, params }), +// plssvm::cuda::backend_exception, +// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +//#endif +// +// // all other target platforms must throw +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::cpu, params }), +// plssvm::cuda::backend_exception, +// "Invalid target platform 'cpu' for the CUDA backend!"); +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_amd, params }), +// plssvm::cuda::backend_exception, +// "Invalid target platform 'gpu_amd' for the CUDA backend!"); +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_intel, params }), +// plssvm::cuda::backend_exception, +// "Invalid target platform 'gpu_intel' for the CUDA backend!"); +//} +// +//TEST_F(CUDACSVM, construct_named_args) { +//#if defined(PLSSVM_HAS_NVIDIA_TARGET) +// // only automatic or gpu_nvidia are allowed as target platform for the CUDA backend +// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); +// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::cost = 2.0 })); +//#else +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), +// plssvm::cuda::backend_exception, +// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +//#endif +//} +// +//TEST_F(CUDACSVM, construct_target_and_named_args) { +//#if 
defined(PLSSVM_HAS_NVIDIA_TARGET) +// // only automatic or gpu_nvidia are allowed as target platform for the CUDA backend +// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::target_platform::automatic, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); +// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::cost = 2.0 })); +//#else +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::automatic, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), +// plssvm::cuda::backend_exception, +// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::cost = 2.0 }), +// plssvm::cuda::backend_exception, +// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +//#endif +// +// // all other target platforms must throw +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::cpu, plssvm::cost = 2.0 }), +// plssvm::cuda::backend_exception, +// "Invalid target platform 'cpu' for the CUDA backend!"); +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_amd, plssvm::cost = 2.0 }), +// plssvm::cuda::backend_exception, +// "Invalid target platform 'gpu_amd' for the CUDA backend!"); +// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_intel, plssvm::cost = 2.0 }), +// plssvm::cuda::backend_exception, +// "Invalid target platform 'gpu_intel' for the CUDA backend!"); +//} + +template +struct kokkos_csvm_test_type { + using mock_csvm_type = mock_kokkos_csvm; + using csvm_type = plssvm::kokkos::csvm; + using device_ptr_type = typename csvm_type::device_ptr_type; + inline constexpr static auto additional_arguments = std::make_tuple(); +}; + +using kokkos_csvm_test_tuple = std::tuple>; +using kokkos_csvm_test_label_type_list = util::cartesian_type_product_t; +using 
kokkos_csvm_test_type_list = util::cartesian_type_product_t; + +// the tests used in the instantiated GTest test suites +using kokkos_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_solver_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_solver_and_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_label_type_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_label_type_solver_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; + +// instantiate type-parameterized tests +// generic CSVM tests +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolver, kokkos_solver_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunction, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunctionClassification, kokkos_label_type_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); +//INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunctionClassification, kokkos_label_type_solver_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); + +// generic CSVM DeathTests +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverDeathTest, kokkos_solver_type_gtest, naming::test_parameter_to_name); 
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMKernelFunctionDeathTest, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverKernelFunctionDeathTest, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); + +// generic GPU CSVM tests - correct grid sizes +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); + +// generic GPU CSVM DeathTests - correct grid sizes +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericGPUCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); + +using kokkos_mock_csvm_test_tuple = std::tuple>; +using kokkos_mock_csvm_test_type_list = util::cartesian_type_product_t; + +using kokkos_mock_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_mock_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; + +// generic GPU CSVM tests - mocked grid sizes +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVM, kokkos_mock_csvm_test_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVMKernelFunction, kokkos_mock_kernel_function_type_gtest, naming::test_parameter_to_name); diff --git a/tests/backends/Kokkos/mock_kokkos_csvm.hpp b/tests/backends/Kokkos/mock_kokkos_csvm.hpp new file mode 100644 index 000000000..6fb35cd9c --- /dev/null +++ b/tests/backends/Kokkos/mock_kokkos_csvm.hpp @@ -0,0 +1,85 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. 
+ * See the LICENSE.md file in the project root for full license information. + * + * @brief MOCK class for the C-SVM class using the Kokkos backend. + */ + +#ifndef PLSSVM_TESTS_BACKENDS_KOKKOS_MOCK_KOKKOS_CSVM_HPP_ +#define PLSSVM_TESTS_BACKENDS_KOKKOS_MOCK_KOKKOS_CSVM_HPP_ +#pragma once + +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type +#include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm + +#include "gmock/gmock.h" // MOCK_METHOD, ON_CALL, ::testing::Return + +#include // std::size_t +#include // std::forward + +/** + * @brief GTest mock class for the Kokkos CSVM. + * @tparam mock_grid_size `true` if the `plssvm::kokkos::csvm::get_max_grid_size()` function should be mocked, otherwise `false` + */ +template +class mock_kokkos_csvm final : public plssvm::kokkos::csvm { + using base_type = plssvm::kokkos::csvm; + + public: + using base_type::device_ptr_type; + + template + explicit mock_kokkos_csvm(Args &&...args) : + base_type{ std::forward(args)... 
} { + this->fake_functions(); + } + + MOCK_METHOD((plssvm::detail::dim_type), get_max_grid_size, (const std::size_t), (const, override)); + + // make protected member functions public + using base_type::assemble_kernel_matrix; + using base_type::blas_level_3; + using base_type::get_device_memory; + using base_type::get_max_work_group_size; + using base_type::num_available_devices; + + using base_type::predict_values; + + using base_type::conjugate_gradients; + using base_type::perform_dimensional_reduction; + using base_type::run_assemble_kernel_matrix_implicit_blas_level_3; + using base_type::run_blas_level_3; + using base_type::solve_lssvm_system_of_linear_equations; + + using base_type::get_max_mem_alloc_size; + + using base_type::run_assemble_kernel_matrix_explicit; + using base_type::run_blas_level_3_kernel_explicit; + using base_type::run_inplace_matrix_addition; + using base_type::run_inplace_matrix_scale; + using base_type::run_predict_kernel; + using base_type::run_w_kernel; + + using base_type::data_distribution_; + using base_type::devices_; + + private: + /* + * @brief Fake the plssvm::kokkos::csvm::get_max_grid_size() function if requested. 
+ */ + void fake_functions() const { + if constexpr (mock_grid_size) { + // mock the function using hardcoded maximum grid sizes + ON_CALL(*this, get_max_grid_size).WillByDefault(::testing::Return(plssvm::detail::dim_type{ std::size_t{ 4 }, std::size_t{ 4 }, std::size_t{ 4 } })); + } else { + // use the actual real implementation otherwise + ON_CALL(*this, get_max_grid_size).WillByDefault([this](const std::size_t device_id) { return base_type::get_max_grid_size(device_id); }); + } + } +}; + +#endif // PLSSVM_TESTS_BACKENDS_KOKKOS_MOCK_KOKKOS_CSVM_HPP_ diff --git a/tests/kokkos_main.cpp b/tests/kokkos_main.cpp new file mode 100644 index 000000000..e53409bd4 --- /dev/null +++ b/tests/kokkos_main.cpp @@ -0,0 +1,63 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Contains the googletest main function. Sets the DeathTest to "threadsafe" execution instead of "fast". 
+ */ + +#include "gtest/gtest.h" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST, RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG} + +#include "Kokkos_Core.hpp" // TODO: + +// TODO: reduce copy-paste + +// silence GTest warnings/test errors +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVM); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunction); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolver); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunction); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionClassification); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionClassification); + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionDeathTest); + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVM); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMKernelFunction); + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMDeathTest); + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtr); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrLayout); + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest); + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception); + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + + // initialize Kokkos + Kokkos::initialize(argc, argv); + + // prevent problems with fork() in the presence of multiple threads + // https://github.com/google/googletest/blob/main/docs/advanced.md#death-tests-and-threads + // NOTE: may reduce performance of the (death) tests +#if !defined(_WIN32) + ::testing::GTEST_FLAG(death_test_style) = "threadsafe"; +#endif + + // run all tests + const int 
return_code = RUN_ALL_TESTS(); + + // finalize Kokkos + Kokkos::finalize(); + + return return_code; +} From 5cba91dfb8543d939e55583b95bfff8e0135c402 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 14:08:50 +0200 Subject: [PATCH 016/123] Update and refactor implementation. --- include/plssvm/backends/Kokkos/csvm.hpp | 34 ++-- .../Kokkos/detail/conditional_execution.hpp | 138 +++++++++++++++ .../Kokkos/detail/execution_space.hpp | 42 ----- .../plssvm/backends/Kokkos/detail/utility.hpp | 87 ++++------ .../backends/Kokkos/execution_space.hpp | 134 ++++++++++++++ src/plssvm/backends/Kokkos/CMakeLists.txt | 2 +- src/plssvm/backends/Kokkos/csvm.cpp | 164 +++++++++++------- .../Kokkos/detail/execution_space.cpp | 39 ----- src/plssvm/backends/Kokkos/detail/utility.cpp | 54 +----- .../backends/Kokkos/execution_space.cpp | 74 ++++++++ 10 files changed, 505 insertions(+), 263 deletions(-) create mode 100644 include/plssvm/backends/Kokkos/detail/conditional_execution.hpp delete mode 100644 include/plssvm/backends/Kokkos/detail/execution_space.hpp create mode 100644 include/plssvm/backends/Kokkos/execution_space.hpp delete mode 100644 src/plssvm/backends/Kokkos/detail/execution_space.cpp create mode 100644 src/plssvm/backends/Kokkos/execution_space.cpp diff --git a/include/plssvm/backends/Kokkos/csvm.hpp b/include/plssvm/backends/Kokkos/csvm.hpp index 206d85a81..859a9f43b 100644 --- a/include/plssvm/backends/Kokkos/csvm.hpp +++ b/include/plssvm/backends/Kokkos/csvm.hpp @@ -13,18 +13,19 @@ #define PLSSVM_BACKENDS_KOKKOS_CSVM_HPP_ #pragma once -#include "plssvm/backends/execution_range.hpp" // plssvm::detail::{dim_type, execution_range} -#include "plssvm/backends/gpu_csvm.hpp" // plssvm::detail::gpu_csvm -#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr -#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space -#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" 
// plssvm::kokkos::detail::pinned_memory -#include "plssvm/csvm.hpp" // plssvm::detail::csvm_backend_exists -#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size -#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES -#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter -#include "plssvm/target_platforms.hpp" // plssvm::target_platform - -#include "Kokkos_Core.hpp" // TODO: +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::{dim_type, execution_range} +#include "plssvm/backends/gpu_csvm.hpp" // plssvm::detail::gpu_csvm +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr +#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" // plssvm::kokkos::detail::pinned_memory +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/csvm.hpp" // plssvm::detail::csvm_backend_exists +#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size +#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "Kokkos_Core_fwd.hpp" // Kokkos::DefaultExecutionSpace #include // std::size_t #include // std::true_type @@ -37,6 +38,7 @@ namespace kokkos { /** * @brief A C-SVM implementation using Kokkos as backend. + * @details Internally, we always only use the `Kokkos::DefaultExecutionSpace`. */ class csvm : public ::plssvm::detail::gpu_csvm { protected: @@ -117,6 +119,12 @@ class csvm : public ::plssvm::detail::gpu_csvm // std::invoke + +namespace plssvm::kokkos::detail { + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA` macro if `KOKKOS_ENABLE_CUDA` is defined, i.e., the Kokkos CUDA ExecutionSpace is available. 
+ * @details If `KOKKOS_ENABLE_CUDA` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_CUDA) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(func) return std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::cuda) } +#endif + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP` macro if `KOKKOS_ENABLE_HIP` is defined, i.e., the Kokkos HIP ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_HIP` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_HIP) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(func) return std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hip) } +#endif + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL` macro if `KOKKOS_ENABLE_SYCL` is defined, i.e., the Kokkos SYCL ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_SYCL` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_SYCL) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(func) return std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::sycl) } +#endif + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX` macro if `KOKKOS_ENABLE_HPX` is defined, i.e., the Kokkos HPX ExecutionSpace is available. 
+ * @details If `KOKKOS_ENABLE_HPX` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_HPX) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX(func) return std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hpx) } +#endif + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP` macro if `KOKKOS_ENABLE_OPENMP` is defined, i.e., the Kokkos OpenMP ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENMP` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_OPENMP) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP(func) return std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp) } +#endif + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET` macro if `KOKKOS_ENABLE_OPENMPTARGET` is defined, i.e., the Kokkos OpenMP target offloading ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENMPTARGET` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
+ */ +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET(func) return std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp_target) } +#endif + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC` macro if `KOKKOS_ENABLE_OPENACC` is defined, i.e., the Kokkos OpenACC ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENACC` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_OPENACC) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC(func) return std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openacc) } +#endif + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS` macro if `KOKKOS_ENABLE_THREADS` is defined, i.e., the Kokkos std::thread ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_THREADS` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + */ +#if defined(KOKKOS_ENABLE_THREADS) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS(func) return std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::threads) } +#endif + +/** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL` macro if `KOKKOS_ENABLE_SERIAL` is defined, i.e., the Kokkos serial ExecutionSpace is available. 
+ * @details If `KOKKOS_ENABLE_SERIAL` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. + * @note This ExecutionSpace *should* always be available! + */ +#if defined(KOKKOS_ENABLE_SERIAL) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL(func) return std::invoke(func) +#else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::serial) } +#endif + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_CONDITIONAL_EXECUTION_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/execution_space.hpp b/include/plssvm/backends/Kokkos/detail/execution_space.hpp deleted file mode 100644 index 8e89975c3..000000000 --- a/include/plssvm/backends/Kokkos/detail/execution_space.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/** - * @file - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Execution space enumeration for the ExecutionSpaces in Kokkos. 
- */ - -#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_EXECUTION_SPACE_HPP_ -#define PLSSVM_BACKENDS_KOKKOS_DETAIL_EXECUTION_SPACE_HPP_ -#pragma once - -#include "fmt/base.h" // fmt::formatter -#include "fmt/ostream.h" // fmt::ostream_formatter - -#include // std::ostream forward declaration - -namespace plssvm::kokkos::detail { - -enum class execution_space { - cuda, - hip, - sycl, - hpx, - openmp, - openmp_target, - openacc, - threads, - serial -}; - -std::ostream &operator<<(std::ostream &out, execution_space space); - -} // namespace plssvm::kokkos::detail - -template <> -struct fmt::formatter : fmt::ostream_formatter { }; - -#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_EXECUTION_SPACE_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/utility.hpp b/include/plssvm/backends/Kokkos/detail/utility.hpp index 523900aa9..b7e732aff 100644 --- a/include/plssvm/backends/Kokkos/detail/utility.hpp +++ b/include/plssvm/backends/Kokkos/detail/utility.hpp @@ -13,8 +13,9 @@ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ #pragma once -#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space -#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "Kokkos_Core.hpp" // TODO: ? 
@@ -24,64 +25,40 @@ namespace plssvm::kokkos::detail { -template -[[nodiscard]] execution_space determine_execution_space() noexcept { - // determine the execution_space enumeration value based on the provided Kokkos execution space -#if defined(KOKKOS_ENABLE_CUDA) - if constexpr (std::is_same_v) { - return execution_space::cuda; - } -#endif -#if defined(KOKKOS_ENABLE_HIP) - if constexpr (std::is_same_v) { - return execution_space::hip; - } -#endif -#if defined(KOKKOS_ENABLE_SYCL) - if constexpr (std::is_same_v) { - return execution_space::sycl; - } -#endif -#if defined(KOKKOS_ENABLE_HPX) - if constexpr (std::is_same_v) { - return execution_space::hpx; - } -#endif -#if defined(KOKKOS_ENABLE_OPENMP) - if constexpr (std::is_same_v) { - return execution_space::openmp; - } -#endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) - if constexpr (std::is_same_v) { - return execution_space::openmp_target; - } -#endif -#if defined(KOKKOS_ENABLE_OPENACC) - if constexpr (std::is_same_v) { - return execution_space::openacc; - } -#endif -#if defined(KOKKOS_ENABLE_THREADS) - if constexpr (std::is_same_v) { - return execution_space::threads; - } -#endif -#if defined(KOKKOS_ENABLE_SERIAL) - if constexpr (std::is_same_v) { - return execution_space::serial; - } -#endif -} - [[nodiscard]] target_platform determine_default_target_platform_from_execution_space(execution_space space); void check_execution_space_target_platform_combination(execution_space space, target_platform target); -[[nodiscard]] std::string get_device_name(execution_space space, std::size_t device_id); - -void device_synchronize(const Kokkos::DefaultExecutionSpace& exec); +template +[[nodiscard]] inline std::string get_device_name(const execution_space space, [[maybe_unused]] const ExecSpace &exec) { + // TODO: implement for other backends! 
+ switch (space) { + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { + return std::string{ exec.cuda_device_prop().name }; + }); + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + return std::string{ exec.hip_device_prop().name }; + }); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + return exec.sycl_queue.get_device().get_info(); + }); + case execution_space::openmp: + case execution_space::hpx: + case execution_space::threads: + case execution_space::serial: + return "CPU host device"; + case execution_space::openmp_target: + return "OpenMP target device"; + case execution_space::openacc: + return "OpenACC target device"; + } + return "unknown"; +} +void device_synchronize(const Kokkos::DefaultExecutionSpace &exec); [[nodiscard]] std::string get_kokkos_version(); diff --git a/include/plssvm/backends/Kokkos/execution_space.hpp b/include/plssvm/backends/Kokkos/execution_space.hpp new file mode 100644 index 000000000..adde9892f --- /dev/null +++ b/include/plssvm/backends/Kokkos/execution_space.hpp @@ -0,0 +1,134 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Execution space enumeration for the ExecutionSpaces in Kokkos. 
+ */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_HPP_ +#pragma once + +#include "plssvm/detail/utility.hpp" // plssvm::unreachable + +#include "Kokkos_Core.hpp" // Kokkos macros, Kokkos ExecutionSpace types + +#include "fmt/base.h" // fmt::formatter +#include "fmt/ostream.h" // fmt::ostream_formatter + +#include // std::ostream forward declaration +#include // std::is_same_v + +namespace plssvm::kokkos { + +/** + * @brief Enum class for all execution spaces supported by [Kokkos](https://github.com/kokkos/kokkos). + */ +enum class execution_space { + /** Execution space representing execution on a CUDA device. */ + cuda, + /** Execution space representing execution on a device supported by HIP. */ + hip, + /** Execution space representing execution on a device supported by SYCL. */ + sycl, + /** Execution space representing execution with the HPX runtime system. */ + hpx, + /** Execution space representing execution with the OpenMP runtime system. */ + openmp, + /** Execution space representing execution using the target offloading feature of the OpenMP runtime system. */ + openmp_target, + /** Execution space representing execution with the OpenACC runtime system. */ + openacc, + /** Execution space representing parallel execution with std::threads. */ + threads, + /** Execution space representing serial execution on the CPU. Always available. */ + serial +}; + +/** + * @brief Create an `execution_space` from the provided Kokkos @p ExecSpace. 
+ * @tparam ExecSpace the type of the provided Kokkos ExecutionSpace + * @return the enum value representing the provided Kokkos ExecutionSpace (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline execution_space determine_execution_space() noexcept { + // determine the execution_space enumeration value based on the provided Kokkos execution space +#if defined(KOKKOS_ENABLE_CUDA) + if constexpr (std::is_same_v) { + return execution_space::cuda; + } +#endif +#if defined(KOKKOS_ENABLE_HIP) + if constexpr (std::is_same_v) { + return execution_space::hip; + } +#endif +#if defined(KOKKOS_ENABLE_SYCL) + if constexpr (std::is_same_v) { + return execution_space::sycl; + } +#endif +#if defined(KOKKOS_ENABLE_HPX) + if constexpr (std::is_same_v) { + return execution_space::hpx; + } +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + if constexpr (std::is_same_v) { + return execution_space::openmp; + } +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + if constexpr (std::is_same_v) { + return execution_space::openmp_target; + } +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + if constexpr (std::is_same_v) { + return execution_space::openacc; + } +#endif +#if defined(KOKKOS_ENABLE_THREADS) + if constexpr (std::is_same_v) { + return execution_space::threads; + } +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + if constexpr (std::is_same_v) { + return execution_space::serial; + } +#endif + // at least one execution space must always be available! + ::plssvm::detail::unreachable(); +} + +/** + * @brief Output the execution @p space to the given output-stream @p out. + * @param[in,out] out the output-stream to write the execution space to + * @param[in] space the Kokkos execution space + * @return the output-stream + */ +std::ostream &operator<<(std::ostream &out, execution_space space); + +/** + * @brief Use the input-stream @p in to initialize the execution @p space. 
+ * @param[in,out] in input-stream to extract the execution space from + * @param[in] space the Kokkos execution space + * @return the input-stream + */ +std::istream &operator>>(std::istream &in, execution_space &space); + +} // namespace plssvm::kokkos + +/// @endcond + +template <> +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond + +#endif // PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_HPP_ diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt index d7d1037ce..89cf282ce 100644 --- a/src/plssvm/backends/Kokkos/CMakeLists.txt +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -23,11 +23,11 @@ message(CHECK_PASS "found") # explicitly set sources set(PLSSVM_KOKKOS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp - ${CMAKE_CURRENT_LIST_DIR}/detail/execution_space.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp + ${CMAKE_CURRENT_LIST_DIR}/execution_space.cpp ) # set target properties diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index f113c7e2c..114c8738a 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -9,27 +9,29 @@ #include "plssvm/backends/Kokkos/csvm.hpp" #include "plssvm/backends/execution_range.hpp" // plssvm::detail::{execution_range, dim_type} +#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr -#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space #include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::get_runtime_version #include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // 
plssvm::kokkos::execution_space #include "plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp" // plssvm::kokkos::detail::{device_kernel_symm, device_kernel_symm_mirror, device_kernel_inplace_matrix_add, device_kernel_inplace_matrix_scale} #include "plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::kokkos::detail::device_kernel_assembly #include "plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::kokkos::detail::device_kernel_assembly_symm #include "plssvm/backends/Kokkos/kernel/predict_kernel.hpp" // plssvm::kokkos::detail::{device_kernel_w_linear, device_kernel_predict_linear, device_kernel_predict} #include "plssvm/constants.hpp" // plssvm::THREAD_BLOCK_SIZE, plssvm::INTERNAL_BLOCK_SIZE, plssvm::FEATURE_BLOCK_SIZE +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/data_distribution.hpp" // plssvm::detail::triangular_data_distribution #include "plssvm/detail/logging.hpp" // plssvm::detail::log #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry -#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable // TODO: remove +#include "plssvm/detail/utility.hpp" // plssvm::detail::{get_system_memory, unreachable} #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level -#include "Kokkos_Core.hpp" // TODO: +#include "Kokkos_Core.hpp" // TODO: docu #include "fmt/core.h" // fmt::format #include "fmt/format.h" // fmt::format @@ -46,7 +48,8 @@ csvm::csvm(parameter params) : csvm{ plssvm::target_platform::automatic, params } { } csvm::csvm(target_platform target, parameter params) : - 
base_type{ params } { + base_type{ params }, + space_{ determine_execution_space() } { this->init(target); } @@ -77,11 +80,6 @@ void csvm::init(const target_platform target) { break; } - // TODO: document: we ALWAYS use the default execution space - - // set the execution space -> we always only use the Kokkos::DefaultExecutionSpace - space_ = detail::determine_execution_space(); - plssvm::detail::log(verbosity_level::full, "\nUsing Kokkos ({}) as backend with the Kokkos::DefaultExecutionSpace \"{}\".\n", plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_version", detail::get_kokkos_version() }, @@ -129,7 +127,7 @@ void csvm::init(const target_platform target) { std::vector device_names{}; device_names.reserve(devices_.size()); for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { - const std::string device_name = detail::get_device_name(space_, device); + const std::string device_name = detail::get_device_name(space_, devices_[device]); plssvm::detail::log(verbosity_level::full, " [{}, {}]\n", device, @@ -154,95 +152,127 @@ csvm::~csvm() { } std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { - // TODO: implement for other execution spaces, guard behind ifdef + // TODO: implement for other execution spaces std::vector<::plssvm::detail::memory_size> res(this->num_available_devices()); switch (space_) { - case detail::execution_space::cuda: - { - cudaDeviceProp prop{}; + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { - cudaGetDeviceProperties(&prop, devices_[device_id].cuda_device()); - res[device_id] = ::plssvm::detail::memory_size{ static_cast(prop.totalGlobalMem) }; + res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].cuda_device_prop().totalGlobalMem) }; } - } - break; - case detail::execution_space::hip: - case detail::execution_space::sycl: - 
case detail::execution_space::openmp_target: - case detail::execution_space::openacc: - case detail::execution_space::openmp: - case detail::execution_space::hpx: - case detail::execution_space::threads: - case detail::execution_space::serial: + return res; + }); + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { + res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].hip_device_prop().totalGlobalMem) }; + } + return res; + }); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { + res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].sycl_queue().get_device().get_info<::sycl::info::device::global_mem_size>()) }; + } + return res; + }); + case execution_space::openmp: + case execution_space::hpx: + case execution_space::threads: + case execution_space::serial: + return std::vector<::plssvm::detail::memory_size>(this->num_available_devices(), ::plssvm::detail::get_system_memory()); + case execution_space::openmp_target: + case execution_space::openacc: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } - - return res; + // all possible cases should be handled by the previous switch + // -> silence missing return statement compiler warnings due to throw statement + ::plssvm::detail::unreachable(); } std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const { - // TODO: implement for other execution spaces, guard behind ifdef + // TODO: implement for other execution spaces switch (space_) { - case detail::execution_space::cuda: + case execution_space::cuda: + case execution_space::hip: + return this->get_device_memory(); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + for 
(std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { + res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].sycl_queue().get_device().get_info<::sycl::info::device::max_mem_alloc_size>()) }; + } + return res; + }); + case execution_space::openmp: + case execution_space::hpx: + case execution_space::threads: + case execution_space::serial: return this->get_device_memory(); - case detail::execution_space::hip: - case detail::execution_space::sycl: - case detail::execution_space::openmp_target: - case detail::execution_space::openacc: - case detail::execution_space::openmp: - case detail::execution_space::hpx: - case detail::execution_space::threads: - case detail::execution_space::serial: + case execution_space::openmp_target: + case execution_space::openacc: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } + // all possible cases should be handled by the previous switch + // -> silence missing return statement compiler warnings due to throw statement ::plssvm::detail::unreachable(); } std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); - // TODO: implement for other execution spaces, guard behind ifdef + // TODO: implement for other execution spaces switch (space_) { - case detail::execution_space::cuda: - { - cudaDeviceProp prop{}; - cudaGetDeviceProperties(&prop, devices_[device_id].cuda_device()); - return static_cast(prop.maxThreadsPerBlock); - } - case detail::execution_space::hip: - case detail::execution_space::sycl: - case detail::execution_space::openmp_target: - case detail::execution_space::openacc: - case detail::execution_space::openmp: - case detail::execution_space::hpx: - case detail::execution_space::threads: - case detail::execution_space::serial: + case execution_space::cuda: + 
PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { + return static_cast(devices_[device_id].cuda_device_prop().maxThreadsPerBlock); + }); + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + return static_cast(devices_[device_id].hip_device_prop().maxThreadsPerBlock); + }); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + return devices_[device_id].sycl_queue().get_device().get_info<::sycl::info::device::max_work_group_size>(); + }); + case execution_space::openmp_target: + case execution_space::openacc: + case execution_space::openmp: + case execution_space::hpx: + case execution_space::threads: + case execution_space::serial: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } + // all possible cases should be handled by the previous switch + // -> silence missing return statement compiler warnings due to throw statement ::plssvm::detail::unreachable(); } ::plssvm::detail::dim_type csvm::get_max_grid_size(const std::size_t device_id) const { PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); - // TODO: implement for other execution spaces, guard behind ifdef + // TODO: implement for other execution spaces switch (space_) { - case detail::execution_space::cuda: - { - cudaDeviceProp prop{}; - cudaGetDeviceProperties(&prop, devices_[device_id].cuda_device()); + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(([&]() -> ::plssvm::detail::dim_type { + const cudaDeviceProp &prop = devices_[device_id].cuda_device_prop(); + return { static_cast(prop.maxGridSize[0]), static_cast(prop.maxGridSize[1]), static_cast(prop.maxGridSize[2]) }; + })); + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(([&]() -> ::plssvm::detail::dim_type { + const hipDeviceProp &prop = devices_[device_id].hip_device_prop(); return { static_cast(prop.maxGridSize[0]), static_cast(prop.maxGridSize[1]), 
static_cast(prop.maxGridSize[2]) }; - } - case detail::execution_space::hip: - case detail::execution_space::sycl: - case detail::execution_space::openmp_target: - case detail::execution_space::openacc: - case detail::execution_space::openmp: - case detail::execution_space::hpx: - case detail::execution_space::threads: - case detail::execution_space::serial: + })); + case execution_space::sycl: + case execution_space::openmp_target: + case execution_space::openacc: + case execution_space::openmp: + case execution_space::hpx: + case execution_space::threads: + case execution_space::serial: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } + // all possible cases should be handled by the previous switch + // -> silence missing return statement compiler warnings due to throw statement ::plssvm::detail::unreachable(); } diff --git a/src/plssvm/backends/Kokkos/detail/execution_space.cpp b/src/plssvm/backends/Kokkos/detail/execution_space.cpp deleted file mode 100644 index 65afa72b1..000000000 --- a/src/plssvm/backends/Kokkos/detail/execution_space.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/** - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "plssvm/backends/Kokkos/detail/execution_space.hpp" - -#include // std::ostream - -namespace plssvm::kokkos::detail { - -std::ostream &operator<<(std::ostream &out, const execution_space space) { - switch (space) { - case execution_space::cuda: - return out << "Cuda"; - case execution_space::hip: - return out << "HIP"; - case execution_space::sycl: - return out << "SYCL"; - case execution_space::hpx: - return out << "HPX"; - case execution_space::openmp: - return out << "OpenMP"; - case execution_space::openmp_target: - return out << "OpenMPTarget"; - case execution_space::openacc: - return out << "OpenACC"; - case execution_space::threads: - return out << "Threads"; - case execution_space::serial: - return out << "Serial"; - } - return out << "unknown"; -} - -} // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index 4505c0515..ac53ffc48 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -8,11 +8,11 @@ #include "plssvm/backends/Kokkos/detail/utility.hpp" -#include "plssvm/backends/Kokkos/detail/execution_space.hpp" // plssvm::kokkos::detail::execution_space -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable -#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable +#include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "Kokkos_Macros.hpp" @@ -86,47 +86,7 @@ void check_execution_space_target_platform_combination(const 
execution_space spa // TODO: error checks? -std::string get_device_name(const execution_space space, const std::size_t device_id) { - // TODO: implement for other backends! - switch (space) { - case execution_space::cuda: -#if defined(KOKKOS_ENABLE_CUDA) - { - cudaDeviceProp prop{}; - cudaGetDeviceProperties(&prop, static_cast(device_id)); - return std::string{ prop.name }; - } -#else - throw backend_exception{ fmt::format("Unsupported Kokkos execution space \"{}\"!", space) }; -#endif - case execution_space::hip: -#if defined(KOKKOS_ENABLE_HIP) - { - hipDeviceProp_t prop{}; - hipGetDeviceProperties(&prop, static_cast(device_id)); - return std::string{ prop.name }; - } -#else - throw backend_exception{ fmt::format("Unsupported Kokkos execution space \"{}\"!", space) }; -#endif - case execution_space::openmp: -#if defined(KOKKOS_ENABLE_HIP) - return "CPU host device"; -#else - throw backend_exception{ fmt::format("Unsupported Kokkos execution space \"{}\"!", space) }; -#endif - case execution_space::sycl: - case execution_space::hpx: - case execution_space::openmp_target: - case execution_space::openacc: - case execution_space::threads: - case execution_space::serial: - throw backend_exception{ fmt::format("Unsupported Kokkos execution space \"{}\"!", space) }; - } - return "unknown"; -} - -void device_synchronize(const Kokkos::DefaultExecutionSpace& exec) { +void device_synchronize(const Kokkos::DefaultExecutionSpace &exec) { exec.fence(); } @@ -135,4 +95,6 @@ std::string get_kokkos_version() { return fmt::format("{}.{}.{}", KOKKOS_VERSION_MAJOR, KOKKOS_VERSION_MINOR, KOKKOS_VERSION_PATCH); } +// TODO: https://godbolt.org/z/eMYrbxsTj + } // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/execution_space.cpp b/src/plssvm/backends/Kokkos/execution_space.cpp new file mode 100644 index 000000000..5453e11d8 --- /dev/null +++ b/src/plssvm/backends/Kokkos/execution_space.cpp @@ -0,0 +1,74 @@ +/** + * @author Alexander Van Craen + * @author Marcel 
Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/backends/Kokkos/execution_space.hpp" + +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains + +#include // std::ios::failbit +#include // std::istream +#include // std::ostream +#include // std::string + +namespace plssvm::kokkos { + +std::ostream &operator<<(std::ostream &out, const execution_space space) { + switch (space) { + case execution_space::cuda: + return out << "Cuda"; + case execution_space::hip: + return out << "HIP"; + case execution_space::sycl: + return out << "SYCL"; + case execution_space::hpx: + return out << "HPX"; + case execution_space::openmp: + return out << "OpenMP"; + case execution_space::openmp_target: + return out << "OpenMPTarget"; + case execution_space::openacc: + return out << "OpenACC"; + case execution_space::threads: + return out << "Threads"; + case execution_space::serial: + return out << "Serial"; + } + return out << "unknown"; +} + +std::istream &operator>>(std::istream &in, execution_space &space) { + std::string str{}; + in >> str; + ::plssvm::detail::to_lower_case(str); + + if (str == "cuda") { + space = execution_space::cuda; + } else if (str == "hip") { + space = execution_space::hip; + } else if (str == "sycl") { + space = execution_space::sycl; + } else if (str == "hpx") { + space = execution_space::hpx; + } else if (str == "openmp") { + space = execution_space::openmp; + } else if (str == "openmp_target") { + space = execution_space::openmp_target; + } else if (str == "openacc") { + space = execution_space::openacc; + } else if (str == "threads") { + space = execution_space::threads; + } else if (str == "serial") { + space = execution_space::serial; + } else { + 
in.setstate(std::ios::failbit); + } + return in; +} + +} // namespace plssvm::kokkos From 90ff2728f584795e8a3fd72798bcb931a14d457d Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 15:34:44 +0200 Subject: [PATCH 017/123] Create get_device_list function and add missing documentation. --- .../plssvm/backends/Kokkos/detail/utility.hpp | 70 +++++------ src/plssvm/backends/Kokkos/csvm.cpp | 15 +-- src/plssvm/backends/Kokkos/detail/utility.cpp | 109 +++++++++++++++--- 3 files changed, 130 insertions(+), 64 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/utility.hpp b/include/plssvm/backends/Kokkos/detail/utility.hpp index b7e732aff..e29468830 100644 --- a/include/plssvm/backends/Kokkos/detail/utility.hpp +++ b/include/plssvm/backends/Kokkos/detail/utility.hpp @@ -13,53 +13,57 @@ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ #pragma once -#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "Kokkos_Core.hpp" // TODO: ? +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace -#include // std::size_t -#include // std::string -#include // std::is_same_v +#include // std::string +#include // std::vector namespace plssvm::kokkos::detail { +/** + * @brief Given the execution @p space, determine the respective default target platform. + * @param[in] space the Kokkos::ExecutionSpace for which the default target platform should be determined + * @return the default target platform (`[[nodiscard]]`) + */ [[nodiscard]] target_platform determine_default_target_platform_from_execution_space(execution_space space); +/** + * @brief Check whether the execution @p space supports the @p target platform. Throws an `plssvm::kokkos::backend_exception` if that's not the case. 
+ * @param[in] space the Kokkos::ExecutionSpace to investigate + * @param[in] target the target platform to check + * @throws plssvm::kokkos::backend_exception if @p space doesn't support the @p target platform + */ void check_execution_space_target_platform_combination(execution_space space, target_platform target); -template -[[nodiscard]] inline std::string get_device_name(const execution_space space, [[maybe_unused]] const ExecSpace &exec) { - // TODO: implement for other backends! - switch (space) { - case execution_space::cuda: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { - return std::string{ exec.cuda_device_prop().name }; - }); - case execution_space::hip: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { - return std::string{ exec.hip_device_prop().name }; - }); - case execution_space::sycl: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { - return exec.sycl_queue.get_device().get_info(); - }); - case execution_space::openmp: - case execution_space::hpx: - case execution_space::threads: - case execution_space::serial: - return "CPU host device"; - case execution_space::openmp_target: - return "OpenMP target device"; - case execution_space::openacc: - return "OpenACC target device"; - } - return "unknown"; -} +/** + * @brief Get a list of all available devices in the execution @p space that are supported by the @p target platform. + * @param[in] space the Kokkos::ExecutionSpace to retrieve the devices from + * @param[in] target the target platform that must be supported + * @return all devices for the @p target in the Kokkos::ExecutionSpace @p space (`[[nodiscard]]`) + */ +[[nodiscard]] std::vector get_device_list(execution_space space, target_platform target); +/** + * @brief Get the name of the device represented by the Kokkos::ExecutionSpace @p exec in the execution @p space. 
+ * @param[in] space the Kokkos::ExecutionSpace + * @param[in] exec the device + * @return the device name (`[[nodiscard]]`) + */ +[[nodiscard]] std::string get_device_name(execution_space space, const Kokkos::DefaultExecutionSpace &exec); + +/** + * @brief Wait for all kernel and/or other operations on the Kokkos::ExecutionSpace @p exec to finish + * @param[in] exec the Kokkos::ExecutionSpace to synchronize + */ void device_synchronize(const Kokkos::DefaultExecutionSpace &exec); +/** + * @brief Get the used Kokkos library version. + * @return the library version (`[[nodiscard]]`) + */ [[nodiscard]] std::string get_kokkos_version(); } // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 114c8738a..f2a40050c 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -100,20 +100,9 @@ void csvm::init(const target_platform target) { } // get all available devices wrt the requested target platform -// TODO: HOW CAN ONE USE MULTIPLE KOKKOS DEVICES -// TODO: implement for other Kokkos execution spaces -#if defined(KOKKOS_ENABLE_CUDA) - for (int device = 0; device < Kokkos::num_devices(); ++device) { - // create CUDA stream using the CUDA specific functions - cudaSetDevice(device); - cudaStream_t stream{}; - cudaStreamCreate(&stream); - // create Kokkos execution space for the specific device - devices_.emplace_back(Kokkos::Cuda(stream, true)); - } -#endif + devices_ = detail::get_device_list(space_, target_); - // throw exception if no CUDA devices could be found + // throw exception if no devices in the current execution space could be found if (devices_.empty()) { throw backend_exception{ fmt::format("Not devices found for the Kokkos execution space {} with the target platform {}!", space_, target_) }; } diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index ac53ffc48..b7d58be1d 100644 --- 
a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -8,25 +8,20 @@ #include "plssvm/backends/Kokkos/detail/utility.hpp" -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable -#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable +#include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "Kokkos_Macros.hpp" - -#if defined(KOKKOS_ENABLE_CUDA) - #include "cuda_runtime.h" // cudaDeviceProp, cudaGetDeviceProperties -#endif -#if defined(KOKKOS_ENABLE_HIP) - #include "hip/hip_runtime_api.h" // HIP runtime functions -#endif +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos::num_devices, Kokkos::Cuda, Kokkos::Hip, Kokkos::Sycl, Kokkos::Impl::ManageStream +#include "Kokkos_Macros.hpp" // Kokkos macros #include "fmt/core.h" // fmt::format -#include // std::size_t -#include // std::string +#include // std::string +#include // std::vector namespace plssvm::kokkos::detail { @@ -46,6 +41,8 @@ target_platform determine_default_target_platform_from_execution_space(const exe case execution_space::serial: return target_platform::cpu; } + // all possible cases should be handled by the previous switch + // -> silence missing return statement compiler warnings due to throw statement ::plssvm::detail::unreachable(); } @@ -84,7 +81,85 @@ void 
check_execution_space_target_platform_combination(const execution_space spa } } -// TODO: error checks? +std::vector get_device_list(const execution_space space, [[maybe_unused]] const target_platform target) { + std::vector devices{}; + switch (space) { + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { + for (int device = 0; device < Kokkos::num_devices(); ++device) { + // create CUDA stream using the CUDA specific functions + cudaSetDevice(device); + cudaStream_t stream{}; + cudaStreamCreate(&stream); + // create Kokkos execution space for the specific device + // Note: it is important to pass the cudaStream_t lifetime to be managed by Kokkos + devices.emplace_back(Kokkos::Cuda(stream, Kokkos::Impl::ManageStream::yes)); + } + return devices; + }); + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + for (int device = 0; device < Kokkos::num_devices(); ++device) { + // HIP CUDA stream using the HIP specific functions + hipSetDevice(device); + hipStream_t stream{}; + hipStreamCreate(&stream); + // create Kokkos execution space for the specific device + // Note: it is important to pass the hipStream_t lifetime to be managed by Kokkos + devices.emplace_back(Kokkos::Hip(stream, Kokkos::Impl::ManageStream::yes)); + } + return devices; + }); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + // TODO: use all available devices -> not that trivial + // TODO: handle target + devices.emplace_back(Kokkos::SYCL{}); + return devices; + }); + case execution_space::openmp: + case execution_space::hpx: + case execution_space::threads: + case execution_space::serial: + devices.emplace_back(Kokkos::DefaultExecutionSpace{}); + return devices; + case execution_space::openmp_target: + case execution_space::openacc: + // TODO: implement + throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space) }; + } + // all possible cases should be handled by the previous 
switch + // -> silence missing return statement compiler warnings due to throw statement + ::plssvm::detail::unreachable(); +} + +std::string get_device_name(const execution_space space, [[maybe_unused]] const Kokkos::DefaultExecutionSpace &exec) { + // TODO: implement for other backends! + switch (space) { + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { + return std::string{ exec.cuda_device_prop().name }; + }); + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + return std::string{ exec.hip_device_prop().name }; + }); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + return exec.sycl_queue.get_device().get_info(); + }); + case execution_space::openmp: + case execution_space::hpx: + case execution_space::threads: + case execution_space::serial: + return "CPU host device"; + case execution_space::openmp_target: + return "OpenMP target device"; + case execution_space::openacc: + return "OpenACC target device"; + } + return "unknown"; +} void device_synchronize(const Kokkos::DefaultExecutionSpace &exec) { exec.fence(); @@ -95,6 +170,4 @@ std::string get_kokkos_version() { return fmt::format("{}.{}.{}", KOKKOS_VERSION_MAJOR, KOKKOS_VERSION_MINOR, KOKKOS_VERSION_PATCH); } -// TODO: https://godbolt.org/z/eMYrbxsTj - } // namespace plssvm::kokkos::detail From 3f4bf8def6f704ff96c0fe127ca2de5dd9c64598 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 16:17:38 +0200 Subject: [PATCH 018/123] Move Kokkos::View typedefs to custom header. 
--- .../backends/Kokkos/detail/device_ptr.hpp | 22 ++++-------- .../backends/Kokkos/detail/typedefs.hpp | 36 +++++++++++++++++++ .../Kokkos/kernel/cg_explicit/blas.hpp | 12 +++---- .../cg_explicit/kernel_matrix_assembly.hpp | 4 +-- .../kernel_matrix_assembly_blas.hpp | 6 ++-- .../backends/Kokkos/kernel/predict_kernel.hpp | 6 ++-- .../backends/Kokkos/detail/device_ptr.cpp | 26 +++++++------- 7 files changed, 71 insertions(+), 41 deletions(-) create mode 100644 include/plssvm/backends/Kokkos/detail/typedefs.hpp diff --git a/include/plssvm/backends/Kokkos/detail/device_ptr.hpp b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp index 8f587b667..98194bcfb 100644 --- a/include/plssvm/backends/Kokkos/detail/device_ptr.hpp +++ b/include/plssvm/backends/Kokkos/detail/device_ptr.hpp @@ -13,24 +13,16 @@ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_PTR_HPP_ #pragma once -#include "plssvm/backends/gpu_device_ptr.hpp" // plssvm::detail::gpu_device_ptr -#include "plssvm/shape.hpp" // plssvm::shape +#include "plssvm/backends/gpu_device_ptr.hpp" // plssvm::detail::gpu_device_ptr +#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::device_view_type +#include "plssvm/shape.hpp" // plssvm::shape -#include "Kokkos_Core.hpp" // TODO: Kokkos::DefaultExecutionSpace +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace #include // std::size_t namespace plssvm::kokkos::detail { -template -using device_view_type = Kokkos::View; - -template -using device_subview_type = Kokkos::Subview; - -template -using host_view_type = Kokkos::View; - /** * @brief Small wrapper class around a Kokkos view together with commonly used device functions. * @tparam T the type of the kernel view to wrap @@ -70,20 +62,20 @@ class device_ptr : public ::plssvm::detail::gpu_device_ptr +using device_view_type = Kokkos::View; + +/** + * @brief Typedef for a simple Kokkos::View always targeting the Kokkos::HostSpace. 
+ * @tparam T the type of the view's data + */ +template +using host_view_type = Kokkos::View; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_TYPEDEFS_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp index c12220b0b..85997c118 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp @@ -13,10 +13,12 @@ #define PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_BLAS_HPP_ #pragma once -#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // TODO: view type aliases -#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::device_view_type +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} -#include "Kokkos_Core.hpp" // TODO: +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents + +#include // std::size_t namespace plssvm::kokkos::detail { @@ -317,9 +319,7 @@ class device_kernel_inplace_matrix_add { const auto global_i = i + static_cast(internal_i); const auto global_j = j + static_cast(internal_j); - // if (global_i < lhs_.extent(0) && global_j < rhs_.extent(0)) { // TODO: lhs_[global_i * (num_cols_ + PADDING_SIZE_sz) + global_j] += rhs_[global_i * (num_cols_ + PADDING_SIZE_sz) + global_j]; - // } } } } @@ -378,9 +378,7 @@ class device_kernel_inplace_matrix_scale { const auto global_i = i + static_cast(internal_i); const auto global_j = j + static_cast(internal_j); - // if (global_i < lhs_.extent(0)) { // TODO: lhs_[global_i * (num_cols_ + PADDING_SIZE_sz) + global_j] *= scale_; - // } } } } diff --git a/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp 
b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp index ad9397377..550dbfe0e 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp @@ -13,13 +13,13 @@ #define PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_KERNEL_MATRIX_ASSEMBLY_HPP_ #pragma once -#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // TODO: view type aliases #include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple +#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::device_view_type #include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "Kokkos_Core.hpp" // TODO: +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents #include // std::size_t diff --git a/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp index 2f0f6619c..cf73cadb4 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp @@ -13,13 +13,15 @@ #define PLSSVM_BACKENDS_KOKKOS_CG_IMPLICIT_KERNEL_MATRIX_ASSEMBLY_BLAS_HPP_ #pragma once -#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // TODO: view type aliases #include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple +#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::device_view_type #include 
"plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "Kokkos_Core.hpp" // TODO: Kokkos::atomic_add +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents, Kokkos::atomic_add + +#include // std::size_t namespace plssvm::kokkos::detail { diff --git a/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp index 629a0901f..c6a302d6d 100644 --- a/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp +++ b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp @@ -13,12 +13,14 @@ #define PLSSVM_BACKENDS_KOKKOS_PREDICT_KERNEL_HPP_ #pragma once -#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // TODO: view type aliases +#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::device_view_type #include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "Kokkos_Core.hpp" // TODO: Kokkos::atomic_add +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents, Kokkos::atomic_add + +#include // std::size_t namespace plssvm::kokkos::detail { diff --git a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp index b176c1283..cbf973ca4 100644 --- a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp @@ -8,36 +8,36 @@ #include 
"plssvm/backends/Kokkos/detail/device_ptr.hpp" -#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::detail::device_synchronize -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/shape.hpp" // plssvm::shape +#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::{device_view_type, host_view_type} +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::detail::device_synchronize +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/shape.hpp" // plssvm::shape -#include "Kokkos_Core.hpp" +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos::subview, Kokkos::parallel_for, KOKKOS_LAMBDA, Kokkos::deep_copy #include "fmt/core.h" // fmt::format +#include // std::min #include // std::size_t -#include // std::terminate -#include // std::cout, std::endl +#include // std::memcpy +#include // std::make_pair #include // std::vector namespace plssvm::kokkos::detail { template -device_ptr::device_ptr(const size_type size, const Kokkos::DefaultExecutionSpace exec) : +device_ptr::device_ptr(const size_type size, const Kokkos::DefaultExecutionSpace &exec) : device_ptr{ plssvm::shape{ size, 1 }, plssvm::shape{ 0, 0 }, exec } { } template -device_ptr::device_ptr(const plssvm::shape shape, const Kokkos::DefaultExecutionSpace exec) : +device_ptr::device_ptr(const plssvm::shape shape, const Kokkos::DefaultExecutionSpace &exec) : device_ptr{ shape, plssvm::shape{ 0, 0 }, exec } { } template -device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding, const Kokkos::DefaultExecutionSpace exec) : +device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding, const Kokkos::DefaultExecutionSpace &exec) : base_type{ shape, 
padding, exec } { - // TODO: GUARD behind ifdef! - data_ = device_view_type{ fmt::format("device_{}_view", exec.cuda_device()), this->size_padded() }; + data_ = device_view_type{ "device_ptr_view", this->size_padded() }; } template From 33ecfe9e0edc3393d5f8c0e61ddbde732f53fa97 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 16:56:25 +0200 Subject: [PATCH 019/123] Add additional execution_space query function and tests. --- .../backends/Kokkos/execution_space.hpp | 72 ++---------- src/plssvm/backends/Kokkos/csvm.cpp | 2 +- .../backends/Kokkos/execution_space.cpp | 106 ++++++++++++++++-- tests/backends/Kokkos/CMakeLists.txt | 1 + tests/backends/Kokkos/detail/device_ptr.cpp | 2 + tests/backends/Kokkos/execution_space.cpp | 79 +++++++++++++ 6 files changed, 194 insertions(+), 68 deletions(-) create mode 100644 tests/backends/Kokkos/execution_space.cpp diff --git a/include/plssvm/backends/Kokkos/execution_space.hpp b/include/plssvm/backends/Kokkos/execution_space.hpp index adde9892f..6d9d84e3f 100644 --- a/include/plssvm/backends/Kokkos/execution_space.hpp +++ b/include/plssvm/backends/Kokkos/execution_space.hpp @@ -13,15 +13,11 @@ #define PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_HPP_ #pragma once -#include "plssvm/detail/utility.hpp" // plssvm::unreachable - -#include "Kokkos_Core.hpp" // Kokkos macros, Kokkos ExecutionSpace types - #include "fmt/base.h" // fmt::formatter #include "fmt/ostream.h" // fmt::ostream_formatter -#include // std::ostream forward declaration -#include // std::is_same_v +#include // std::ostream forward declaration +#include // std::vector namespace plssvm::kokkos { @@ -50,61 +46,17 @@ enum class execution_space { }; /** - * @brief Create an `execution_space` from the provided Kokkos @p ExecSpace. 
- * @tparam ExecSpace the type of the provided Kokkos ExecutionSpace - * @return the enum value representing the provided Kokkos ExecutionSpace (`[[nodiscard]]`) + * @brief Create an `execution_space` from the current `Kokkos::DefaultExecutionSpace`. + * @return the enum value representing the current `Kokkos::DefaultExecutionSpace` (`[[nodiscard]]`) + */ +[[nodiscard]] execution_space determine_execution_space() noexcept; + +/** + * @brief List all available Kokkos::ExecutionSpaces. + * @details At least one execution space must **always** be available! + * @return a vector containing all available execution spaces (`[[nodiscard]]`) */ -template -[[nodiscard]] inline execution_space determine_execution_space() noexcept { - // determine the execution_space enumeration value based on the provided Kokkos execution space -#if defined(KOKKOS_ENABLE_CUDA) - if constexpr (std::is_same_v) { - return execution_space::cuda; - } -#endif -#if defined(KOKKOS_ENABLE_HIP) - if constexpr (std::is_same_v) { - return execution_space::hip; - } -#endif -#if defined(KOKKOS_ENABLE_SYCL) - if constexpr (std::is_same_v) { - return execution_space::sycl; - } -#endif -#if defined(KOKKOS_ENABLE_HPX) - if constexpr (std::is_same_v) { - return execution_space::hpx; - } -#endif -#if defined(KOKKOS_ENABLE_OPENMP) - if constexpr (std::is_same_v) { - return execution_space::openmp; - } -#endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) - if constexpr (std::is_same_v) { - return execution_space::openmp_target; - } -#endif -#if defined(KOKKOS_ENABLE_OPENACC) - if constexpr (std::is_same_v) { - return execution_space::openacc; - } -#endif -#if defined(KOKKOS_ENABLE_THREADS) - if constexpr (std::is_same_v) { - return execution_space::threads; - } -#endif -#if defined(KOKKOS_ENABLE_SERIAL) - if constexpr (std::is_same_v) { - return execution_space::serial; - } -#endif - // at least one execution space must always be available! 
- ::plssvm::detail::unreachable(); -} +[[nodiscard]] std::vector available_execution_spaces(); /** * @brief Output the execution @p space to the given output-stream @p out. diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index f2a40050c..7d24b2a8b 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -49,7 +49,7 @@ csvm::csvm(parameter params) : csvm::csvm(target_platform target, parameter params) : base_type{ params }, - space_{ determine_execution_space() } { + space_{ determine_execution_space() } { this->init(target); } diff --git a/src/plssvm/backends/Kokkos/execution_space.cpp b/src/plssvm/backends/Kokkos/execution_space.cpp index 5453e11d8..06a4c351e 100644 --- a/src/plssvm/backends/Kokkos/execution_space.cpp +++ b/src/plssvm/backends/Kokkos/execution_space.cpp @@ -8,13 +8,18 @@ #include "plssvm/backends/Kokkos/execution_space.hpp" +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case -#include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable -#include // std::ios::failbit -#include // std::istream -#include // std::ostream -#include // std::string +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos macros, Kokkos ExecutionSpace types + +#include // std::ios::failbit +#include // std::istream +#include // std::ostream +#include // std::string +#include // std::is_same_v +#include // std::vector namespace plssvm::kokkos { @@ -57,11 +62,11 @@ std::istream &operator>>(std::istream &in, execution_space &space) { space = execution_space::hpx; } else if (str == "openmp") { space = execution_space::openmp; - } else if (str == "openmp_target") { + } else if (str == "openmp_target" || str == "openmptarget") { space = execution_space::openmp_target; } else if (str == "openacc") { space = execution_space::openacc; - } else 
if (str == "threads") { + } else if (str == "threads" || str == "std::threads") { space = execution_space::threads; } else if (str == "serial") { space = execution_space::serial; @@ -71,4 +76,91 @@ std::istream &operator>>(std::istream &in, execution_space &space) { return in; } +execution_space determine_execution_space() noexcept { + // determine the execution_space enumeration value based on the provided Kokkos execution space +#if defined(KOKKOS_ENABLE_CUDA) + if constexpr (std::is_same_v) { + return execution_space::cuda; + } +#endif +#if defined(KOKKOS_ENABLE_HIP) + if constexpr (std::is_same_v) { + return execution_space::hip; + } +#endif +#if defined(KOKKOS_ENABLE_SYCL) + if constexpr (std::is_same_v) { + return execution_space::sycl; + } +#endif +#if defined(KOKKOS_ENABLE_HPX) + if constexpr (std::is_same_v) { + return execution_space::hpx; + } +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + if constexpr (std::is_same_v) { + return execution_space::openmp; + } +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + if constexpr (std::is_same_v) { + return execution_space::openmp_target; + } +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + if constexpr (std::is_same_v) { + return execution_space::openacc; + } +#endif +#if defined(KOKKOS_ENABLE_THREADS) + if constexpr (std::is_same_v) { + return execution_space::threads; + } +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + if constexpr (std::is_same_v) { + return execution_space::serial; + } +#endif + // at least one execution space must always be available! 
+ ::plssvm::detail::unreachable(); +} + +[[nodiscard]] std::vector available_execution_spaces() { + std::vector available_spaces{}; +#if defined(KOKKOS_ENABLE_CUDA) + available_spaces.push_back(execution_space::cuda); +#endif +#if defined(KOKKOS_ENABLE_HIP) + available_spaces.push_back(execution_space::hip); +#endif +#if defined(KOKKOS_ENABLE_SYCL) + available_spaces.push_back(execution_space::sycl); +#endif +#if defined(KOKKOS_ENABLE_HPX) + available_spaces.push_back(execution_space::hpx); +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + available_spaces.push_back(execution_space::openmp); +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + available_spaces.push_back(execution_space::openmp_target); +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + available_spaces.push_back(execution_space::openacc); +#endif +#if defined(KOKKOS_ENABLE_THREADS) + available_spaces.push_back(execution_space::threads); +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + available_spaces.push_back(execution_space::serial); +#endif + + // AT LEAST ONE execution space must ALWAYS be available + PLSSVM_ASSERT(!available_spaces.empty(), "At least one execution space must always be available!"); + + return available_spaces; +} + } // namespace plssvm::kokkos diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index 1a4d3d089..7032ec2d8 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -14,6 +14,7 @@ set(PLSSVM_KOKKOS_TEST_SOURCES # ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cu ${CMAKE_CURRENT_LIST_DIR}/kokkos_csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp + ${CMAKE_CURRENT_LIST_DIR}/execution_space.cpp ) find_package(Kokkos REQUIRED) diff --git a/tests/backends/Kokkos/detail/device_ptr.cpp b/tests/backends/Kokkos/detail/device_ptr.cpp index 797bfef78..34b83eefa 100644 --- a/tests/backends/Kokkos/detail/device_ptr.cpp +++ b/tests/backends/Kokkos/detail/device_ptr.cpp @@ -10,6 +10,8 @@ #include
"plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace + #include "tests/backends/generic_device_ptr_tests.hpp" // generic device pointer tests to instantiate #include "tests/naming.hpp" // naming::test_parameter_to_name #include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list} diff --git a/tests/backends/Kokkos/execution_space.cpp b/tests/backends/Kokkos/execution_space.cpp new file mode 100644 index 000000000..5639f3802 --- /dev/null +++ b/tests/backends/Kokkos/execution_space.cpp @@ -0,0 +1,79 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for functions related to the different Kokkos execution spaces. 
+ */ + +#include "plssvm/backends/Kokkos/execution_space.hpp" + +#include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING, EXPECT_CONVERSION_FROM_STRING + +#include "gtest/gtest-matchers.h" // EXPECT_THAT; ::testing::AnyOf +#include "gtest/gtest.h" // TEST, EXPECT_TRUE + +#include // std::istringstream + +// check whether the plssvm::kokkos::execution_space -> std::string conversions are correct +TEST(KokkosExecutionSpace, to_string) { + // check conversions to std::string + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::cuda, "Cuda"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::hip, "HIP"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::sycl, "SYCL"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::hpx, "HPX"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::openmp, "OpenMP"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::openmp_target, "OpenMPTarget"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::openacc, "OpenACC"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::threads, "Threads"); + EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::serial, "Serial"); +} + +TEST(KokkosExecutionSpace, to_string_unknown) { + // check conversions to std::string from unknown execution_space + EXPECT_CONVERSION_TO_STRING(static_cast(9), "unknown"); +} + +// check whether the std::string -> plssvm::kokkos::execution_space conversions are correct +TEST(KokkosExecutionSpace, from_string) { + // check conversion from std::string + EXPECT_CONVERSION_FROM_STRING("Cuda", plssvm::kokkos::execution_space::cuda); + EXPECT_CONVERSION_FROM_STRING("CUDA", plssvm::kokkos::execution_space::cuda); + EXPECT_CONVERSION_FROM_STRING("Hip", plssvm::kokkos::execution_space::hip); + EXPECT_CONVERSION_FROM_STRING("HIP", plssvm::kokkos::execution_space::hip); + EXPECT_CONVERSION_FROM_STRING("Sycl", plssvm::kokkos::execution_space::sycl); + 
EXPECT_CONVERSION_FROM_STRING("SYCL", plssvm::kokkos::execution_space::sycl); + EXPECT_CONVERSION_FROM_STRING("Hpx", plssvm::kokkos::execution_space::hpx); + EXPECT_CONVERSION_FROM_STRING("HPX", plssvm::kokkos::execution_space::hpx); + EXPECT_CONVERSION_FROM_STRING("OpenMP", plssvm::kokkos::execution_space::openmp); + EXPECT_CONVERSION_FROM_STRING("OPENMP", plssvm::kokkos::execution_space::openmp); + EXPECT_CONVERSION_FROM_STRING("OpenMP_Target", plssvm::kokkos::execution_space::openmp_target); + EXPECT_CONVERSION_FROM_STRING("OPENMPTARGET", plssvm::kokkos::execution_space::openmp_target); + EXPECT_CONVERSION_FROM_STRING("OpenACC", plssvm::kokkos::execution_space::openacc); + EXPECT_CONVERSION_FROM_STRING("OPENACC", plssvm::kokkos::execution_space::openacc); + EXPECT_CONVERSION_FROM_STRING("threads", plssvm::kokkos::execution_space::threads); + EXPECT_CONVERSION_FROM_STRING("THREADS", plssvm::kokkos::execution_space::threads); + EXPECT_CONVERSION_FROM_STRING("std::threads", plssvm::kokkos::execution_space::threads); + EXPECT_CONVERSION_FROM_STRING("Serial", plssvm::kokkos::execution_space::serial); + EXPECT_CONVERSION_FROM_STRING("SERIAL", plssvm::kokkos::execution_space::serial); +} + +TEST(KokkosExecutionSpace, from_string_unknown) { + // foo isn't a valid execution_space + std::istringstream input{ "foo" }; + plssvm::kokkos::execution_space space{}; + input >> space; + EXPECT_TRUE(input.fail()); +} + +TEST(KokkosExecutionSpace, determine_execution_space) { + // check that "unreachable" is never reached + EXPECT_THAT(plssvm::kokkos::determine_execution_space(), ::testing::AnyOf(plssvm::kokkos::execution_space::cuda, plssvm::kokkos::execution_space::hip, plssvm::kokkos::execution_space::sycl, plssvm::kokkos::execution_space::hpx, plssvm::kokkos::execution_space::openmp, plssvm::kokkos::execution_space::openmp_target, plssvm::kokkos::execution_space::openacc, plssvm::kokkos::execution_space::threads, plssvm::kokkos::execution_space::serial)); +} + 
+TEST(KokkosExecutionSpace, available_execution_spaces) { + // at least one execution space must always be available + EXPECT_FALSE(plssvm::kokkos::available_execution_spaces().empty()); +} From 4ed8baced441b762d554192ce87ff557531a6154 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 17:06:54 +0200 Subject: [PATCH 020/123] Add typedef tests. --- tests/backends/Kokkos/CMakeLists.txt | 1 + tests/backends/Kokkos/detail/typedefs.cpp | 27 +++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/backends/Kokkos/detail/typedefs.cpp diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index 7032ec2d8..b8401c933 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -10,6 +10,7 @@ set(PLSSVM_KOKKOS_TEST_NAME Kokkos_tests) # list all necessary sources set(PLSSVM_KOKKOS_TEST_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/typedefs.cpp # ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp # ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cu ${CMAKE_CURRENT_LIST_DIR}/kokkos_csvm.cpp diff --git a/tests/backends/Kokkos/detail/typedefs.cpp b/tests/backends/Kokkos/detail/typedefs.cpp new file mode 100644 index 000000000..4e25d4a6c --- /dev/null +++ b/tests/backends/Kokkos/detail/typedefs.cpp @@ -0,0 +1,27 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the Kokkos::View typedefs. 
+ */ + +#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::{device_view_type, host_view_type} + +#include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::DefaultExecutionSpace, Kokkos::HostSpace, Kokkos::MemoryUnmanaged + +#include "gtest/gtest.h" // TEST, ::testing::StaticAssertTypeEq + +TEST(KokkosTypedefs, device_view_type) { + // test device view typedefs + ::testing::StaticAssertTypeEq, plssvm::kokkos::detail::device_view_type>(); + ::testing::StaticAssertTypeEq, plssvm::kokkos::detail::device_view_type>(); +} + +TEST(KokkosTypedefs, host_view_type) { + // test host view typedefs + ::testing::StaticAssertTypeEq, plssvm::kokkos::detail::host_view_type>(); + ::testing::StaticAssertTypeEq, plssvm::kokkos::detail::host_view_type>(); +} From ee9705c83da8d16d0ded6821fb49011f957e7ada Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 17:17:19 +0200 Subject: [PATCH 021/123] Add standard layout tuple tests. --- .../Kokkos/detail/standard_layout_tuple.hpp | 2 ++ tests/backends/Kokkos/CMakeLists.txt | 1 + .../Kokkos/detail/standard_layout_tuple.cpp | 33 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 tests/backends/Kokkos/detail/standard_layout_tuple.cpp diff --git a/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp b/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp index 3f5fddddd..5b26f5e98 100644 --- a/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp +++ b/include/plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp @@ -15,6 +15,8 @@ #include "plssvm/constants.hpp" // plssvm::real_type +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION + #include // std::size_t #include // std::is_standard_layout #include // std::forward diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index b8401c933..c17034811 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -11,6 +11,7 @@ 
set(PLSSVM_KOKKOS_TEST_NAME Kokkos_tests) set(PLSSVM_KOKKOS_TEST_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/typedefs.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/standard_layout_tuple.cpp # ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp # ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cu ${CMAKE_CURRENT_LIST_DIR}/kokkos_csvm.cpp diff --git a/tests/backends/Kokkos/detail/standard_layout_tuple.cpp b/tests/backends/Kokkos/detail/standard_layout_tuple.cpp new file mode 100644 index 000000000..7b4fb6cd8 --- /dev/null +++ b/tests/backends/Kokkos/detail/standard_layout_tuple.cpp @@ -0,0 +1,33 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the custom standard layout tuple implementation necessary for Kokkos. 
+ */ + +#include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::{standard_layout_tuple, make_standard_layout_tuple, get} + +#include "gtest/gtest.h" // TEST, EXPECT_EQ, testing::StaticAssertTypeEq + +#include // std::remove_const_t + +TEST(KokkosStandardLayoutTuple, make_standard_layout_tuple) { + // create a new standard layout tuple + [[maybe_unused]] const auto tuple = plssvm::kokkos::detail::make_standard_layout_tuple(true, 42, 3.1415); + + // check the tuple type + ::testing::StaticAssertTypeEq, std::remove_const_t>(); +} + +TEST(KokkosStandardLayoutTuple, get) { + // create a new standard layout tuple + const auto tuple = plssvm::kokkos::detail::make_standard_layout_tuple(true, 42, 3.1415); + + // check getter functions + EXPECT_EQ(plssvm::kokkos::detail::get<0>(tuple), true); + EXPECT_EQ(plssvm::kokkos::detail::get<1>(tuple), 42); + EXPECT_EQ(plssvm::kokkos::detail::get<2>(tuple), 3.1415); +} From 8a90949058de87b85b385cbfb74bdb36cd3d31a8 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 17:23:11 +0200 Subject: [PATCH 022/123] Add stub tests. --- .../backends/Kokkos/detail/pinned_memory.cpp | 2 + tests/backends/Kokkos/CMakeLists.txt | 4 +- .../backends/Kokkos/detail/pinned_memory.cpp | 39 +++++++++++++++++++ tests/backends/Kokkos/detail/utility.cpp | 19 +++++++++ 4 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 tests/backends/Kokkos/detail/pinned_memory.cpp create mode 100644 tests/backends/Kokkos/detail/utility.cpp diff --git a/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp b/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp index 919cbdaa1..dfae19661 100644 --- a/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp +++ b/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp @@ -40,6 +40,8 @@ pinned_memory::~pinned_memory() { } } +// TODO: check if implementable via Kokkos? 
+ template class pinned_memory; template class pinned_memory; diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index c17034811..f6925207f 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -12,8 +12,8 @@ set(PLSSVM_KOKKOS_TEST_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/typedefs.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/standard_layout_tuple.cpp -# ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp -# ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cu + ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp ${CMAKE_CURRENT_LIST_DIR}/kokkos_csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp ${CMAKE_CURRENT_LIST_DIR}/execution_space.cpp diff --git a/tests/backends/Kokkos/detail/pinned_memory.cpp b/tests/backends/Kokkos/detail/pinned_memory.cpp new file mode 100644 index 000000000..aa91612d7 --- /dev/null +++ b/tests/backends/Kokkos/detail/pinned_memory.cpp @@ -0,0 +1,39 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the Kokkos backend pinned memory. 
+ */ + +#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" // plssvm::kokkos::detail::pinned_memory + +#include "tests/backends/generic_pinned_memory_tests.hpp" // generic pinned memory tests to instantiate +#include "tests/naming.hpp" // naming::test_parameter_to_name +#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list} + +#include "gtest/gtest.h" // INSTANTIATE_TYPED_TEST_SUITE_P + +#include // std::tuple + +template +struct kokkos_pinned_memory_test_type { + using pinned_memory_type = plssvm::kokkos::detail::pinned_memory; + + constexpr static bool can_pin = false; // TODO: try implementing in Kokkos? +}; + +using kokkos_pinned_memory_tuple = std::tuple, kokkos_pinned_memory_test_type>; + +// the tests used in the instantiated GTest test suites +using kokkos_pinned_memory_type_gtest = util::combine_test_parameters_gtest_t>; +using kokkos_pinned_memory_layout_type_gtest = util::combine_test_parameters_gtest_t, util::layout_type_list>; + +// instantiate type-parameterized tests +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosPinnedMemory, PinnedMemory, kokkos_pinned_memory_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosPinnedMemory, PinnedMemoryLayout, kokkos_pinned_memory_layout_type_gtest, naming::test_parameter_to_name); + +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosPinnedMemoryDeathTest, PinnedMemoryDeathTest, kokkos_pinned_memory_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosPinnedMemoryDeathTest, PinnedMemoryLayoutDeathTest, kokkos_pinned_memory_layout_type_gtest, naming::test_parameter_to_name); diff --git a/tests/backends/Kokkos/detail/utility.cpp b/tests/backends/Kokkos/detail/utility.cpp new file mode 100644 index 000000000..26c4b1b56 --- /dev/null +++ b/tests/backends/Kokkos/detail/utility.cpp @@ -0,0 +1,19 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - 
All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the custom utility functions related to the Kokkos backend. + */ + +#include "plssvm/backends/Kokkos/detail/utility.hpp" + +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT, EXPECT_THROW_WHAT_MATCHER + +#include "fmt/format.h" // fmt::format +#include "gmock/gmock.h" // ::testing::StartsWith +#include "gtest/gtest.h" // TEST, EXPECT_GE, EXPECT_NO_THROW + +#include // std::regex, std::regex::extended, std::regex_match From fc021ae44eb1d6f60f4f3e99b2eeabd90d60bacb Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 18:09:00 +0200 Subject: [PATCH 023/123] Rename function. --- include/plssvm/backends/Kokkos/execution_space.hpp | 2 +- src/plssvm/backends/Kokkos/csvm.cpp | 2 +- src/plssvm/backends/Kokkos/execution_space.cpp | 2 +- tests/backends/Kokkos/execution_space.cpp | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/plssvm/backends/Kokkos/execution_space.hpp b/include/plssvm/backends/Kokkos/execution_space.hpp index 6d9d84e3f..fa1236d70 100644 --- a/include/plssvm/backends/Kokkos/execution_space.hpp +++ b/include/plssvm/backends/Kokkos/execution_space.hpp @@ -49,7 +49,7 @@ enum class execution_space { * @brief Create an `execution_space` from the current `Kokkos::DefaultExecutionSpace`. * @return the enum value representing the current `Kokkos::DefaultExecutionSpace` (`[[nodiscard]]`) */ -[[nodiscard]] execution_space determine_execution_space() noexcept; +[[nodiscard]] execution_space determine_default_execution_space() noexcept; /** * @brief List all available Kokkos::ExecutionSpaces. 
diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 7d24b2a8b..1f4b0d8d5 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -49,7 +49,7 @@ csvm::csvm(parameter params) : csvm::csvm(target_platform target, parameter params) : base_type{ params }, - space_{ determine_execution_space() } { + space_{ determine_default_execution_space() } { this->init(target); } diff --git a/src/plssvm/backends/Kokkos/execution_space.cpp b/src/plssvm/backends/Kokkos/execution_space.cpp index 06a4c351e..2f3472aa8 100644 --- a/src/plssvm/backends/Kokkos/execution_space.cpp +++ b/src/plssvm/backends/Kokkos/execution_space.cpp @@ -76,7 +76,7 @@ std::istream &operator>>(std::istream &in, execution_space &space) { return in; } -execution_space determine_execution_space() noexcept { +execution_space determine_default_execution_space() noexcept { // determine the execution_space enumeration value based on the provided Kokkos execution space #if defined(KOKKOS_ENABLE_CUDA) if constexpr (std::is_same_v) { diff --git a/tests/backends/Kokkos/execution_space.cpp b/tests/backends/Kokkos/execution_space.cpp index 5639f3802..c0cec6f45 100644 --- a/tests/backends/Kokkos/execution_space.cpp +++ b/tests/backends/Kokkos/execution_space.cpp @@ -12,8 +12,8 @@ #include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING, EXPECT_CONVERSION_FROM_STRING -#include "gtest/gtest-matchers.h" // EXPECT_THAT; ::testing::AnyOf -#include "gtest/gtest.h" // TEST, EXPECT_TRUE +#include "gmock/gmock.h" // EXPECT_THAT; ::testing::AnyOf +#include "gtest/gtest.h" // TEST, EXPECT_TRUE #include // std::istringstream @@ -70,7 +70,7 @@ TEST(KokkosExecutionSpace, from_string_unknown) { TEST(KokkosExecutionSpace, determine_execution_space) { // check that "unreachable" is never reached - EXPECT_THAT(plssvm::kokkos::determine_execution_space(), ::testing::AnyOf(plssvm::kokkos::execution_space::cuda, plssvm::kokkos::execution_space::hip, 
plssvm::kokkos::execution_space::sycl, plssvm::kokkos::execution_space::hpx, plssvm::kokkos::execution_space::openmp, plssvm::kokkos::execution_space::openmp_target, plssvm::kokkos::execution_space::openacc, plssvm::kokkos::execution_space::threads, plssvm::kokkos::execution_space::serial)); + EXPECT_THAT(plssvm::kokkos::determine_default_execution_space(), ::testing::AnyOf(plssvm::kokkos::execution_space::cuda, plssvm::kokkos::execution_space::hip, plssvm::kokkos::execution_space::sycl, plssvm::kokkos::execution_space::hpx, plssvm::kokkos::execution_space::openmp, plssvm::kokkos::execution_space::openmp_target, plssvm::kokkos::execution_space::openacc, plssvm::kokkos::execution_space::threads, plssvm::kokkos::execution_space::serial)); } TEST(KokkosExecutionSpace, available_execution_spaces) { From a5ee4ef2933487e935c3a1a7a12b50fad8bc3936 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 18:09:21 +0200 Subject: [PATCH 024/123] Add TODOs and first utility tests. --- src/plssvm/backends/Kokkos/detail/utility.cpp | 4 +- tests/backends/Kokkos/detail/utility.cpp | 105 +++++++++++++++++- 2 files changed, 102 insertions(+), 7 deletions(-) diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index b7d58be1d..ac49ef532 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -30,7 +30,7 @@ target_platform determine_default_target_platform_from_execution_space(const exe case execution_space::cuda: return target_platform::gpu_nvidia; case execution_space::hip: - return target_platform::gpu_amd; + return target_platform::gpu_amd; // TODO: or gpu_nvidia :/ case execution_space::sycl: case execution_space::openmp_target: case execution_space::openacc: @@ -56,7 +56,7 @@ void check_execution_space_target_platform_combination(const execution_space spa } break; case execution_space::hip: - if (target != target_platform::gpu_amd) { + if (target != 
target_platform::gpu_amd && target != target_platform::gpu_nvidia) { throw backend_exception{ fmt::format("The target platform {} is not supported for Kokkos {} execution space!", target, space) }; } break; diff --git a/tests/backends/Kokkos/detail/utility.cpp b/tests/backends/Kokkos/detail/utility.cpp index 26c4b1b56..ab49f1034 100644 --- a/tests/backends/Kokkos/detail/utility.cpp +++ b/tests/backends/Kokkos/detail/utility.cpp @@ -10,10 +10,105 @@ #include "plssvm/backends/Kokkos/detail/utility.hpp" -#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT, EXPECT_THROW_WHAT_MATCHER +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "fmt/format.h" // fmt::format -#include "gmock/gmock.h" // ::testing::StartsWith -#include "gtest/gtest.h" // TEST, EXPECT_GE, EXPECT_NO_THROW +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace -#include // std::regex, std::regex::extended, std::regex_match +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT + +#include "fmt/core.h" // fmt::format +#include "gmock/gmock.h" // EXPECT_THAT; ::testing::AnyOf +#include "gtest/gtest.h" // TEST, EXPECT_GE, EXPECT_NE + +#include // std::regex, std::regex::extended, std::regex_match +#include // std::string +#include // std::vector + +TEST(KokkosUtility, determine_default_target_platform_from_execution_space) { + // determine the potential default target platform + EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::cuda), plssvm::target_platform::gpu_nvidia); + EXPECT_THAT(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::hip), ::testing::AnyOf(plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd)); + 
EXPECT_NE(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::sycl), plssvm::target_platform::automatic); + EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::hpx), plssvm::target_platform::cpu); + EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::openmp), plssvm::target_platform::cpu); + EXPECT_NE(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::openmp_target), plssvm::target_platform::automatic); + EXPECT_NE(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::openacc), plssvm::target_platform::automatic); + EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::threads), plssvm::target_platform::cpu); + EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::serial), plssvm::target_platform::cpu); +} + +TEST(KokkosUtility, check_execution_space_target_platform_combination) { + // check some execution_space <-> target_platform combinations + // the cuda execution space only supports the NVIDIA GPU + EXPECT_NO_THROW(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::cuda, plssvm::target_platform::gpu_nvidia)); + EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::cuda, plssvm::target_platform::gpu_amd), + plssvm::kokkos::backend_exception, + "The target platform gpu_amd is not supported for Kokkos Cuda execution space!"); + EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::cuda, plssvm::target_platform::gpu_intel), 
+ plssvm::kokkos::backend_exception, + "The target platform gpu_intel is not supported for Kokkos Cuda execution space!"); + EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::cuda, plssvm::target_platform::cpu), + plssvm::kokkos::backend_exception, + "The target platform cpu is not supported for Kokkos Cuda execution space!"); + + // the hip execution space only supports the NVIDIA and AMD GPUs + EXPECT_NO_THROW(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::hip, plssvm::target_platform::gpu_nvidia)); + EXPECT_NO_THROW(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::hip, plssvm::target_platform::gpu_amd)); + EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::hip, plssvm::target_platform::gpu_intel), + plssvm::kokkos::backend_exception, + "The target platform gpu_intel is not supported for Kokkos HIP execution space!"); + EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::hip, plssvm::target_platform::cpu), + plssvm::kokkos::backend_exception, + "The target platform cpu is not supported for Kokkos HIP execution space!"); + + // TODO: SYCL + // TODO: OpenMP target + // TODO: OpenACC + + // the remaining execution spaces all only support CPUs! 
+ for (const plssvm::kokkos::execution_space exec : { plssvm::kokkos::execution_space::hpx, plssvm::kokkos::execution_space::openmp, plssvm::kokkos::execution_space::threads, plssvm::kokkos::execution_space::serial }) { + EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(exec, plssvm::target_platform::gpu_nvidia), + plssvm::kokkos::backend_exception, + fmt::format("The target platform gpu_nvidia is not supported for Kokkos {} execution space!", exec)); + EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(exec, plssvm::target_platform::gpu_amd), + plssvm::kokkos::backend_exception, + fmt::format("The target platform gpu_amd is not supported for Kokkos {} execution space!", exec)); + EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(exec, plssvm::target_platform::gpu_intel), + plssvm::kokkos::backend_exception, + fmt::format("The target platform gpu_intel is not supported for Kokkos {} execution space!", exec)); + EXPECT_NO_THROW(plssvm::kokkos::detail::check_execution_space_target_platform_combination(exec, plssvm::target_platform::cpu)); + } +} + +TEST(KokkosUtility, get_device_list) { + // get the default device list + const plssvm::kokkos::execution_space space = plssvm::kokkos::determine_default_execution_space(); + const plssvm::target_platform target = plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(space); + const std::vector devices = plssvm::kokkos::detail::get_device_list(space, target); + + // check the number of returned devices + if (space == plssvm::kokkos::execution_space::cuda || space == plssvm::kokkos::execution_space::hip || space == plssvm::kokkos::execution_space::sycl) { + // for the device execution spaces AT LEAST ONE device must be found + EXPECT_GE(devices.size(), 1); + } else { + // for all other execution spaces EXACTLY ONE device must be found + EXPECT_EQ(devices.size(), 1); + } +} + 
+TEST(KokkosUtility, get_device_name) { + // get the device name of the default Kokkos execution space + const plssvm::kokkos::execution_space space = plssvm::kokkos::determine_default_execution_space(); + const std::string name = plssvm::kokkos::detail::get_device_name(space, Kokkos::DefaultExecutionSpace{}); + + // the returned device name may not be empty or unknown + EXPECT_FALSE(name.empty()); + EXPECT_NE(name, std::string{ "unknown" }); +} + +TEST(KokkosUtility, get_kokkos_version) { + const std::regex reg{ "[0-9]+\\.[0-9]+\\.[0-9]+", std::regex::extended }; + EXPECT_TRUE(std::regex_match(plssvm::kokkos::detail::get_kokkos_version(), reg)); +} From 01f27938a7a46bb2fa0ffa11ba5c64e966fcf9ce Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 22 Oct 2024 18:17:41 +0200 Subject: [PATCH 025/123] Reduce code duplication by moving similar code to a common header file. --- tests/kokkos_main.cpp | 31 +++------------------------- tests/main.cpp | 27 ++----------------------- tests/main.hpp | 47 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 53 deletions(-) create mode 100644 tests/main.hpp diff --git a/tests/kokkos_main.cpp b/tests/kokkos_main.cpp index e53409bd4..1edfbb9fe 100644 --- a/tests/kokkos_main.cpp +++ b/tests/kokkos_main.cpp @@ -9,36 +9,11 @@ * @brief Contains the googletest main function. Sets the DeathTest to "threadsafe" execution instead of "fast". 
*/ -#include "gtest/gtest.h" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST, RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG} - -#include "Kokkos_Core.hpp" // TODO: - -// TODO: reduce copy-paste - -// silence GTest warnings/test errors -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVM); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunction); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolver); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunction); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionClassification); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionClassification); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionDeathTest); +#include "Kokkos_Core.hpp" // Kokkos::initialize, Kokkos::finalize -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVM); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMKernelFunction); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMDeathTest); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtr); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrLayout); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest); +#include "gtest/gtest.h" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST, RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG} -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception); +#include "main.hpp" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST definitions int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/tests/main.cpp b/tests/main.cpp index d27eddd7d..944ad9318 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -9,32 +9,9 @@ 
* @brief Contains the googletest main function. Sets the DeathTest to "threadsafe" execution instead of "fast". */ -#include "gtest/gtest.h" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST, RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG} +#include "main.hpp" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST definitions -// silence GTest warnings/test errors -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVM); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunction); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolver); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunction); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionClassification); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionClassification); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionDeathTest); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVM); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMKernelFunction); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMDeathTest); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtr); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrLayout); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest); - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception); +#include "gtest/gtest.h" // RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG} int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/tests/main.hpp b/tests/main.hpp new file mode 100644 index 000000000..ddb4ea590 --- /dev/null +++ b/tests/main.hpp @@ -0,0 +1,47 @@ +/** + * @file + * @author Alexander Van Craen + * @author 
Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Header file for the GoogleTest main files to reduce code duplication. + */ + +#ifndef PLSSVM_TESTS_MAIN_HPP_ +#define PLSSVM_TESTS_MAIN_HPP_ +#pragma once + +#include "gtest/gtest.h" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST + +// silence GTest warnings/test errors + +// generic CSVM tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVM); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunction); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolver); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunction); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionClassification); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionClassification); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionDeathTest); +// generic GPU CSVM tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVM); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMKernelFunction); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMDeathTest); +// pinned memory tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemory); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryLayout); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryLayoutDeathTest); +// device pointer tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtr); 
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrLayout); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest); +// exception tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception); + +#endif // PLSSVM_TESTS_MAIN_HPP_ From fd6473ebd7218eb1a49917e35f2c2cb42eaed56e Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 28 Oct 2024 15:12:53 +0100 Subject: [PATCH 026/123] Support switching between different Kokkos::ExecutionSpaces at runtime. plssvm::kokkos::csvm now correctly honors the provided target platform. --- include/plssvm/backends/Kokkos/csvm.hpp | 32 +- .../backends/Kokkos/detail/device_ptr.hpp | 27 +- .../Kokkos/detail/device_view_wrapper.hpp | 230 ++++++++ .../backends/Kokkos/detail/device_wrapper.hpp | 197 +++++++ .../backends/Kokkos/detail/typedefs.hpp | 36 -- .../plssvm/backends/Kokkos/detail/utility.hpp | 76 ++- .../backends/Kokkos/execution_space.hpp | 286 +++++++++- .../Kokkos/kernel/cg_explicit/blas.hpp | 65 ++- .../cg_explicit/kernel_matrix_assembly.hpp | 32 +- .../kernel_matrix_assembly_blas.hpp | 19 +- .../Kokkos/kernel/detail/memset_kernel.hpp | 56 ++ .../Kokkos/kernel/kernel_functions.hpp | 20 +- .../backends/Kokkos/kernel/predict_kernel.hpp | 51 +- src/plssvm/backends/Kokkos/CMakeLists.txt | 1 + src/plssvm/backends/Kokkos/csvm.cpp | 492 ++++++++++-------- .../backends/Kokkos/detail/device_ptr.cpp | 115 ++-- .../backends/Kokkos/detail/device_wrapper.cpp | 106 ++++ src/plssvm/backends/Kokkos/detail/utility.cpp | 182 +++---- .../backends/Kokkos/execution_space.cpp | 104 +--- tests/backends/Kokkos/CMakeLists.txt | 3 +- tests/backends/Kokkos/detail/device_ptr.cpp | 6 +- .../Kokkos/detail/device_view_wrapper.cpp | 77 +++ .../backends/Kokkos/detail/device_wrapper.cpp | 115 ++++ tests/backends/Kokkos/detail/typedefs.cpp | 27 - tests/backends/Kokkos/detail/utility.cpp | 125 ++--- tests/backends/Kokkos/execution_space.cpp | 72 ++- tests/backends/generic_csvm_tests.hpp | 5 +- tests/backends/generic_gpu_csvm_tests.hpp | 2 +- 
tests/utility.hpp | 20 +- 29 files changed, 1839 insertions(+), 740 deletions(-) create mode 100644 include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp create mode 100644 include/plssvm/backends/Kokkos/detail/device_wrapper.hpp delete mode 100644 include/plssvm/backends/Kokkos/detail/typedefs.hpp create mode 100644 include/plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp create mode 100644 src/plssvm/backends/Kokkos/detail/device_wrapper.cpp create mode 100644 tests/backends/Kokkos/detail/device_view_wrapper.cpp create mode 100644 tests/backends/Kokkos/detail/device_wrapper.cpp delete mode 100644 tests/backends/Kokkos/detail/typedefs.cpp diff --git a/include/plssvm/backends/Kokkos/csvm.hpp b/include/plssvm/backends/Kokkos/csvm.hpp index 859a9f43b..d8dcfaab8 100644 --- a/include/plssvm/backends/Kokkos/csvm.hpp +++ b/include/plssvm/backends/Kokkos/csvm.hpp @@ -13,19 +13,18 @@ #define PLSSVM_BACKENDS_KOKKOS_CSVM_HPP_ #pragma once -#include "plssvm/backends/execution_range.hpp" // plssvm::detail::{dim_type, execution_range} -#include "plssvm/backends/gpu_csvm.hpp" // plssvm::detail::gpu_csvm -#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr -#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" // plssvm::kokkos::detail::pinned_memory -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space -#include "plssvm/constants.hpp" // plssvm::real_type -#include "plssvm/csvm.hpp" // plssvm::detail::csvm_backend_exists -#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size -#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES -#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter -#include "plssvm/target_platforms.hpp" // plssvm::target_platform - -#include "Kokkos_Core_fwd.hpp" // Kokkos::DefaultExecutionSpace +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::{dim_type, execution_range} +#include 
"plssvm/backends/gpu_csvm.hpp" // plssvm::detail::gpu_csvm +#include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/detail/pinned_memory.hpp" // plssvm::kokkos::detail::pinned_memory +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/csvm.hpp" // plssvm::detail::csvm_backend_exists +#include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size +#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform #include // std::size_t #include // std::true_type @@ -38,13 +37,12 @@ namespace kokkos { /** * @brief A C-SVM implementation using Kokkos as backend. - * @details Internally, we always only use the `Kokkos::DefaultExecutionSpace`. */ -class csvm : public ::plssvm::detail::gpu_csvm { +class csvm : public ::plssvm::detail::gpu_csvm { protected: // protected for the test mock class /// The template base type of the Kokkos C-SVM class. - using base_type = ::plssvm::detail::gpu_csvm; + using base_type = ::plssvm::detail::gpu_csvm; using base_type::data_distribution_; using base_type::devices_; @@ -120,7 +118,7 @@ class csvm : public ::plssvm::detail::gpu_csvm // std::size_t @@ -28,9 +27,9 @@ namespace plssvm::kokkos::detail { * @tparam T the type of the kernel view to wrap */ template -class device_ptr : public ::plssvm::detail::gpu_device_ptr, device_ptr> { +class device_ptr : public ::plssvm::detail::gpu_device_ptr, device_ptr> { /// The template base type of the Kokkos device_ptr class. 
- using base_type = ::plssvm::detail::gpu_device_ptr, device_ptr>; + using base_type = ::plssvm::detail::gpu_device_ptr, device_ptr>; using base_type::data_; using base_type::queue_; @@ -60,22 +59,22 @@ class device_ptr : public ::plssvm::detail::gpu_device_ptr // std::array +#include // std::size_t +#include // std::invoke +#include // std::make_index_sequence, std::index_sequence, std::move +#include // std::variant, std::get, std::visit + +namespace plssvm::kokkos::detail { + +namespace impl { + +/** + * @brief Uninstantiated base type to create a `std::variant` containing all available Kokkos::View types. + */ +template +struct create_view_variant_type_helper; + +/** + * @brief Helper struct to create a `std::variant` containing all available Kokkos::View types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + * @tparam T the value type of the underlying Kokkos::View + * @tparam Is the indices to index the `std::array` + */ +template +struct create_view_variant_type_helper> { + /// The array containing all available execution spaces. + constexpr static auto array = detail::constexpr_available_execution_spaces(); + /// The resulting variant type. + using type = std::variant>...>; +}; + +/** + * @brief Create a `std::variant` containing all available Kokkos::View types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + * @tparam T the value type of the underlying Kokkos::View + */ +template +struct create_view_variant_type { + /// The number of types in the final variant. + constexpr static std::size_t N = detail::constexpr_available_execution_spaces().size(); + /// The final variant type. 
+ using type = typename create_view_variant_type_helper>::type; +}; + +} // namespace impl + +/** + * @brief A wrapper class around a `std::variant` that contains all available Kokkos::View types. + * @tparam T the value type of the underlying Kokkos::View + */ +template +class device_view_wrapper { + public: + /// The `std::variant` type containing all Kokkos::View types. + using variant_type = typename impl::create_view_variant_type::type; + + /** + * @brief Default construct the `std::variant` wrapper. + */ + device_view_wrapper() = default; + + /** + * @brief Construct the wrapper using the provided Kokkos::View instance by forwarding its value to the underlying `std::variant`. + * @tparam ExecutionSpace the used Kokkos::ExecutionSpace type of the Kokkos::View + * @param[in] view the Kokkos::View instance + */ + template + explicit device_view_wrapper(Kokkos::View &&view) : + v_{ std::move(view) } { } + + /** + * @brief Given the provided `execution_space` enum value, tries to get the `std::variant` alternative for the corresponding Kokkos::ExecutionSpace type. + * @tparam space the `execution_space` enum value + * @return the Kokkos::View instance (`[[nodiscard]]`) + */ + template + [[nodiscard]] Kokkos::View> &get() { + return std::get>>(v_); + } + + /** + * @copydoc plssvm::kokkos::detail::device_view_wrapper::get + */ + template + [[nodiscard]] const Kokkos::View> &get() const { + return std::get>>(v_); + } + + /** + * @brief Return the `execution_space` enum value of the currently active `std::variant` Kokkos::View type. + * @return the `execution_space` enum value (`[[nodiscard]]`) + */ + [[nodiscard]] execution_space get_execution_space() const noexcept { + return detail::constexpr_available_execution_spaces()[v_.index()]; + } + + /** + * @brief Invoke the function @p func on the active `std::variant` member using `std::visit` internally. 
+ * @tparam Func the type of the function + * @param[in] func the function to invoke + */ + template + void execute(const Func &func) { + // clang-format off + std::visit([&func](auto &view) { + std::invoke(func, view); + }, v_); + // clang-format on + } + + /** + * @copydoc plssvm::kokkos::detail::device_view_wrapper::execute + */ + template + void execute(const Func &func) const { + // clang-format off + std::visit([&func](const auto &view) { + std::invoke(func, view); + }, v_); + // clang-format on + } + + /** + * @brief Compare two device view wrappers for equality by comparing the wrapped `std::variant`s. + * @param[in] lhs the first device view wrapper + * @param[in] rhs the second device view wrapper + * @return `true` if both underlying `std::variant`s are equal, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] friend bool operator==(const device_view_wrapper &lhs, const device_view_wrapper &rhs) noexcept { + return lhs.v_ == rhs.v_; + } + + /** + * @brief Compare two device view wrappers for inequality by comparing the wrapped `std::variant`s. + * @param[in] lhs the first device view wrapper + * @param[in] rhs the second device view wrapper + * @return `true` if both underlying `std::variant`s are unequal, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] friend bool operator!=(const device_view_wrapper &lhs, const device_view_wrapper &rhs) noexcept { + return !(lhs == rhs); + } + + private: + /// The wrapped `std::variant` type. + variant_type v_; +}; + +/** + * @brief Given an execution @p space and the number of elements @p size, creates a Kokkos::View in the respective memory space. + * @tparam T the value type of the underlying Kokkos::View + * @param[in] space the specific execution space + * @param[in] size the size of the Kokkos::View (number of elements **not** bytes!) 
+ * @return a Kokkos::View wrapper where the active member of the internal `std::variant` corresponds to the Kokkos::View in the Kokkos::ExecutionSpace specified by @p space (`[[nodiscard]]`) + */ +template +[[nodiscard]] device_view_wrapper make_device_view_wrapper(const execution_space &space, const std::size_t size) { + switch (space) { + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(([&]() { + return device_view_wrapper{ Kokkos::View{ "cuda_device_ptr_view", size } }; + })); + break; + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(([&]() { + return device_view_wrapper{ Kokkos::View{ "hip_device_ptr_view", size } }; + })); + break; + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(([&]() { + return device_view_wrapper{ Kokkos::View{ "sycl_device_ptr_view", size } }; + })); + break; + case execution_space::hpx: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX(([&]() { + return device_view_wrapper{ Kokkos::View{ "hpx_device_ptr_view", size } }; + })); + break; + case execution_space::openmp: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP(([&]() { + return device_view_wrapper{ Kokkos::View{ "openmp_device_ptr_view", size } }; + })); + break; + case execution_space::openmp_target: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET(([&]() { + return device_view_wrapper{ Kokkos::View{ "openmptarget_device_ptr_view", size } }; + })); + break; + case execution_space::openacc: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC(([&]() { + return device_view_wrapper{ Kokkos::View{ "openacc_device_ptr_view", size } }; + })); + break; + case execution_space::threads: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS(([&]() { + return device_view_wrapper{ Kokkos::View{ "threads_device_ptr_view", size } }; + })); + break; + case execution_space::serial: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL(([&]() { + return device_view_wrapper{ Kokkos::View{ "serial_device_ptr_view", size } }; + })); + break; + } + // all possible cases should be handled by the 
previous switch + // -> silence missing return statement compiler warnings due to throw statement + ::plssvm::detail::unreachable(); +} + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp b/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp new file mode 100644 index 000000000..30b2a91be --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp @@ -0,0 +1,197 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief A wrapper around a Kokkos::ExecutionSpace representing a single device. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_WRAPPER_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_WRAPPER_HPP_ + +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, execution_space_to_kokkos_type_t}, plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include // std::array +#include // std::size_t +#include // std::invoke +#include // std::make_index_sequence, std::index_sequence, std::forward +#include // std::variant, std::get, std::visit +#include // std::vector + +namespace plssvm::kokkos::detail { + +namespace impl { + +/** + * @brief Uninstantiated base type to create a `std::variant` containing all available Kokkos::ExecutionSpace types. 
+ */ +template +struct create_device_variant_type_helper; + +/** + * @brief Helper struct to create a `std::variant` containing all available Kokkos::ExecutionSpace types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + * @tparam Is the indices to index the `std::array` + */ +template +struct create_device_variant_type_helper> { + /// The array containing all available execution spaces. + constexpr static auto array = detail::constexpr_available_execution_spaces(); + /// The resulting variant type. + using type = std::variant...>; +}; + +/** + * @brief Create a `std::variant` containing all available Kokkos::ExecutionSpace types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + */ +struct create_device_variant_type { + /// The number of types in the final variant. + constexpr static std::size_t N = detail::constexpr_available_execution_spaces().size(); + /// The final variant type. + using type = typename create_device_variant_type_helper>::type; +}; + +} // namespace impl + +/** + * @brief A wrapper class around a `std::variant` that contains all available Kokkos::ExecutionSpace types. + */ +class device_wrapper { + public: + /// The `std::variant` type containing all Kokkos::ExecutionSpace types. + using variant_type = typename impl::create_device_variant_type::type; + + /** + * @brief Default construct the `std::variant` wrapper. + */ + device_wrapper() = default; + + /** + * @brief Construct the wrapper using the provided Kokkos::ExecutionSpace instance by forwarding its value to the underlying `std::variant`. 
+ * @tparam ExecutionSpace the used Kokkos::ExecutionSpace type + * @param[in] exec the Kokkos::ExecutionSpace instance + */ + template + explicit device_wrapper(ExecutionSpace &&exec) : + v_{ std::forward(exec) } { } + + /** + * @brief Given the provided `execution_space` enum value, tries to get the `std::variant` alternative for the corresponding Kokkos::ExecutionSpace type. + * @tparam space the `execution_space` enum value + * @return the Kokkos::ExecutionSpace instance (`[[nodiscard]]`) + */ + template + [[nodiscard]] execution_space_to_kokkos_type_t &get() { + return std::get>(v_); + } + + /** + * @copydoc plssvm::kokkos::detail::device_wrapper::get + */ + template + const execution_space_to_kokkos_type_t &get() const { + return std::get>(v_); + } + + /** + * @brief Return the `execution_space` enum value of the currently active `std::variant` Kokkos::ExecutionSpace type. + * @return the `execution_space` enum value (`[[nodiscard]]`) + */ + [[nodiscard]] execution_space get_execution_space() const noexcept { + return detail::constexpr_available_execution_spaces()[v_.index()]; + } + + /** + * @brief Invoke the function @p func on the active `std::variant` member using `std::visit` internally. + * @tparam Func the type of the function + * @param[in] func the function to invoke + */ + template + void execute(const Func &func) { + // clang-format off + std::visit([&func](auto &device) { + std::invoke(func, device); + }, v_); + // clang-format on + } + + /** + * @copydoc plssvm::kokkos::detail::device_wrapper::execute + */ + template + void execute(const Func &func) const { + // clang-format off + std::visit([&func](const auto &device) { + std::invoke(func, device); + }, v_); + // clang-format on + } + + /** + * @brief Invoke the function @p func on the active `std::variant` member using `std::visit` internally returning the result value of the function invocation. 
+ * @tparam Func the type of the function + * @param[in] func the function to invoke + * @return the return value of function @p func (`[[nodiscard]]`) + */ + template + [[nodiscard]] auto execute_and_return(const Func &func) { + // clang-format off + return std::visit([&func](auto &device) { + return std::invoke(func, device); + }, v_); + // clang-format on + } + + /** + * @copydoc plssvm::kokkos::detail::device_wrapper::execute_and_return + */ + template + [[nodiscard]] auto execute_and_return(const Func &func) const { + // clang-format off + return std::visit([&func](const auto &device) { + return std::invoke(func, device); + }, v_); + // clang-format on + } + + /** + * @brief Compare two device wrappers for equality by comparing the wrapped `std::variant`s. + * @param[in] lhs the first device wrapper + * @param[in] rhs the second device wrapper + * @return `true` if both underlying `std::variant`s are equal, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] friend bool operator==(const device_wrapper &lhs, const device_wrapper &rhs) noexcept { + return lhs.v_ == rhs.v_; + } + + /** + * @brief Compare two device wrappers for inequality by comparing the wrapped `std::variant`s. + * @param[in] lhs the first device wrapper + * @param[in] rhs the second device wrapper + * @return `true` if both underlying `std::variant`s are unequal, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] friend bool operator!=(const device_wrapper &lhs, const device_wrapper &rhs) noexcept { + return !(lhs == rhs); + } + + private: + /// The wrapped `std::variant` type. + variant_type v_{}; +}; + +/** + * @brief Get a list of all available devices in the execution @p space that are supported by the @p target platform. 
+ * @param[in] space the Kokkos::ExecutionSpace to retrieve the devices from + * @param[in] target the target platform that must be supported + * @return all devices for the @p target in the Kokkos::ExecutionSpace @p space (`[[nodiscard]]`) + */ +[[nodiscard]] std::vector get_device_list(execution_space space, target_platform target); + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_WRAPPER_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/typedefs.hpp b/include/plssvm/backends/Kokkos/detail/typedefs.hpp deleted file mode 100644 index 61fffbb31..000000000 --- a/include/plssvm/backends/Kokkos/detail/typedefs.hpp +++ /dev/null @@ -1,36 +0,0 @@ -/** - * @file - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief A few convenient Kokkos::View typedefs. - */ - -#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_TYPEDEFS_HPP_ -#define PLSSVM_BACKENDS_KOKKOS_DETAIL_TYPEDEFS_HPP_ -#pragma once - -#include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::DefaultExecutionSpace, Kokkos::HostSpace, Kokkos::MemoryUnmanaged - -namespace plssvm::kokkos::detail { - -/** - * @brief Typedef for a simple Kokkos::View targeting the Kokkos::DefaultExecutionSpace. - * @tparam T the type of the view's data - */ -template -using device_view_type = Kokkos::View; - -/** - * @brief Typedef for a simple Kokkos::View always targeting the Kokkos::HostSpace. 
- * @tparam T the type of the view's data - */ -template -using host_view_type = Kokkos::View; - -} // namespace plssvm::kokkos::detail - -#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_TYPEDEFS_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/utility.hpp b/include/plssvm/backends/Kokkos/detail/utility.hpp index e29468830..fe8b0367f 100644 --- a/include/plssvm/backends/Kokkos/detail/utility.hpp +++ b/include/plssvm/backends/Kokkos/detail/utility.hpp @@ -13,52 +13,76 @@ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ #pragma once -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space -#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES +#include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace +#include "Kokkos_Core.hpp" // Kokkos::ExecutionSpace::fence -#include // std::string -#include // std::vector +#include // std::map +#include // std::string +#include // std::disjunction, std::is_same +#include // std::variant +#include // std::vector namespace plssvm::kokkos::detail { +namespace impl { + +/** + * @brief Uninstantiated base type for the check whether a type @p appears in a std::variant @p Variant. + * @tparam T the type to check for inclusion + * @tparam Variant the std::variant that should include the type @p T + */ +template +struct is_type_in_variant; + /** - * @brief Given the execution @p space, determine the respective default target platform. - * @param[in] space the Kokkos::ExecutionSpace for which the default target platform should be determined - * @return the default target platform (`[[nodiscard]]`) + * @brief Implement the inclusion check using `std::disjunction`. 
+ * @tparam T the type to check for inclusion + * @tparam Variant the std::variant that should include the type @p T */ -[[nodiscard]] target_platform determine_default_target_platform_from_execution_space(execution_space space); +template +struct is_type_in_variant> : std::disjunction...> { }; /** - * @brief Check whether the execution @p space supports the @p target platform. Throws an `plssvm::kokkos::backend_exception` if that's not the case. - * @param[in] space the Kokkos::ExecutionSpace to investigate - * @param[in] target the target platform to check - * @throws plssvm::kokkos::backend_exception if @p space doesn't support the @p target platform + * @copydoc plssvm::kokkos::detail::impl::is_type_in_variant */ -void check_execution_space_target_platform_combination(execution_space space, target_platform target); +template +inline constexpr bool is_type_in_variant_v = is_type_in_variant::value; + +} // namespace impl /** - * @brief Get a list of all available devices in the execution @p space that are supported by the @p target platform. - * @param[in] space the Kokkos::ExecutionSpace to retrieve the devices from - * @param[in] target the target platform that must be supported - * @return all devices for the @p target in the Kokkos::ExecutionSpace @p space (`[[nodiscard]]`) + * @brief Return a `std::map` containing a mapping from all available target platforms to the available Kokkos::ExecutionSpace that supports said target platform. + * @details If a target platform is supported by multiple Kokkos::ExecutionSpace, the order is determined by the order as returned by `list_available_execution_spaces`. 
+ * @return the mapping of all available target_platform <-> Kokkos::ExecutionSpace combinations (`[[nodiscard]]`) */ -[[nodiscard]] std::vector get_device_list(execution_space space, target_platform target); +[[nodiscard]] std::map> available_target_platform_to_execution_space_mapping(); /** - * @brief Get the name of the device represented by the Kokkos::ExecutionSpace @p exec in the execution @p space. - * @param[in] space the Kokkos::ExecutionSpace - * @param[in] exec the device + * @brief Get the name of the device represented by the `device_wrapper` @p dev. + * @param[in] dev the device wrapper * @return the device name (`[[nodiscard]]`) */ -[[nodiscard]] std::string get_device_name(execution_space space, const Kokkos::DefaultExecutionSpace &exec); +[[nodiscard]] std::string get_device_name(const device_wrapper &dev); + +/** + * @brief Wait for all kernel and/or other operations on the device wrapper in the @p dev to finish. + * @param[in] dev the device wrapper + */ +void device_synchronize(const device_wrapper &dev); /** - * @brief Wait for all kernel and/or other operations on the Kokkos::ExecutionSpace @p exec to finish - * @param[in] exec the Kokkos::ExecutionSpace to synchronize + * @brief Wait for all kernel and/or other operations on the device represented by the Kokkos::ExecutionSpace @p exec to finish. + * @tparam ExecutionSpace the type of the Kokkos::ExecutionSpace + * @param[in] exec the device represented by a Kokkos::ExecutionSpace */ -void device_synchronize(const Kokkos::DefaultExecutionSpace &exec); +template )> +void device_synchronize(const ExecutionSpace &exec) { + exec.fence(); +} /** * @brief Get the used Kokkos library version. 
diff --git a/include/plssvm/backends/Kokkos/execution_space.hpp b/include/plssvm/backends/Kokkos/execution_space.hpp index fa1236d70..bb37a39a7 100644 --- a/include/plssvm/backends/Kokkos/execution_space.hpp +++ b/include/plssvm/backends/Kokkos/execution_space.hpp @@ -13,9 +13,12 @@ #define PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_HPP_ #pragma once +#include "Kokkos_Core.hpp" // Kokkos macros, Kokkos ExecutionSpace types + #include "fmt/base.h" // fmt::formatter #include "fmt/ostream.h" // fmt::ostream_formatter +#include // std::array #include // std::ostream forward declaration #include // std::vector @@ -45,19 +48,6 @@ enum class execution_space { serial }; -/** - * @brief Create an `execution_space` from the current `Kokkos::DefaultExecutionSpace`. - * @return the enum value representing the current `Kokkos::DefaultExecutionSpace` (`[[nodiscard]]`) - */ -[[nodiscard]] execution_space determine_default_execution_space() noexcept; - -/** - * @brief List all available Kokkos::ExecutionSpaces. - * @details At least one execution space must **always** be available! - * @return a vector containing all available execution spaces (`[[nodiscard]]`) - */ -[[nodiscard]] std::vector available_execution_spaces(); - /** * @brief Output the execution @p space to the given output-stream @p out. * @param[in,out] out the output-stream to write the execution space to @@ -74,9 +64,277 @@ std::ostream &operator<<(std::ostream &out, execution_space space); */ std::istream &operator>>(std::istream &in, execution_space &space); +//***************************************************// +// execution_space_to_kokkos_type // +//***************************************************// + +/** + * @brief Uninstantiated base type to convert an `execution_space` enum value to a Kokkos::ExecutionSpace type. 
+ */ +template +struct execution_space_to_kokkos_type; + +#if defined(KOKKOS_ENABLE_CUDA) +/** + * @brief Convert an `execution_space::cuda` enum value to a `Kokkos::Cuda` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Cuda; +}; +#endif + +#if defined(KOKKOS_ENABLE_HIP) +/** + * @brief Convert an `execution_space::hip` enum value to a `Kokkos::HIP` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::HIP; +}; +#endif + +#if defined(KOKKOS_ENABLE_SYCL) +/** + * @brief Convert an `execution_space::sycl` enum value to a `Kokkos::SYCL` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::SYCL; +}; +#endif + +#if defined(KOKKOS_ENABLE_HPX) +/** + * @brief Convert an `execution_space::hpx` enum value to a `Kokkos::Experimental::HPX` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Experimental::HPX; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +/** + * @brief Convert an `execution_space::openmp` enum value to a `Kokkos::OpenMP` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::OpenMP; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMPTARGET) +/** + * @brief Convert an `execution_space::openmp_target` enum value to a `Kokkos::OpenMPTarget` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::OpenMPTarget; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) +/** + * @brief Convert an `execution_space::openacc` enum value to a `Kokkos::Experimental::OpenACC` Kokkos::ExecutionSpace type. 
+ */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Experimental::OpenACC; +}; +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +/** + * @brief Convert an `execution_space::threads` enum value to a `Kokkos::Threads` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Threads; +}; +#endif + +#if defined(KOKKOS_ENABLE_SERIAL) +/** + * @brief Convert an `execution_space::serial` enum value to a `Kokkos::Serial` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Serial; +}; +#endif + +/** + * @brief Convert the `execution_space` @p space to the corresponding Kokkos::ExecutionSpace type. + * @tparam space the enum value to convert + */ +template +using execution_space_to_kokkos_type_t = typename execution_space_to_kokkos_type::type; + +//***************************************************// +// kokkos_type_to_execution_space // +//***************************************************// + +/** + * @brief Uninstantiated base type to convert a Kokkos::ExecutionSpace type to a `execution_space` enum value. + */ +template +struct kokkos_type_to_execution_space; + +#if defined(KOKKOS_ENABLE_CUDA) +/** + * @brief Convert a `Kokkos::Cuda` Kokkos::ExecutionSpace type to an `execution_space::cuda` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::cuda; +}; +#endif + +#if defined(KOKKOS_ENABLE_HIP) +/** + * @brief Convert a `Kokkos::HIP` Kokkos::ExecutionSpace type to an `execution_space::hip` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::hip; +}; +#endif + +#if defined(KOKKOS_ENABLE_SYCL) +/** + * @brief Convert a `Kokkos::SYCL` Kokkos::ExecutionSpace type to an `execution_space::sycl` enum value. 
+ */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::sycl; +}; +#endif + +#if defined(KOKKOS_ENABLE_HPX) +/** + * @brief Convert a `Kokkos::Experimental::HPX` Kokkos::ExecutionSpace type to an `execution_space::hpx` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::hpx; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +/** + * @brief Convert a `Kokkos::OpenMP` Kokkos::ExecutionSpace type to an `execution_space::openmp` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::openmp; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMPTARGET) +/** + * @brief Convert a `Kokkos::OpenMPTarget` Kokkos::ExecutionSpace type to an `execution_space::openmp_target` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::openmp_target; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) +/** + * @brief Convert a `Kokkos::Experimental::OpenACC` Kokkos::ExecutionSpace type to an `execution_space::openacc` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::openacc; +}; +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +/** + * @brief Convert a `Kokkos::Threads` Kokkos::ExecutionSpace type to an `execution_space::threads` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::threads; +}; +#endif + +#if defined(KOKKOS_ENABLE_SERIAL) +/** + * @brief Convert a `Kokkos::Serial` Kokkos::ExecutionSpace type to an `execution_space::serial` enum value. 
+ */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::serial; +}; +#endif + +/** + * @brief Convert the Kokkos::ExecutionSpace type @p ExecutionSpace to the corresponding `execution_space` enum value. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace type to convert + */ +template +inline constexpr execution_space kokkos_type_to_execution_space_v = kokkos_type_to_execution_space::value; + +//***************************************************// +// other functions // +//***************************************************// + +namespace detail { + +/** + * @brief List all available Kokkos::ExecutionSpaces at compile time. + * @details At least one execution space must **always** be available! + * @return a `std::array` containing all available execution spaces (`[[nodiscard]]`) + */ +[[nodiscard]] inline constexpr auto constexpr_available_execution_spaces() noexcept { + // Note: the trailing comma is explicitly allowed by the standard + // Note: the order is intentionally chosen this way -> the order of the entries determines the priority when using a backend to run our code + return std::array{ +#if defined(KOKKOS_ENABLE_CUDA) + execution_space::cuda, +#endif +#if defined(KOKKOS_ENABLE_HIP) + execution_space::hip, +#endif +#if defined(KOKKOS_ENABLE_SYCL) + execution_space::sycl, +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + execution_space::openmp_target, +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + execution_space::openacc, +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + execution_space::openmp, +#endif +#if defined(KOKKOS_ENABLE_THREADS) + execution_space::threads, +#endif +#if defined(KOKKOS_ENABLE_HPX) + execution_space::hpx, +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + execution_space::serial, +#endif + }; +} + +} // namespace detail + +/** + * @brief List all available Kokkos::ExecutionSpaces. 
+ * @details Only Kokkos::ExecutionSpaces that were enabled during the CMake configuration are available. + * @return the available Kokkos::ExecutionSpaces (`[[nodiscard]]`) + */ +[[nodiscard]] std::vector list_available_execution_spaces(); + } // namespace plssvm::kokkos -/// @endcond +/// @cond template <> struct fmt::formatter : fmt::ostream_formatter { }; diff --git a/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp b/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp index 85997c118..bddadac01 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp @@ -13,10 +13,9 @@ #define PLSSVM_BACKENDS_KOKKOS_CG_EXPLICIT_BLAS_HPP_ #pragma once -#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::device_view_type -#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} +#include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} -#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::View, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents #include // std::size_t @@ -24,8 +23,16 @@ namespace plssvm::kokkos::detail { /** * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel */ +template class device_kernel_symm { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + public: /** * @brief Initialize the Kokkos kernel function object.
@@ -40,6 +47,7 @@ class device_kernel_symm { * @param[in,out] C the matrix @p C, also used as result matrix * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension */ device_kernel_symm(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, device_view_type A, device_view_type B, const real_type beta, device_view_type C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : num_rows_{ num_rows }, @@ -55,8 +63,12 @@ class device_kernel_symm { grid_y_offset_{ grid_y_offset }, grid_size_x_{ grid_size_x } { } + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ KOKKOS_INLINE_FUNCTION - void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); @@ -155,8 +167,16 @@ class device_kernel_symm { /** * @brief Perform an explicit BLAS SYMM operation: `C = alpha * A * B + beta * C` where @p A is a `m x k` symmetric matrix (memory optimized), @p B is a `k x n` matrix, @p C is a `m x n` matrix, and @p alpha and @p beta are scalars. * @details In a multi-GPU setting, this function is responsible for mirroring down the columns this device is responsible for! 
+ * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel */ +template class device_kernel_symm_mirror { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + public: /** * @brief Initialize the Kokkos kernel function object. @@ -172,6 +192,7 @@ class device_kernel_symm_mirror { * @param[in,out] C the matrix @p C, also used as result matrix * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension */ device_kernel_symm_mirror(const std::size_t num_rows, const std::size_t num_rhs, const std::size_t num_mirror_rows, const std::size_t device_specific_num_rows, const std::size_t row_offset, const real_type alpha, device_view_type A, device_view_type B, const real_type beta, device_view_type C, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : num_rows_{ num_rows }, @@ -188,8 +209,12 @@ class device_kernel_symm_mirror { grid_y_offset_{ grid_y_offset }, grid_size_x_{ grid_size_x } { } + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ KOKKOS_INLINE_FUNCTION - void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); @@ -278,8 +303,16 @@ class device_kernel_symm_mirror { /** * @brief Perform a simple inplace matrix addition: lhs += rhs. 
+ * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel */ +template class device_kernel_inplace_matrix_add { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + public: /** * @brief Initialize the Kokkos kernel function object. @@ -288,6 +321,7 @@ class device_kernel_inplace_matrix_add { * @param[in] rhs the second matrix * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension */ device_kernel_inplace_matrix_add(const std::size_t num_cols, device_view_type lhs, device_view_type rhs, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : num_cols_{ num_cols }, @@ -297,8 +331,12 @@ class device_kernel_inplace_matrix_add { grid_y_offset_{ grid_y_offset }, grid_size_x_{ grid_size_x } { } + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ KOKKOS_INLINE_FUNCTION - void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); @@ -337,8 +375,16 @@ class device_kernel_inplace_matrix_add { /** * @brief Perform a simple inplace matrix scale: lhs *= scalar. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel */ +template class device_kernel_inplace_matrix_scale { + /** + * @brief The type of the used Kokkos::View. 
+ */ + template + using device_view_type = Kokkos::View; + public: /** * @brief Initialize the Kokkos kernel function object. @@ -347,6 +393,7 @@ class device_kernel_inplace_matrix_scale { * @param[in] scale the value to scale * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension */ device_kernel_inplace_matrix_scale(const std::size_t num_cols, device_view_type lhs, const real_type scale, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : num_cols_{ num_cols }, @@ -356,8 +403,12 @@ class device_kernel_inplace_matrix_scale { grid_y_offset_{ grid_y_offset }, grid_size_x_{ grid_size_x } { } + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ KOKKOS_INLINE_FUNCTION - void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); diff --git a/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp index 550dbfe0e..b3d46112d 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp @@ -14,12 +14,11 @@ #pragma once #include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple 
-#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::device_view_type #include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::View, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents #include // std::size_t @@ -27,15 +26,21 @@ namespace plssvm::kokkos::detail { /** * @brief Create the explicit kernel matrix using the @p kernel_function. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel * @tparam kernel_function the type of the used kernel function * @tparam Args the types of the parameters necessary for the specific kernel function; stored in a `standard_layout_tuple` */ -template +template class device_kernel_assembly { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + public: /** - * @brief Initialize the SYCL kernel function object. - * @param[in] cgh the SYCL handler used to allocate the local memory + * @brief Initialize the Kokkos kernel function object. 
* @param[out] kernel_matrix_d the calculated kernel matrix * @param[in] data_d the data points to calculate the kernel matrix from * @param[in] num_rows the number of data points @@ -47,6 +52,7 @@ class device_kernel_assembly { * @param[in] cost the cost factor the diagonal is scaled with * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function */ device_kernel_assembly(device_view_type kernel_matrix_d, device_view_type data_d, const std::size_t num_rows, const std::size_t device_num_rows, const std::size_t row_offset, const std::size_t num_features, device_view_type q, const real_type QA_cost, const real_type cost, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x, Args... kernel_function_parameter) : @@ -65,8 +71,12 @@ class device_kernel_assembly { kernel_function_parameter_{ detail::make_standard_layout_tuple(std::forward(kernel_function_parameter)...) } { } + /** + * @brief Function call operator overload performing the actual calculation. 
+ * @param[in] team the Kokkos team representing the current point in the execution space + */ KOKKOS_INLINE_FUNCTION - void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); @@ -104,10 +114,10 @@ class device_kernel_assembly { const auto global_j = row_offset_ + j_linear + static_cast(internal) * THREAD_BLOCK_SIZE_sz; // FEATURE_BLOCK_SIZE = 2 * THREAD_BLOCK_SIZE -> store twice as many values in the shared memory - data_cache_i(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + 1ull + PADDING_SIZE_sz) + global_i]; - data_cache_i(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + 1ull + PADDING_SIZE_sz) + global_i]; - data_cache_j(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + 1ull + PADDING_SIZE_sz) + global_j]; - data_cache_j(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + 1ull + PADDING_SIZE_sz) + global_j]; + data_cache_i(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_i]; + data_cache_i(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_i]; + data_cache_j(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_j]; + data_cache_j(threadIdx_y 
+ THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = data_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_rows_ + std::size_t{ 1 } + PADDING_SIZE_sz) + global_j]; } team.team_barrier(); // wait until all threads loaded their part of the data @@ -141,7 +151,7 @@ class device_kernel_assembly { temp_ij += cost_; } // update the kernel matrix - kernel_matrix_d_[device_global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - device_global_j * (device_global_j + 1ull) / 2ull + device_global_i] = temp_ij; + kernel_matrix_d_[device_global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - device_global_j * (device_global_j + std::size_t{ 1 }) / std::size_t{ 2 } + device_global_i] = temp_ij; } } } diff --git a/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp index cf73cadb4..b22f69885 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp @@ -14,12 +14,11 @@ #pragma once #include "plssvm/backends/Kokkos/detail/standard_layout_tuple.hpp" // plssvm::kokkos::detail::standard_layout_tuple -#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::device_view_type #include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents, Kokkos::atomic_add +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::View, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents, Kokkos::atomic_add #include // std::size_t @@ -27,11 +26,18
@@ namespace plssvm::kokkos::detail { /** * @brief Perform an implicit BLAS SYMM-like operation: `C = alpha * A * B + C` where `A` is the implicitly calculated kernel matrix using the @p kernel_function (never actually stored, reducing the amount of needed global memory), @p B and @p C are matrices, and @p alpha is a scalar. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel * @tparam kernel_function the type of the used kernel function * @tparam Args the types of the parameters necessary for the specific kernel function */ -template +template class device_kernel_assembly_symm { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + public: /** * @brief Initialize the Kokkos kernel function object. @@ -49,6 +55,7 @@ class device_kernel_assembly_symm { * @param[in] num_classes the number of classes in the data set * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function */ device_kernel_assembly_symm(const real_type alpha, device_view_type q, device_view_type data_d, const std::size_t num_rows, const std::size_t device_num_rows, const std::size_t row_offset, const std::size_t num_features, const real_type QA_cost, const real_type cost, device_view_type B, device_view_type C, const std::size_t num_classes, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x, Args... kernel_function_parameter) : @@ -69,8 +76,12 @@ class device_kernel_assembly_symm { grid_size_x_{ grid_size_x }, kernel_function_parameter_{ detail::make_standard_layout_tuple(std::forward(kernel_function_parameter)...) 
} { } + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ KOKKOS_INLINE_FUNCTION - void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); diff --git a/include/plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp b/include/plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp new file mode 100644 index 000000000..584b1afdd --- /dev/null +++ b/include/plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp @@ -0,0 +1,56 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a Kokkos function object for memsetting a device pointer with a specific value. + */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_KERNEL_DETAIL_MEMSET_KERNEL_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_KERNEL_DETAIL_MEMSET_KERNEL_HPP_ +#pragma once + +#include "plssvm/constants.hpp" // plssvm::real_type + +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION + +#include // std::size_t + +namespace plssvm::kokkos::detail { + +/** + * @brief A kernel to perform a memset-like operation on a Kokkos::View + */ +class device_memset_kernel { + public: + /** + * @brief Memset all bytes in @p data to the provided @p pattern. 
+ * @param[out] data the array to memset + * @param[in] pattern the memset pattern + */ + device_memset_kernel(unsigned char* data, const unsigned char pattern) : + data_{ data }, + pattern_{ pattern } { } + + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] idx the index representing the current point in the execution space + */ + KOKKOS_INLINE_FUNCTION + void operator()(const std::size_t idx) const { + data_[idx] = pattern_; + } + + private: + /// @cond Doxygen_suppress + unsigned char* data_; + const unsigned char pattern_; + /// @endcond +}; + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_KERNEL_DETAIL_MEMSET_KERNEL_HPP_ diff --git a/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp index 952b1e99f..3c6f9c8aa 100644 --- a/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp +++ b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp @@ -17,7 +17,7 @@ #include "plssvm/detail/utility.hpp" // plssvm::detail::always_false_v #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "Kokkos_MathematicalFunctions.hpp" // Kokkos::pow, Kokkos::exp, Kokkos::tanh, Kokkos::abs +#include "Kokkos_MathematicalFunctions.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::pow, Kokkos::exp, Kokkos::tanh, Kokkos::abs #include // std::is_same_v @@ -59,7 +59,7 @@ KOKKOS_INLINE_FUNCTION real_type feature_reduce(const */ template <> KOKKOS_INLINE_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { - return ::Kokkos::fabs(val1 - val2); + return Kokkos::fabs(val1 - val2); } /** @@ -73,9 +73,9 @@ template <> KOKKOS_INLINE_FUNCTION real_type feature_reduce(const real_type val1, const real_type val2) { const real_type d = val1 - val2; if constexpr (std::is_same_v) { - return (real_type{ 1.0 } / (val1 + val2 + FLT_MIN)) * d * d; // TODO: std::numeric_limits::min + return (real_type{ 1.0 } / 
(val1 + val2 + FLT_MIN)) * d * d; } else { - return (real_type{ 1.0 } / (val1 + val2 + DBL_MIN)) * d * d; // TODO: std::numeric_limits::min + return (real_type{ 1.0 } / (val1 + val2 + DBL_MIN)) * d * d; } } @@ -92,19 +92,19 @@ KOKKOS_INLINE_FUNCTION real_type feature_reduce -KOKKOS_INLINE_FUNCTION real_type apply_kernel_function(const real_type value, const detail::standard_layout_tuple params) { +KOKKOS_INLINE_FUNCTION real_type apply_kernel_function(const real_type value, [[maybe_unused]] const detail::standard_layout_tuple params) { if constexpr (kernel_function == kernel_function_type::linear) { return value; } else if constexpr (kernel_function == kernel_function_type::polynomial) { - return ::Kokkos::pow(detail::get<1>(params) * value + detail::get<2>(params), detail::get<0>(params)); + return Kokkos::pow(detail::get<1>(params) * value + detail::get<2>(params), detail::get<0>(params)); } else if constexpr (kernel_function == kernel_function_type::rbf) { - return ::Kokkos::exp(-detail::get<0>(params) * value); + return Kokkos::exp(-detail::get<0>(params) * value); } else if constexpr (kernel_function == kernel_function_type::sigmoid) { - return ::Kokkos::tanh(detail::get<0>(params) * value + detail::get<1>(params)); + return Kokkos::tanh(detail::get<0>(params) * value + detail::get<1>(params)); } else if constexpr (kernel_function == kernel_function_type::laplacian) { - return ::Kokkos::exp(-detail::get<0>(params) * value); + return Kokkos::exp(-detail::get<0>(params) * value); } else if constexpr (kernel_function == kernel_function_type::chi_squared) { - return ::Kokkos::exp(-detail::get<0>(params) * value); + return Kokkos::exp(-detail::get<0>(params) * value); } else { static_assert(::plssvm::detail::always_false_v, "Unsupported kernel function!"); } diff --git a/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp index c6a302d6d..767bfc958 100644 --- 
a/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp +++ b/include/plssvm/backends/Kokkos/kernel/predict_kernel.hpp @@ -13,12 +13,11 @@ #define PLSSVM_BACKENDS_KOKKOS_PREDICT_KERNEL_HPP_ #pragma once -#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::device_view_type #include "plssvm/backends/Kokkos/kernel/kernel_functions.hpp" // plssvm::kokkos::detail::{feature_reduce, apply_kernel_function} #include "plssvm/constants.hpp" // plssvm::{real_type, THREAD_BLOCK_SIZE, INTERNAL_BLOCK_SIZE, FEATURE_BLOCK_SIZE, PADDING_SIZE} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents, Kokkos::atomic_add +#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::View, Kokkos::TeamPolicy, Kokkos::mdspan, Kokkos::dextents, Kokkos::atomic_add #include // std::size_t @@ -26,8 +25,16 @@ namespace plssvm::kokkos::detail { /** * @brief Calculate the `q` vector used to speedup the prediction using the linear kernel function. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel */ +template class device_kernel_w_linear { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + public: /** * @brief Initialize the Kokkos kernel function object. 
@@ -40,6 +47,7 @@ class device_kernel_w_linear { * @param[in] sv_offset the first support vector (row in @p alpha_d) the current device is responsible for * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension */ device_kernel_w_linear(device_view_type w_d, device_view_type alpha_d, device_view_type sv_d, const std::size_t num_classes, const std::size_t num_sv, const std::size_t device_specific_num_sv, const std::size_t sv_offset, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : w_d_{ w_d }, @@ -53,8 +61,12 @@ class device_kernel_w_linear { grid_y_offset_{ grid_y_offset }, grid_size_x_{ grid_size_x } { } + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ KOKKOS_INLINE_FUNCTION - void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); @@ -132,8 +144,16 @@ class device_kernel_w_linear { /** * @brief Predict the @p predict_points_d using the linear kernel speeding up the calculation using the @p w_d vector. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel */ +template class device_kernel_predict_linear { + /** + * @brief The type of the used Kokkos::View. + */ + template + using device_view_type = Kokkos::View; + public: /** * @brief Initialize the Kokkos kernel function object. 
@@ -146,6 +166,7 @@ class device_kernel_predict_linear { * @param[in] num_features the number of features per data point * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension */ device_kernel_predict_linear(device_view_type prediction_d, device_view_type w_d, device_view_type rho_d, device_view_type predict_points_d, const std::size_t num_classes, const std::size_t num_predict_points, const std::size_t num_features, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x) : prediction_d_{ prediction_d }, @@ -159,8 +180,12 @@ class device_kernel_predict_linear { grid_y_offset_{ grid_y_offset }, grid_size_x_{ grid_size_x } { } + /** + * @brief Function call operator overload performing the actual calculation. + * @param[in] team the Kokkos team representing the current point in the execution space + */ KOKKOS_INLINE_FUNCTION - void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); @@ -242,11 +267,18 @@ class device_kernel_predict_linear { /** * @brief Predict the @p predict_points_d using the @p kernel_function. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace used to execute the kernel * @tparam kernel_function the type of the used kernel function * @tparam Args the types of the parameters necessary for the specific kernel function */ -template +template class device_kernel_predict { + /** + * @brief The type of the used Kokkos::View. 
+ */ + template + using device_view_type = Kokkos::View; + public: /** * @brief Initialize the SYCL kernel function object. @@ -261,6 +293,7 @@ class device_kernel_predict { * @param[in] num_features the number of features per data point * @param[in] grid_x_offset the offset in x-dimension into the data points if more than one execution grid has to be used * @param[in] grid_y_offset the offset in y-dimension into the data points if more than one execution grid has to be used + * @param[in] grid_size_x the size of the execution grid in x-dimension * @param[in] kernel_function_parameter the parameters necessary to apply the @p kernel_function */ device_kernel_predict(device_view_type prediction_d, device_view_type alpha_d, device_view_type rho_d, device_view_type sv_d, device_view_type predict_points_d, const std::size_t num_classes, const std::size_t num_sv, const std::size_t num_predict_points, const std::size_t num_features, const std::size_t grid_x_offset, const std::size_t grid_y_offset, const std::size_t grid_size_x, Args... kernel_function_parameter) : @@ -278,8 +311,12 @@ class device_kernel_predict { grid_size_x_{ grid_size_x }, kernel_function_parameter_{ detail::make_standard_layout_tuple(std::forward(kernel_function_parameter)...) } { } + /** + * @brief Function call operator overload performing the actual calculation. 
+ * @param[in] team the Kokkos team representing the current point in the execution space + */ KOKKOS_INLINE_FUNCTION - void operator()(const Kokkos::TeamPolicy<>::member_type &team) const { + void operator()(const typename Kokkos::TeamPolicy::member_type &team) const { // cast all values to 64-bit std::size_t to prevent potential 32-bit overflows const auto INTERNAL_BLOCK_SIZE_sz = static_cast(INTERNAL_BLOCK_SIZE); const auto THREAD_BLOCK_SIZE_sz = static_cast(THREAD_BLOCK_SIZE); @@ -359,7 +396,7 @@ class device_kernel_predict { alpha_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = alpha_d_[(dim + threadIdx_y + THREAD_BLOCK_SIZE_sz) * (num_sv_ + PADDING_SIZE_sz) + global_sv_idx]; // the bias (rho) must only be applied once for all support vectors - if (blockIdx_y == 0ull) { + if (blockIdx_y == std::size_t{ 0 }) { out_cache(threadIdx_y, internal * THREAD_BLOCK_SIZE + threadIdx_x) = -rho_d_[dim + threadIdx_y]; out_cache(threadIdx_y + THREAD_BLOCK_SIZE, internal * THREAD_BLOCK_SIZE + threadIdx_x) = -rho_d_[dim + threadIdx_y + THREAD_BLOCK_SIZE_sz]; } else { diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt index 89cf282ce..20ae3c0a6 100644 --- a/src/plssvm/backends/Kokkos/CMakeLists.txt +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -23,6 +23,7 @@ message(CHECK_PASS "found") # explicitly set sources set(PLSSVM_KOKKOS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/device_wrapper.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 1f4b0d8d5..0a1903c16 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -11,7 +11,8 @@ #include "plssvm/backends/execution_range.hpp" // plssvm::detail::{execution_range, dim_type} #include 
"plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr -#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::get_runtime_version +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::{device_wrapper, get_device_list} +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::get_runtime_version // TODO: docu #include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp" // plssvm::kokkos::detail::{device_kernel_symm, device_kernel_symm_mirror, device_kernel_inplace_matrix_add, device_kernel_inplace_matrix_scale} @@ -24,6 +25,7 @@ #include "plssvm/detail/logging.hpp" // plssvm::detail::log #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::tracking_entry +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/detail/utility.hpp" // plssvm::detail::{get_system_memory, unreachable} #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type @@ -39,7 +41,9 @@ #include // std::size_t #include // std::terminate #include // std::cout, std::endl +#include // std::map #include // std::string +#include // std::move #include // std::vector namespace plssvm::kokkos { @@ -48,8 +52,7 @@ csvm::csvm(parameter params) : csvm{ plssvm::target_platform::automatic, params } { } csvm::csvm(target_platform target, parameter params) : - base_type{ params }, - space_{ determine_default_execution_space() } { + base_type{ params } { this->init(target); } @@ -80,23 +83,41 @@ void 
csvm::init(const target_platform target) { break; } + // get all available target_platform <-> Kokkos::ExecutionSpace combinations + const std::map> available_combinations = detail::available_target_platform_to_execution_space_mapping(); + + if (target == target_platform::automatic) { + // go through all combinations and choose the first execution space in order: gpu_nvidia -> gpu_amd -> gpu_intel -> cpu + for (const target_platform target_order : { target_platform::gpu_nvidia, target_platform::gpu_amd, target_platform::gpu_intel, target_platform::cpu }) { + if (::plssvm::detail::contains(available_combinations, target_order)) { + // the target platform is supported -> choose the first execution space to use in the Kokkos backend + space_ = available_combinations.at(target_order).front(); + target_ = target_order; + break; + } + } + } else { + // check whether the provided target platform is compatible with the currently available Kokkos::ExecutionSpaces + if (::plssvm::detail::contains(available_combinations, target)) { + // the target platform is supported -> choose the first execution space to use in the Kokkos backend + space_ = available_combinations.at(target).front(); + target_ = target; + } else { + // the provided target platform is unsupported -> throw an exception + throw backend_exception{ fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform {}!", fmt::join(list_available_execution_spaces(), ", "), target) }; + } + } + plssvm::detail::log(verbosity_level::full, - "\nUsing Kokkos ({}) as backend with the Kokkos::DefaultExecutionSpace \"{}\".\n", + "\nUsing Kokkos ({}) as backend with the Kokkos::ExecutionSpace \"{}\".\n", plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_version", detail::get_kokkos_version() }, plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_default_execution_space", space_ }); - // check whether the provided target platform is compatible with the Kokkos execution 
space + // output automatic target platform information if (target == target_platform::automatic) { - // determine the default target based on the provided Kokkos execution space - target_ = detail::determine_default_target_platform_from_execution_space(space_); plssvm::detail::log(verbosity_level::full, "Using {} as automatic target platform.\n", target_); - } else { - // check whether the provided target platform is compatible with the execution space - // throws a backend exception if the combination is invalid - detail::check_execution_space_target_platform_combination(space_, target); - target_ = target; } // get all available devices wrt the requested target platform @@ -116,7 +137,7 @@ void csvm::init(const target_platform target) { std::vector device_names{}; device_names.reserve(devices_.size()); for (typename std::vector::size_type device = 0; device < devices_.size(); ++device) { - const std::string device_name = detail::get_device_name(space_, devices_[device]); + const std::string device_name = detail::get_device_name(devices_[device]); plssvm::detail::log(verbosity_level::full, " [{}, {}]\n", device, @@ -147,21 +168,21 @@ std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { - res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].cuda_device_prop().totalGlobalMem) }; + res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().cuda_device_prop().totalGlobalMem) }; } return res; }); case execution_space::hip: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { - res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].hip_device_prop().totalGlobalMem) }; + res[device_id] = ::plssvm::detail::memory_size{ 
static_cast(devices_[device_id].get().hip_device_prop().totalGlobalMem) }; } return res; }); case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { - res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].sycl_queue().get_device().get_info<::sycl::info::device::global_mem_size>()) }; + res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::global_mem_size>()) }; } return res; }); @@ -188,7 +209,7 @@ std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { - res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].sycl_queue().get_device().get_info<::sycl::info::device::max_mem_alloc_size>()) }; + res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::max_mem_alloc_size>()) }; } return res; }); @@ -213,22 +234,25 @@ std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { switch (space_) { case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { - return static_cast(devices_[device_id].cuda_device_prop().maxThreadsPerBlock); + return static_cast(devices_[device_id].get().cuda_device_prop().maxThreadsPerBlock); }); case execution_space::hip: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { - return static_cast(devices_[device_id].hip_device_prop().maxThreadsPerBlock); + return static_cast(devices_[device_id].get().hip_device_prop().maxThreadsPerBlock); }); case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { - return 
devices_[device_id].sycl_queue().get_device().get_info<::sycl::info::device::max_work_group_size>(); + return devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::max_work_group_size>(); }); + case execution_space::openmp: + return 16; // TODO: most likely dependent on the number of cores in Kokkos... + case execution_space::serial: + // only one thread allowed in serial execution + return 1; case execution_space::openmp_target: case execution_space::openacc: - case execution_space::openmp: case execution_space::hpx: case execution_space::threads: - case execution_space::serial: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } // all possible cases should be handled by the previous switch @@ -243,21 +267,25 @@ ::plssvm::detail::dim_type csvm::get_max_grid_size(const std::size_t device_id) switch (space_) { case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(([&]() -> ::plssvm::detail::dim_type { - const cudaDeviceProp &prop = devices_[device_id].cuda_device_prop(); + // TODO: Kokkos only uses maxGridSize[0] + const cudaDeviceProp &prop = devices_[device_id].get().cuda_device_prop(); return { static_cast(prop.maxGridSize[0]), static_cast(prop.maxGridSize[1]), static_cast(prop.maxGridSize[2]) }; })); case execution_space::hip: + // TODO: Kokkos only uses maxGridSize[0] PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(([&]() -> ::plssvm::detail::dim_type { - const hipDeviceProp &prop = devices_[device_id].hip_device_prop(); + const hipDeviceProp &prop = devices_[device_id].get().hip_device_prop(); return { static_cast(prop.maxGridSize[0]), static_cast(prop.maxGridSize[1]), static_cast(prop.maxGridSize[2]) }; })); + case execution_space::openmp: + return { 16, 16, 16 }; // TODO: correct values + case execution_space::serial: + return { 1, 1, 1 }; // TODO: correct values case execution_space::sycl: case execution_space::openmp_target: case execution_space::openacc: - case 
execution_space::openmp: case execution_space::hpx: case execution_space::threads: - case execution_space::serial: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } // all possible cases should be handled by the previous switch @@ -272,7 +300,6 @@ ::plssvm::detail::dim_type csvm::get_max_grid_size(const std::size_t device_id) auto csvm::run_assemble_kernel_matrix_explicit(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter &params, const device_ptr_type &data_d, const device_ptr_type &q_red_d, real_type QA_cost) const -> device_ptr_type { const unsigned long long num_rows_reduced = data_d.shape().x - 1; const unsigned long long num_features = data_d.shape().y; - const queue_type &device = devices_[device_id]; // calculate the number of data points this device is responsible for const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); @@ -284,192 +311,213 @@ auto csvm::run_assemble_kernel_matrix_explicit(const std::size_t device_id, cons const ::plssvm::detail::triangular_data_distribution &dist = dynamic_cast<::plssvm::detail::triangular_data_distribution &>(*data_distribution_); const std::size_t num_entries_padded = dist.calculate_explicit_kernel_matrix_num_entries_padded(device_id); - device_ptr_type kernel_matrix_d{ num_entries_padded, device }; // only explicitly store the upper triangular matrix + device_ptr_type kernel_matrix_d{ num_entries_padded, devices_[device_id] }; // only explicitly store the upper triangular matrix const real_type cost_factor = real_type{ 1.0 } / params.cost; const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); // save the team sizes const ::plssvm::detail::dim_type team_sizes = exec.block; - for (const auto &[partial_grid, offsets] : exec.grids) { - // create a Kokkos TeamPolicy - Kokkos::TeamPolicy<>
team_policy(device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO); + return devices_[device_id].execute_and_return([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; - switch (params.kernel_type) { - case kernel_function_type::linear: - { - using functor_type = detail::device_kernel_assembly; - Kokkos::parallel_for("assemble_kernel_matrix_explicit_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x }); - } - break; - case kernel_function_type::polynomial: - { - using functor_type = detail::device_kernel_assembly; - Kokkos::parallel_for("assemble_kernel_matrix_explicit_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); - } - break; - case kernel_function_type::rbf: - { - using functor_type = detail::device_kernel_assembly; - Kokkos::parallel_for("assemble_kernel_matrix_explicit_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); - } - break; - case kernel_function_type::sigmoid: - { - using functor_type = detail::device_kernel_assembly; - Kokkos::parallel_for("assemble_kernel_matrix_explicit_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), 
functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); - } - break; - case kernel_function_type::laplacian: - { - using functor_type = detail::device_kernel_assembly; - Kokkos::parallel_for("assemble_kernel_matrix_explicit_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); - } - break; - case kernel_function_type::chi_squared: - { - using functor_type = detail::device_kernel_assembly; - Kokkos::parallel_for("assemble_kernel_matrix_explicit_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get(), data_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); - } - break; + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()) }; + + switch (params.kernel_type) { + case kernel_function_type::linear: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x }); + } + break; + case kernel_function_type::polynomial: + { + using functor_type = 
detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::rbf: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::sigmoid: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::laplacian: + { + using functor_type = detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::chi_squared: + { + using functor_type = 
detail::device_kernel_assembly; + Kokkos::parallel_for("assemble_kernel_matrix_explicit_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ kernel_matrix_d.get().get(), data_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, q_red_d.get().get(), QA_cost, cost_factor, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + } } - } - detail::device_synchronize(device); + detail::device_synchronize(device); - return kernel_matrix_d; + return std::move(kernel_matrix_d); + }); } void csvm::run_blas_level_3_kernel_explicit(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const ::plssvm::detail::execution_range &mirror_exec, const real_type alpha, const device_ptr_type &A_d, const device_ptr_type &B_d, const real_type beta, device_ptr_type &C_d) const { const unsigned long long num_rhs = B_d.shape().x; const unsigned long long num_rows = B_d.shape().y; - const queue_type &device = devices_[device_id]; - // calculate the number of data points this device is responsible for - const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); - // get the offset of the data points this device is responsible for - const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); - // the necessary amount of scratch memory for the kernels - const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + devices_[device_id].execute([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + // calculate the number of data points this device is responsible for + const unsigned long long device_specific_num_rows = 
data_distribution_->place_specific_num_rows(device_id); + // get the offset of the data points this device is responsible for + const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); + // the necessary amount of scratch memory for the kernels + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); - for (const auto &[partial_grid, offsets] : exec.grids) { - // create a Kokkos TeamPolicy - Kokkos::TeamPolicy<> team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; - Kokkos::parallel_for("blas_level_3_kernel_explicit", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm(num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, A_d.get(), B_d.get(), beta, C_d.get(), offsets.x, offsets.y, partial_grid.x)); - } + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; - // save the mirror team sizes - const ::plssvm::detail::dim_type mirror_team_sizes = mirror_exec.block; + Kokkos::parallel_for("blas_level_3_kernel_explicit", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm{ num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, A_d.get().get(), B_d.get().get(), beta, C_d.get().get(), offsets.x, offsets.y, partial_grid.x }); + } - for (const auto &[partial_grid, offsets] : mirror_exec.grids) { - const unsigned long long num_mirror_rows = num_rows - row_offset - device_specific_num_rows; + // save the mirror team sizes + const ::plssvm::detail::dim_type mirror_team_sizes = mirror_exec.block; - if (num_mirror_rows > 0) { - // create a 
Kokkos TeamPolicy - Kokkos::TeamPolicy<> team_policy{ static_cast(partial_grid.total_size()), static_cast(mirror_team_sizes.total_size()), Kokkos::AUTO }; + for (const auto &[partial_grid, offsets] : mirror_exec.grids) { + const unsigned long long num_mirror_rows = num_rows - row_offset - device_specific_num_rows; - Kokkos::parallel_for("blas_level_3_kernel_explicit_mirror", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm_mirror(num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, A_d.get(), B_d.get(), beta, C_d.get(), offsets.x, offsets.y, partial_grid.x)); + if (num_mirror_rows > 0) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(mirror_team_sizes.total_size()), Kokkos::AUTO }; + + Kokkos::parallel_for("blas_level_3_kernel_explicit_mirror", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm_mirror{ num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, A_d.get().get(), B_d.get().get(), beta, C_d.get().get(), offsets.x, offsets.y, partial_grid.x }); + } } - } - detail::device_synchronize(device); + detail::device_synchronize(device); + }); } void csvm::run_inplace_matrix_addition(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const device_ptr_type &rhs_d) const { const unsigned long long num_rhs = lhs_d.shape().x; - const queue_type &device = devices_[device_id]; - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + devices_[device_id].execute([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; - for (const auto &[partial_grid, offsets] : exec.grids) { - // create a Kokkos TeamPolicy - Kokkos::TeamPolicy<> team_policy{ 
static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; - Kokkos::parallel_for("inplace_matrix_addition", team_policy, detail::device_kernel_inplace_matrix_add(num_rhs, lhs_d.get(), rhs_d.get(), offsets.x, offsets.y, partial_grid.x)); - } - detail::device_synchronize(device); + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + Kokkos::parallel_for("inplace_matrix_addition", team_policy, detail::device_kernel_inplace_matrix_add{ num_rhs, lhs_d.get().get(), rhs_d.get().get(), offsets.x, offsets.y, partial_grid.x }); + } + detail::device_synchronize(device); + }); } void csvm::run_inplace_matrix_scale(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, device_ptr_type &lhs_d, const real_type scale) const { const unsigned long long num_rhs = lhs_d.shape().x; - const queue_type &device = devices_[device_id]; - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + devices_[device_id].execute([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; - for (const auto &[partial_grid, offsets] : exec.grids) { - // create a Kokkos TeamPolicy - Kokkos::TeamPolicy<> team_policy{ static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; - Kokkos::parallel_for("inplace_matrix_scale", team_policy, detail::device_kernel_inplace_matrix_scale(num_rhs, lhs_d.get(), scale, offsets.x, offsets.y, partial_grid.x)); - } - detail::device_synchronize(device); + for (const auto &[partial_grid, offsets] : exec.grids) { + 
// create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + Kokkos::parallel_for("inplace_matrix_scale", team_policy, detail::device_kernel_inplace_matrix_scale{ num_rhs, lhs_d.get().get(), scale, offsets.x, offsets.y, partial_grid.x }); + } + detail::device_synchronize(device); + }); } void csvm::run_assemble_kernel_matrix_implicit_blas_level_3(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const real_type alpha, const device_ptr_type &A_d, const parameter ¶ms, const device_ptr_type &q_red, const real_type QA_cost, const device_ptr_type &B_d, device_ptr_type &C_d) const { const unsigned long long num_rows_reduced = A_d.shape().x - 1; const unsigned long long num_features = A_d.shape().y; const unsigned long long num_classes = B_d.shape().x; - const queue_type &device = devices_[device_id]; - // calculate the number of data points this device is responsible for - const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); - // get the offset of the data points this device is responsible for - const unsigned long long row_offset = data_distribution_->place_row_offset(device_id); + devices_[device_id].execute([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; - const real_type cost_factor = real_type{ 1.0 } / params.cost; - const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); + // calculate the number of data points this device is responsible for + const unsigned long long device_specific_num_rows = data_distribution_->place_specific_num_rows(device_id); + // get the offset of the data points this device is responsible for + const unsigned long long row_offset = 
data_distribution_->place_row_offset(device_id); - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + const real_type cost_factor = real_type{ 1.0 } / params.cost; + const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); - for (const auto &[partial_grid, offsets] : exec.grids) { - // create a Kokkos TeamPolicy - Kokkos::TeamPolicy<> team_policy(device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO); + // save the team sizes + const ::plssvm::detail::dim_type team_sizes = exec.block; - switch (params.kernel_type) { - case kernel_function_type::linear: - { - using functor_type = detail::device_kernel_assembly_symm; - Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x }); - } - break; - case kernel_function_type::polynomial: - { - using functor_type = detail::device_kernel_assembly_symm; - Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); - } - break; - case kernel_function_type::rbf: - { - using functor_type = detail::device_kernel_assembly_symm; - Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), 
num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); - } - break; - case kernel_function_type::sigmoid: - { - using functor_type = detail::device_kernel_assembly_symm; - Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); - } - break; - case kernel_function_type::laplacian: - { - using functor_type = detail::device_kernel_assembly_symm; - Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); - } - break; - case kernel_function_type::chi_squared: - { - using functor_type = detail::device_kernel_assembly_symm; - Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get(), A_d.get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get(), C_d.get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); - } - break; + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + switch (params.kernel_type) { + 
case kernel_function_type::linear: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x }); + } + break; + case kernel_function_type::polynomial: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::rbf: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::sigmoid: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), 
C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::laplacian: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::chi_squared: + { + using functor_type = detail::device_kernel_assembly_symm; + Kokkos::parallel_for("assemble_kernel_matrix_implicit_blas_level_3_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ alpha, q_red.get().get(), A_d.get().get(), num_rows_reduced, device_specific_num_rows, row_offset, num_features, QA_cost, cost_factor, B_d.get().get(), C_d.get().get(), num_classes, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + } } - } - detail::device_synchronize(device); + detail::device_synchronize(device); + }); } //***************************************************// @@ -481,27 +529,31 @@ auto csvm::run_w_kernel(const std::size_t device_id, const ::plssvm::detail::exe const unsigned long long num_sv = alpha_d.shape().y; const unsigned long long device_specific_num_sv = sv_d.shape().x; const unsigned long long num_features = sv_d.shape().y; - const queue_type &device = devices_[device_id]; // get the offset of the data points this device is responsible for const unsigned long long sv_offset = data_distribution_->place_row_offset(device_id); - device_ptr_type w_d{ shape{ num_classes, num_features }, shape{ PADDING_SIZE, PADDING_SIZE }, device }; + device_ptr_type w_d{ shape{ num_classes, num_features }, shape{ 
PADDING_SIZE, PADDING_SIZE }, devices_[device_id] }; const std::size_t scratch_memory_size = static_cast(2u * THREAD_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); // save the team sizes const ::plssvm::detail::dim_type team_sizes = exec.block; - for (const auto &[partial_grid, offsets] : exec.grids) { - // create a Kokkos TeamPolicy - Kokkos::TeamPolicy<> team_policy{ static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + return devices_[device_id].execute_and_return([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; - Kokkos::parallel_for("w_kernel", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_w_linear(w_d.get(), alpha_d.get(), sv_d.get(), num_classes, num_sv, device_specific_num_sv, sv_offset, offsets.x, offsets.y, partial_grid.x)); - } - detail::device_synchronize(device); + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + Kokkos::TeamPolicy team_policy{ static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + Kokkos::parallel_for("w_kernel", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_w_linear{ w_d.get().get(), alpha_d.get().get(), sv_d.get().get(), num_classes, num_sv, device_specific_num_sv, sv_offset, offsets.x, offsets.y, partial_grid.x }); + } + detail::device_synchronize(device); - return w_d; + return std::move(w_d); + }); } auto csvm::run_predict_kernel(const std::size_t device_id, const ::plssvm::detail::execution_range &exec, const parameter ¶ms, const device_ptr_type &alpha_d, const device_ptr_type &rho_d, const device_ptr_type &sv_or_w_d, const device_ptr_type &predict_points_d) const -> device_ptr_type { @@ -509,61 +561,65 @@ auto csvm::run_predict_kernel(const std::size_t device_id, const 
::plssvm::detai const unsigned long long num_predict_points = predict_points_d.shape().x; // = device_specific_num_rows const unsigned long long num_features = predict_points_d.shape().y; const unsigned long long num_sv = sv_or_w_d.shape().x; - const queue_type &device = devices_[device_id]; - device_ptr_type out_d{ shape{ num_predict_points, num_classes }, shape{ PADDING_SIZE, PADDING_SIZE }, device }; + device_ptr_type out_d{ shape{ num_predict_points, num_classes }, shape{ PADDING_SIZE, PADDING_SIZE }, devices_[device_id] }; const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); // save the team sizes const ::plssvm::detail::dim_type team_sizes = exec.block; - for (const auto &[partial_grid, offsets] : exec.grids) { - // create a Kokkos TeamPolicy - Kokkos::TeamPolicy<> team_policy{ static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + return devices_[device_id].execute_and_return([&](auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + constexpr execution_space space = kokkos_type_to_execution_space_v; - switch (params.kernel_type) { - case kernel_function_type::linear: - { - using functor_type = detail::device_kernel_predict_linear; - Kokkos::parallel_for("predict_kernel_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), sv_or_w_d.get(), rho_d.get(), predict_points_d.get(), num_classes, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x }); - } - break; - case kernel_function_type::polynomial: - { - using functor_type = detail::device_kernel_predict; - Kokkos::parallel_for("predict_kernel_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, 
offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); - } - break; - case kernel_function_type::rbf: - { - using functor_type = detail::device_kernel_predict; - Kokkos::parallel_for("predict_kernel_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); - } - break; - case kernel_function_type::sigmoid: - { - using functor_type = detail::device_kernel_predict; - Kokkos::parallel_for("predict_kernel_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma), params.coef0 }); - } - break; - case kernel_function_type::laplacian: - { - using functor_type = detail::device_kernel_predict; - Kokkos::parallel_for("predict_kernel_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); - } - break; - case kernel_function_type::chi_squared: - { - using functor_type = detail::device_kernel_predict; - Kokkos::parallel_for("predict_kernel_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get(), alpha_d.get(), rho_d.get(), sv_or_w_d.get(), predict_points_d.get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); - } - break; + for (const auto &[partial_grid, offsets] : exec.grids) { + // create a Kokkos TeamPolicy + 
Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + + switch (params.kernel_type) { + case kernel_function_type::linear: + { + using functor_type = detail::device_kernel_predict_linear; + Kokkos::parallel_for("predict_kernel_linear", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), sv_or_w_d.get().get(), rho_d.get().get(), predict_points_d.get().get(), num_classes, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x }); + } + break; + case kernel_function_type::polynomial: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_polynomial", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, params.degree, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::rbf: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_rbf", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::sigmoid: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_sigmoid", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, 
partial_grid.x, std::get(params.gamma), params.coef0 }); + } + break; + case kernel_function_type::laplacian: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_laplacian", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + case kernel_function_type::chi_squared: + { + using functor_type = detail::device_kernel_predict; + Kokkos::parallel_for("predict_kernel_chi_squared", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), functor_type{ out_d.get().get(), alpha_d.get().get(), rho_d.get().get(), sv_or_w_d.get().get(), predict_points_d.get().get(), num_classes, num_sv, num_predict_points, num_features, offsets.x, offsets.y, partial_grid.x, std::get(params.gamma) }); + } + break; + } } - } - detail::device_synchronize(device); + detail::device_synchronize(device); - return out_d; + return std::move(out_d); + }); } } // namespace plssvm::kokkos diff --git a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp index cbf973ca4..dcd0f98d3 100644 --- a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp @@ -8,13 +8,16 @@ #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" -#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::{device_view_type, host_view_type} -#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::detail::device_synchronize -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/shape.hpp" // plssvm::shape +#include "plssvm/backends/Kokkos/detail/device_view_wrapper.hpp" // 
plssvm::kokkos::detail::{device_view_wrapper, make_device_view_wrapper} +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::detail::device_synchronize +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp" // plssvm::kokkos::detail::device_fill_array +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/shape.hpp" // plssvm::shape -#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos::subview, Kokkos::parallel_for, KOKKOS_LAMBDA, Kokkos::deep_copy +#include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::HostSpace, Kokkos::MemoryUnmanaged, Kokkos::subview, Kokkos::parallel_for, Kokkos::deep_copy #include "fmt/core.h" // fmt::format @@ -26,76 +29,94 @@ namespace plssvm::kokkos::detail { +/** + * @brief Typedef for a simple Kokkos::View always targeting the Kokkos::HostSpace. 
+ * @tparam T the type of the view's data + */ template -device_ptr::device_ptr(const size_type size, const Kokkos::DefaultExecutionSpace &exec) : - device_ptr{ plssvm::shape{ size, 1 }, plssvm::shape{ 0, 0 }, exec } { } +using host_view_type = Kokkos::View; template -device_ptr::device_ptr(const plssvm::shape shape, const Kokkos::DefaultExecutionSpace &exec) : - device_ptr{ shape, plssvm::shape{ 0, 0 }, exec } { } +device_ptr::device_ptr(const size_type size, const device_wrapper &device) : + device_ptr{ plssvm::shape{ size, 1 }, plssvm::shape{ 0, 0 }, device } { } template -device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding, const Kokkos::DefaultExecutionSpace &exec) : - base_type{ shape, padding, exec } { - data_ = device_view_type{ "device_ptr_view", this->size_padded() }; +device_ptr::device_ptr(const plssvm::shape shape, const device_wrapper &device) : + device_ptr{ shape, plssvm::shape{ 0, 0 }, device } { } + +template +device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding, const device_wrapper &device) : + base_type{ shape, padding, device } { + data_ = make_device_view_wrapper(device.get_execution_space(), this->size_padded()); + this->memset(0); } template void device_ptr::memset(const int pattern, const size_type pos, const size_type num_bytes) { - PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); if (pos >= this->size_padded()) { throw backend_exception{ fmt::format("Illegal access in memset!: {} >= {}", pos, this->size_padded()) }; } const size_type rnum_bytes = std::min(num_bytes, (this->size_padded() - pos) * sizeof(value_type)); - // create subview of the device data - auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + (rnum_bytes / sizeof(value_type)))); - // fill subview with constant data - Kokkos::parallel_for("device_ptr_memset", num_bytes, KOKKOS_LAMBDA(const std::size_t idx) { - // Cast the view's data pointer to unsigned char* (byte access) - reinterpret_cast(data_subview.data())[idx] = pattern; }); + // TODO: use Kokkos ZeroMemset specialization? + data_.execute([&](const auto &data) { + using kokkos_execution_space_type = typename ::plssvm::detail::remove_cvref_t::execution_space; - detail::device_synchronize(queue_); + // create subview of the device data + auto *data_ptr = reinterpret_cast(data.data() + pos); + auto p = static_cast(pattern); + // memset subview + Kokkos::parallel_for("device_ptr_memset", + Kokkos::RangePolicy(0, rnum_bytes), + device_memset_kernel{ data_ptr, p }); + + detail::device_synchronize(queue_); + }); } template void device_ptr::fill(const value_type value, const size_type pos, const size_type count) { - PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); if (pos >= this->size_padded()) { throw backend_exception{ fmt::format("Illegal access in fill!: {} >= {}", pos, this->size_padded()) }; } const size_type rcount = std::min(count, this->size_padded() - pos); - // create subview of the device data - auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + rcount)); - // fill subview with constant data - Kokkos::deep_copy(data_subview, value); + data_.execute([&](const auto &data) { + // create subview of the device data + auto data_subview = Kokkos::subview(data, std::make_pair(pos, pos + rcount)); + // fill subview with constant data + Kokkos::deep_copy(data_subview, value); - detail::device_synchronize(queue_); + detail::device_synchronize(queue_); + }); } template void device_ptr::copy_to_device(const_host_pointer_type data_to_copy, const size_type pos, const size_type count) { - PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); const size_type rcount = std::min(count, this->size_padded() - pos); - // create view of the host data - const host_view_type host_view{ data_to_copy, rcount }; - // create subview of the device data - auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + rcount)); - // copy the data to the device subview - Kokkos::deep_copy(data_subview, host_view); + data_.execute([&](const auto &data) { + // create view of the host data + const host_view_type host_view{ data_to_copy, rcount }; + // create subview of the device data + auto data_subview = Kokkos::subview(data, std::make_pair(pos, pos + rcount)); + // copy the data to the device subview + Kokkos::deep_copy(data_subview, host_view); - detail::device_synchronize(queue_); + detail::device_synchronize(queue_); + }); } template void device_ptr::copy_to_device_strided(const_host_pointer_type data_to_copy, const std::size_t spitch, const std::size_t width, const std::size_t height) { - PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); PLSSVM_ASSERT(data_to_copy != nullptr, "Invalid host pointer for the data to copy!"); if (width > spitch) { @@ -121,25 +142,27 @@ void device_ptr::copy_to_device_strided(const_host_pointer_type data_to_copy, template void device_ptr::copy_to_host(host_pointer_type buffer, const size_type pos, const size_type count) const { - PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! 
Maybe *this has been default constructed?"); PLSSVM_ASSERT(buffer != nullptr, "Invalid host pointer for the data to copy!"); const size_type rcount = std::min(count, this->size_padded() - pos); - // create view of the host data - const host_view_type host_view{ buffer, rcount }; - // create subview of the device data - auto data_subview = Kokkos::subview(data_, std::make_pair(pos, pos + rcount)); - // copy the data to the host - Kokkos::deep_copy(host_view, data_subview); + data_.execute([&](const auto &data) { + // create view of the host data + const host_view_type host_view{ buffer, rcount }; + // create subview of the device data + auto data_subview = Kokkos::subview(data, std::make_pair(pos, pos + rcount)); + // copy the data to the host + Kokkos::deep_copy(host_view, data_subview); - detail::device_synchronize(queue_); + detail::device_synchronize(queue_); + }); } template void device_ptr::copy_to_other_device(device_ptr &target, const size_type pos, const size_type count) const { - PLSSVM_ASSERT(data_ != device_view_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); - PLSSVM_ASSERT(target.get() != device_view_type{}, "Invalid target pointer! Maybe target has been default constructed?"); + PLSSVM_ASSERT(data_ != device_pointer_type{}, "Invalid data pointer! Maybe *this has been default constructed?"); + PLSSVM_ASSERT(target.get() != device_pointer_type{}, "Invalid target pointer! 
Maybe target has been default constructed?"); const size_type rcount = std::min(count, this->size_padded() - pos); if (target.size_padded() < rcount) { diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp new file mode 100644 index 000000000..add12def4 --- /dev/null +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -0,0 +1,106 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" + +#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked +#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable +#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level + +#include "Kokkos_Core.hpp" // Kokkos::num_devices, Kokkos::ExecutionSpace + +#include // std::vector + +namespace plssvm::kokkos::detail { + +std::vector get_device_list(const execution_space space, [[maybe_unused]] const target_platform target) { + std::vector devices{}; + switch (space) { + case execution_space::cuda: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { + for (int device = 0; device < Kokkos::num_devices(); ++device) { + // create CUDA stream using the CUDA specific functions + cudaSetDevice(device); + cudaStream_t stream{}; + cudaStreamCreate(&stream); + // create Kokkos execution space for the specific device + // Note: it is important to pass the cudaStream_t lifetime to be managed by 
Kokkos + devices.emplace_back(Kokkos::Cuda(stream, Kokkos::Impl::ManageStream::yes)); + } + return devices; + }); + case execution_space::hip: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + for (int device = 0; device < Kokkos::num_devices(); ++device) { + // HIP CUDA stream using the HIP specific functions + hipSetDevice(device); + hipStream_t stream{}; + hipStreamCreate(&stream); + // create Kokkos execution space for the specific device + // Note: it is important to pass the hipStream_t lifetime to be managed by Kokkos + devices.emplace_back(Kokkos::Hip(stream, Kokkos::Impl::ManageStream::yes)); + } + return devices; + }); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + // TODO: use all available devices -> not that trivial + // TODO: handle target <- if provide queue -> managed? + devices.emplace_back(Kokkos::SYCL{}); + return devices; + }); + case execution_space::hpx: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX([&]() { + devices.emplace_back(Kokkos::Hpx{}); + return devices; + }); + case execution_space::openmp: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP([&]() { + // Note: if OpenMP should be used as device must be set in order for it to work! + if (omp_get_nested() == 0) { + ::plssvm::detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: In order for Kokkos::OpenMP to work properly, we have to set \"omp_set_nested(1)\"!\n"); + // enable OMP_NESTED support + // Note: function is officially deprecated but still necessary for Kokkos::OpenMP to work properly + omp_set_nested(1); + } + devices.emplace_back(Kokkos::OpenMP{}); + return devices; + }); + case execution_space::openmp_target: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET([&]() { + // TODO: multi-GPU? + devices.emplace_back(Kokkos::OpenMPTarget{}); + return devices; + }); + case execution_space::openacc: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC([&]() { + // TODO: multi-GPU? 
+ devices.emplace_back(Kokkos::OpenACC{}); + return devices; + }); + case execution_space::threads: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS([&]() { + devices.emplace_back(Kokkos::Threads{}); + return devices; + }); + case execution_space::serial: + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL([&]() { + devices.emplace_back(Kokkos::Serial{}); + return devices; + }); + } + // all possible cases should be handled by the previous switch + // -> silence missing return statement compiler warnings due to throw statement + ::plssvm::detail::unreachable(); +} + +} // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index ac49ef532..f8521523b 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -9,160 +9,108 @@ #include "plssvm/backends/Kokkos/detail/utility.hpp" #include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::as_lower_case +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains #include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos::num_devices, Kokkos::Cuda, Kokkos::Hip, Kokkos::Sycl, Kokkos::Impl::ManageStream +#include "Kokkos_Core.hpp" // Kokkos::ExecutionSpace, Kokkos::Impl::ManageStream #include "Kokkos_Macros.hpp" // Kokkos macros #include "fmt/core.h" // fmt::format +#include // std::map #include // std::string #include 
// std::vector namespace plssvm::kokkos::detail { -target_platform determine_default_target_platform_from_execution_space(const execution_space space) { - switch (space) { - case execution_space::cuda: - return target_platform::gpu_nvidia; - case execution_space::hip: - return target_platform::gpu_amd; // TODO: or gpu_nvidia :/ - case execution_space::sycl: - case execution_space::openmp_target: - case execution_space::openacc: - return target_platform::gpu_nvidia; // TODO: what to return here? - case execution_space::openmp: - case execution_space::hpx: - case execution_space::threads: - case execution_space::serial: - return target_platform::cpu; +std::map> available_target_platform_to_execution_space_mapping() { + std::map> available_map{}; + + // TODO: only return really POSSIBLE target platforms? + // iterate over all available execution spaces + for (const execution_space space : list_available_execution_spaces()) { + switch (space) { + case execution_space::cuda: + // NVIDIA GPUs only + available_map[target_platform::gpu_nvidia].push_back(execution_space::cuda); + break; + case execution_space::hip: + // NVIDIA and AMD GPUs possible + available_map[target_platform::gpu_nvidia].push_back(execution_space::hip); + available_map[target_platform::gpu_amd].push_back(execution_space::hip); + break; + case execution_space::sycl: + case execution_space::openacc: + // all GPUs and CPU possible + available_map[target_platform::gpu_nvidia].push_back(execution_space::sycl); + available_map[target_platform::gpu_amd].push_back(execution_space::sycl); + available_map[target_platform::gpu_intel].push_back(execution_space::sycl); + available_map[target_platform::cpu].push_back(execution_space::sycl); + break; + case execution_space::openmp_target: + // all GPUs + available_map[target_platform::gpu_nvidia].push_back(execution_space::openmp_target); + available_map[target_platform::gpu_amd].push_back(execution_space::openmp_target); + 
available_map[target_platform::gpu_intel].push_back(execution_space::openmp_target); + break; + case execution_space::hpx: + case execution_space::openmp: + case execution_space::threads: + case execution_space::serial: + // all these execution spaces are CPU only + available_map[target_platform::cpu].push_back(space); + break; + } } - // all possible cases should be handled by the previous switch - // -> silence missing return statement compiler warnings due to throw statement - ::plssvm::detail::unreachable(); -} -void check_execution_space_target_platform_combination(const execution_space space, const target_platform target) { - PLSSVM_ASSERT(target != target_platform::automatic, "The provided target platform may not be the automatic target platform!"); + // the map must at least have one entry + PLSSVM_ASSERT(!available_map.empty(), "At least one target platform must be available!"); + // the automatic target platform must not be present + PLSSVM_ASSERT(!::plssvm::detail::contains(available_map, target_platform::automatic), "The automatic target platform may not be present!"); - switch (space) { - case execution_space::cuda: - if (target != target_platform::gpu_nvidia) { - throw backend_exception{ fmt::format("The target platform {} is not supported for Kokkos {} execution space!", target, space) }; - } - break; - case execution_space::hip: - if (target != target_platform::gpu_amd && target != target_platform::gpu_nvidia) { - throw backend_exception{ fmt::format("The target platform {} is not supported for Kokkos {} execution space!", target, space) }; - } - break; - case execution_space::sycl: - // SYCL may support all target platforms! - // TODO: use SYCL specific functions to check? - case execution_space::openmp_target: - // OpenMP Target Offloading may support all target platforms! - // TODO: use OpenMP Target Offloading specific functions to check? - case execution_space::openacc: - // OpenACC may support all target platforms! 
- // TODO: use OpenACC Target Offloading specific functions to check? - break; - case execution_space::openmp: - case execution_space::hpx: - case execution_space::threads: - case execution_space::serial: - if (target != target_platform::cpu) { - throw backend_exception{ fmt::format("The target platform {} is not supported for Kokkos {} execution space!", target, space) }; - } - break; - } + return available_map; } -std::vector get_device_list(const execution_space space, [[maybe_unused]] const target_platform target) { - std::vector devices{}; - switch (space) { +std::string get_device_name([[maybe_unused]] const device_wrapper &dev) { + switch (dev.get_execution_space()) { case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { - for (int device = 0; device < Kokkos::num_devices(); ++device) { - // create CUDA stream using the CUDA specific functions - cudaSetDevice(device); - cudaStream_t stream{}; - cudaStreamCreate(&stream); - // create Kokkos execution space for the specific device - // Note: it is important to pass the cudaStream_t lifetime to be managed by Kokkos - devices.emplace_back(Kokkos::Cuda(stream, Kokkos::Impl::ManageStream::yes)); - } - return devices; + return std::string{ dev.get().cuda_device_prop().name }; }); case execution_space::hip: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { - for (int device = 0; device < Kokkos::num_devices(); ++device) { - // HIP CUDA stream using the HIP specific functions - hipSetDevice(device); - hipStream_t stream{}; - hipStreamCreate(&stream); - // create Kokkos execution space for the specific device - // Note: it is important to pass the hipStream_t lifetime to be managed by Kokkos - devices.emplace_back(Kokkos::Hip(stream, Kokkos::Impl::ManageStream::yes)); - } - return devices; + return std::string{ dev.get().hip_device_prop().name }; }); case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { - // TODO: use all available devices -> not that trivial - // TODO: handle target - 
devices.emplace_back(Kokkos::SYCL{}); - return devices; + return dev.get().sycl_queue.get_device().get_info(); }); - case execution_space::openmp: case execution_space::hpx: - case execution_space::threads: - case execution_space::serial: - devices.emplace_back(Kokkos::DefaultExecutionSpace{}); - return devices; - case execution_space::openmp_target: - case execution_space::openacc: - // TODO: implement - throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space) }; - } - // all possible cases should be handled by the previous switch - // -> silence missing return statement compiler warnings due to throw statement - ::plssvm::detail::unreachable(); -} - -std::string get_device_name(const execution_space space, [[maybe_unused]] const Kokkos::DefaultExecutionSpace &exec) { - // TODO: implement for other backends! - switch (space) { - case execution_space::cuda: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { - return std::string{ exec.cuda_device_prop().name }; - }); - case execution_space::hip: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { - return std::string{ exec.hip_device_prop().name }; - }); - case execution_space::sycl: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { - return exec.sycl_queue.get_device().get_info(); - }); + return "HPX CPU host device"; case execution_space::openmp: - case execution_space::hpx: - case execution_space::threads: - case execution_space::serial: - return "CPU host device"; + return "OpenMP CPU host device"; case execution_space::openmp_target: + // TODO: device name? return "OpenMP target device"; case execution_space::openacc: + // TODO: device name? 
return "OpenACC target device"; + case execution_space::threads: + return "std::threads CPU host device"; + case execution_space::serial: + return "serial CPU host device"; } return "unknown"; } -void device_synchronize(const Kokkos::DefaultExecutionSpace &exec) { - exec.fence(); +void device_synchronize(const device_wrapper &dev) { + dev.execute([](const auto &device) { + device.fence(); + }); } std::string get_kokkos_version() { diff --git a/src/plssvm/backends/Kokkos/execution_space.cpp b/src/plssvm/backends/Kokkos/execution_space.cpp index 2f3472aa8..6179c496d 100644 --- a/src/plssvm/backends/Kokkos/execution_space.cpp +++ b/src/plssvm/backends/Kokkos/execution_space.cpp @@ -8,18 +8,14 @@ #include "plssvm/backends/Kokkos/execution_space.hpp" -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case -#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable -#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos macros, Kokkos ExecutionSpace types - -#include // std::ios::failbit -#include // std::istream -#include // std::ostream -#include // std::string -#include // std::is_same_v -#include // std::vector +#include // std::array +#include // std::ios::failbit +#include // std::istream +#include // std::ostream +#include // std::string +#include // std::vector namespace plssvm::kokkos { @@ -76,91 +72,9 @@ std::istream &operator>>(std::istream &in, execution_space &space) { return in; } -execution_space determine_default_execution_space() noexcept { - // determine the execution_space enumeration value based on the provided Kokkos execution space -#if defined(KOKKOS_ENABLE_CUDA) - if constexpr (std::is_same_v) { - return execution_space::cuda; - } -#endif -#if defined(KOKKOS_ENABLE_HIP) - if constexpr (std::is_same_v) { - return execution_space::hip; - } -#endif -#if defined(KOKKOS_ENABLE_SYCL) - if constexpr (std::is_same_v) { - return execution_space::sycl; - } 
-#endif -#if defined(KOKKOS_ENABLE_HPX) - if constexpr (std::is_same_v) { - return execution_space::hpx; - } -#endif -#if defined(KOKKOS_ENABLE_OPENMP) - if constexpr (std::is_same_v) { - return execution_space::openmp; - } -#endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) - if constexpr (std::is_same_v) { - return execution_space::openmp_target; - } -#endif -#if defined(KOKKOS_ENABLE_OPENACC) - if constexpr (std::is_same_v) { - return execution_space::openacc; - } -#endif -#if defined(KOKKOS_ENABLE_THREADS) - if constexpr (std::is_same_v) { - return execution_space::threads; - } -#endif -#if defined(KOKKOS_ENABLE_SERIAL) - if constexpr (std::is_same_v) { - return execution_space::serial; - } -#endif - // at least one execution space must always be available! - ::plssvm::detail::unreachable(); -} - -[[nodiscard]] std::vector available_execution_spaces() { - std::vector available_spaces{}; -#if defined(KOKKOS_ENABLE_CUDA) - available_spaces.push_back(execution_space::cuda); -#endif -#if defined(KOKKOS_ENABLE_HIP) - available_spaces.push_back(execution_space::hip); -#endif -#if defined(KOKKOS_ENABLE_SYCL) - available_spaces.push_back(execution_space::sycl); -#endif -#if defined(KOKKOS_ENABLE_HPX) - available_spaces.push_back(execution_space::hpx); -#endif -#if defined(KOKKOS_ENABLE_OPENMP) - available_spaces.push_back(execution_space::openmp); -#endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) - available_spaces.push_back(execution_space::openmp_target); -#endif -#if defined(KOKKOS_ENABLE_OPENACC) - available_spaces.push_back(execution_space::openacc); -#endif -#if defined(KOKKOS_ENABLE_THREADS) - available_spaces.push_back(execution_space::threads); -#endif -#if defined(KOKKOS_ENABLE_SERIAL) - available_spaces.push_back(execution_space::serial); -#endif - - // AT LEAST ONE execution space must ALWAYS be available - PLSSVM_ASSERT(!available_spaces.empty(), "Aat least one execution space must always be available!"); - - return available_spaces; +std::vector 
list_available_execution_spaces() { + constexpr auto arr = detail::constexpr_available_execution_spaces(); + return std::vector(arr.cbegin(), arr.cend()); } } // namespace plssvm::kokkos diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index f6925207f..e0686c2e4 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -10,7 +10,8 @@ set(PLSSVM_KOKKOS_TEST_NAME Kokkos_tests) # list all necessary sources set(PLSSVM_KOKKOS_TEST_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp - ${CMAKE_CURRENT_LIST_DIR}/detail/typedefs.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/device_view_wrapper.cpp + ${CMAKE_CURRENT_LIST_DIR}/detail/device_wrapper.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/standard_layout_tuple.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/pinned_memory.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp diff --git a/tests/backends/Kokkos/detail/device_ptr.cpp b/tests/backends/Kokkos/detail/device_ptr.cpp index 34b83eefa..c96a1ed87 100644 --- a/tests/backends/Kokkos/detail/device_ptr.cpp +++ b/tests/backends/Kokkos/detail/device_ptr.cpp @@ -10,6 +10,8 @@ #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper + #include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace #include "tests/backends/generic_device_ptr_tests.hpp" // generic device pointer tests to instantiate @@ -23,10 +25,10 @@ template struct kokkos_device_ptr_test_type { using device_ptr_type = plssvm::kokkos::detail::device_ptr; - using queue_type = Kokkos::DefaultExecutionSpace; + using queue_type = plssvm::kokkos::detail::device_wrapper; static const queue_type &default_queue() { - static const queue_type queue{}; + static const queue_type queue{ Kokkos::DefaultExecutionSpace{} }; return queue; } }; diff --git a/tests/backends/Kokkos/detail/device_view_wrapper.cpp 
b/tests/backends/Kokkos/detail/device_view_wrapper.cpp new file mode 100644 index 000000000..026daaf1e --- /dev/null +++ b/tests/backends/Kokkos/detail/device_view_wrapper.cpp @@ -0,0 +1,77 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the device_view_wrapper class. + */ + +#include "plssvm/backends/Kokkos/detail/device_view_wrapper.hpp" + +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} + +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos::View + +#include "gtest/gtest.h" // TEST, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE + +TEST(KokkosDeviceViewWrapper, default_construct) { + // default construct a device view wrapper + const plssvm::kokkos::detail::device_view_wrapper view{}; + + // per std::variant specification, the first type in the underlying variant is now the active member + // -> this always corresponds to the first entry in our constexpr_available_execution_spaces array + constexpr auto spaces = plssvm::kokkos::detail::constexpr_available_execution_spaces(); + EXPECT_EQ(view.get_execution_space(), spaces.front()); +} + +TEST(KokkosDeviceViewWrapper, construct) { + // construct a device view wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_view_wrapper view{ Kokkos::View{} }; + + // check that the device view is associated with the correct execution space + EXPECT_EQ(view.get_execution_space(), plssvm::kokkos::kokkos_type_to_execution_space_v); +} + +TEST(KokkosDeviceViewWrapper, get) { + // construct a device view wrapper using the current Kokkos::DefaultExecutionSpace + plssvm::kokkos::detail::device_view_wrapper view{ Kokkos::View{} }; + + 
// check that the returned Kokkos::View has the correct type + constexpr plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), Kokkos::View &>(); +} + +TEST(KokkosDeviceViewWrapper, get_const) { + // construct a device view wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_view_wrapper view{ Kokkos::View{} }; + + // check that the returned Kokkos::View has the correct type + constexpr plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), const Kokkos::View &>(); +} + +TEST(KokkosDeviceViewWrapper, get_execution_space) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_view_wrapper view{ Kokkos::View{} }; + + // check that the device view is associated with the correct execution space + EXPECT_EQ(view.get_execution_space(), plssvm::kokkos::kokkos_type_to_execution_space_v); +} + +TEST(KokkosDeviceViewWrapper, equality) { + const plssvm::kokkos::detail::device_view_wrapper view1{ Kokkos::View{} }; + const plssvm::kokkos::detail::device_view_wrapper view2{ Kokkos::View{} }; + + // should be equal + EXPECT_TRUE(view1 == view2); +} + +TEST(KokkosDeviceViewWrapper, inequality) { + const plssvm::kokkos::detail::device_view_wrapper view1{ Kokkos::View{} }; + const plssvm::kokkos::detail::device_view_wrapper view2{ Kokkos::View{} }; + + // should not be unequal + EXPECT_FALSE(view1 != view2); +} diff --git a/tests/backends/Kokkos/detail/device_wrapper.cpp b/tests/backends/Kokkos/detail/device_wrapper.cpp new file mode 100644 index 000000000..4547281ff --- /dev/null +++ b/tests/backends/Kokkos/detail/device_wrapper.cpp @@ -0,0 +1,115 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project 
which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the device_wrapper class. + */ + +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" + +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace + +#include "tests/utility.hpp" // util::for_each_variant_type + +#include "gtest/gtest.h" // TEST, EXPECT_GE, EXPECT_EQ + +#include // std::vector + +TEST(KokkosDeviceWrapper, default_construct) { + // default construct a device wrapper + const plssvm::kokkos::detail::device_wrapper device{}; + + // per std::variant specification, the first type in the underlying variant is now the active member + // -> this always corresponds to the first entry in our constexpr_available_execution_spaces array + constexpr auto spaces = plssvm::kokkos::detail::constexpr_available_execution_spaces(); + EXPECT_EQ(device.get_execution_space(), spaces.front()); +} + +TEST(KokkosDeviceWrapper, construct) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // check that the device is associated with the correct execution space + EXPECT_EQ(device.get_execution_space(), plssvm::kokkos::kokkos_type_to_execution_space_v); +} + +TEST(KokkosDeviceWrapper, get) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // check that the returned Kokkos::ExecutionSpace has the 
correct type + constexpr plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), Kokkos::DefaultExecutionSpace &>(); +} + +TEST(KokkosDeviceWrapper, get_const) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // check that the returned Kokkos::ExecutionSpace has the correct type + constexpr plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), const Kokkos::DefaultExecutionSpace &>(); +} + +TEST(KokkosDeviceWrapper, get_execution_space) { + // construct a device wrapper using the current Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // check that the device is associated with the correct execution space + EXPECT_EQ(device.get_execution_space(), plssvm::kokkos::kokkos_type_to_execution_space_v); +} + +TEST(KokkosDeviceWrapper, equality) { + const plssvm::kokkos::detail::device_wrapper device1{ Kokkos::DefaultExecutionSpace{} }; + const plssvm::kokkos::detail::device_wrapper device2{ Kokkos::DefaultExecutionSpace{} }; + + // should be equal + EXPECT_TRUE(device1 == device2); +} + +TEST(KokkosDeviceWrapper, inequality) { + const plssvm::kokkos::detail::device_wrapper device1{ Kokkos::DefaultExecutionSpace{} }; + const plssvm::kokkos::detail::device_wrapper device2{ Kokkos::DefaultExecutionSpace{} }; + + // should not be unequal + EXPECT_FALSE(device1 != device2); +} + +struct device_list_test { + template + void operator()() const { + // get the default device list + const plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + plssvm::target_platform default_target{}; + for (const auto &[target, spaces] : plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping()) { + if 
(::plssvm::detail::contains(spaces, space)) { + default_target = target; + break; + } + } + const std::vector devices = plssvm::kokkos::detail::get_device_list(space, default_target); + + // check the number of returned devices + if (space == plssvm::kokkos::execution_space::cuda || space == plssvm::kokkos::execution_space::hip || space == plssvm::kokkos::execution_space::sycl) { + // TODO: OpenMP Target Offloading / OpenACC + // for the device execution spaces AT LEAST ONE device must be found + EXPECT_GE(devices.size(), 1); + } else { + // for all other execution spaces EXACTLY ONE device must be found + EXPECT_EQ(devices.size(), 1); + } + } +}; + +TEST(KokkosDeviceWrapper, get_device_list) { + using variant_type = typename plssvm::kokkos::detail::impl::create_device_variant_type::type; + util::for_each_variant_type(device_list_test{}); +} diff --git a/tests/backends/Kokkos/detail/typedefs.cpp b/tests/backends/Kokkos/detail/typedefs.cpp deleted file mode 100644 index 4e25d4a6c..000000000 --- a/tests/backends/Kokkos/detail/typedefs.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/** - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Tests for the Kokkos::View typedefs. 
- */ - -#include "plssvm/backends/Kokkos/detail/typedefs.hpp" // plssvm::kokkos::detail::{device_view_type, host_view_type} - -#include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::DefaultExecutionSpace, Kokkos::HostSpace, Kokkos::MemoryUnmanaged - -#include "gtest/gtest.h" // TEST, ::testing::StaticAssertTypeEq - -TEST(KokkosTypedefs, device_view_type) { - // test device view typedefs - ::testing::StaticAssertTypeEq, plssvm::kokkos::detail::device_view_type>(); - ::testing::StaticAssertTypeEq, plssvm::kokkos::detail::device_view_type>(); -} - -TEST(KokkosTypedefs, host_view_type) { - // test host view typedefs - ::testing::StaticAssertTypeEq, plssvm::kokkos::detail::host_view_type>(); - ::testing::StaticAssertTypeEq, plssvm::kokkos::detail::host_view_type>(); -} diff --git a/tests/backends/Kokkos/detail/utility.cpp b/tests/backends/Kokkos/detail/utility.cpp index ab49f1034..7c6d491d5 100644 --- a/tests/backends/Kokkos/detail/utility.cpp +++ b/tests/backends/Kokkos/detail/utility.cpp @@ -10,102 +10,69 @@ #include "plssvm/backends/Kokkos/detail/utility.hpp" -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space -#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains +#include "plssvm/target_platforms.hpp" // plssvm::target_platform -#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace +#include "Kokkos_Core.hpp" // Kokkos::ExecutionSpace #include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT +#include "tests/utility.hpp" // 
util::for_each_variant_type #include "fmt/core.h" // fmt::format #include "gmock/gmock.h" // EXPECT_THAT; ::testing::AnyOf -#include "gtest/gtest.h" // TEST, EXPECT_GE, EXPECT_NE - -#include // std::regex, std::regex::extended, std::regex_match -#include // std::string -#include // std::vector - -TEST(KokkosUtility, determine_default_target_platform_from_execution_space) { - // determine the potential default target platform - EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::cuda), plssvm::target_platform::gpu_nvidia); - EXPECT_THAT(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::hip), ::testing::AnyOf(plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd)); - EXPECT_NE(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::sycl), plssvm::target_platform::automatic); - EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::hpx), plssvm::target_platform::cpu); - EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::openmp), plssvm::target_platform::cpu); - EXPECT_NE(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::openmp_target), plssvm::target_platform::automatic); - EXPECT_NE(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::openacc), plssvm::target_platform::automatic); - EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::threads), plssvm::target_platform::cpu); - EXPECT_EQ(plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(plssvm::kokkos::execution_space::serial), plssvm::target_platform::cpu); +#include 
"gtest/gtest.h" // TEST, EXPECT_NE + +#include // std::map +#include // std::regex, std::regex::extended, std::regex_match +#include // std::string +#include // std::variant +#include // std::vector + +TEST(KokkosUtility, is_type_in_variant) { + // check type trait that determines if a type is contained in a variant + using variant_type = std::variant; + + EXPECT_TRUE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_TRUE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_TRUE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_TRUE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_FALSE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); + EXPECT_FALSE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); } -TEST(KokkosUtility, check_execution_space_target_platform_combination) { - // check some execution_space <-> target_platform combinations - // the cuda execution space only supports the NVIDIA GPU - EXPECT_NO_THROW(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::cuda, plssvm::target_platform::gpu_nvidia)); - EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::cuda, plssvm::target_platform::gpu_amd), - plssvm::kokkos::backend_exception, - "The target platform gpu_amd is not supported for Kokkos Cuda execution space!"); - EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::cuda, plssvm::target_platform::gpu_intel), - plssvm::kokkos::backend_exception, - "The target platform gpu_intel is not supported for Kokkos Cuda execution space!"); - EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::cuda, plssvm::target_platform::cpu), - plssvm::kokkos::backend_exception, - "The target platform cpu is not supported for Kokkos Cuda
execution space!"); - - // the hip execution space only supports the NVIDIA and AMD GPUs - EXPECT_NO_THROW(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::hip, plssvm::target_platform::gpu_nvidia)); - EXPECT_NO_THROW(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::hip, plssvm::target_platform::gpu_amd)); - EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::hip, plssvm::target_platform::gpu_intel), - plssvm::kokkos::backend_exception, - "The target platform gpu_intel is not supported for Kokkos HIP execution space!"); - EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(plssvm::kokkos::execution_space::hip, plssvm::target_platform::cpu), - plssvm::kokkos::backend_exception, - "The target platform cpu is not supported for Kokkos HIP execution space!"); +TEST(KokkosUtility, available_target_platform_to_execution_space_mapping) { + // get the target_platform <-> execution_space mappings + const std::map> mapping = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); - // TODO: SYCL - // TODO: OpenMP target - // TODO: OpenACC + // the map must not be empty + EXPECT_FALSE(mapping.empty()); - // the remaining execution spaces all only support CPUs! 
- for (const plssvm::kokkos::execution_space exec : { plssvm::kokkos::execution_space::hpx, plssvm::kokkos::execution_space::openmp, plssvm::kokkos::execution_space::threads, plssvm::kokkos::execution_space::serial }) { - EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(exec, plssvm::target_platform::gpu_nvidia), - plssvm::kokkos::backend_exception, - fmt::format("The target platform gpu_nvidia is not supported for Kokkos {} execution space!", exec)); - EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(exec, plssvm::target_platform::gpu_amd), - plssvm::kokkos::backend_exception, - fmt::format("The target platform gpu_amd is not supported for Kokkos {} execution space!", exec)); - EXPECT_THROW_WHAT(plssvm::kokkos::detail::check_execution_space_target_platform_combination(exec, plssvm::target_platform::gpu_intel), - plssvm::kokkos::backend_exception, - fmt::format("The target platform gpu_intel is not supported for Kokkos {} execution space!", exec)); - EXPECT_NO_THROW(plssvm::kokkos::detail::check_execution_space_target_platform_combination(exec, plssvm::target_platform::cpu)); + // each vector must at least have one entry + the automatic target platform must not be present + for (const auto &[target, spaces] : mapping) { + EXPECT_NE(target, plssvm::target_platform::automatic); + EXPECT_GE(spaces.size(), 1); } } -TEST(KokkosUtility, get_device_list) { - // get the default device list - const plssvm::kokkos::execution_space space = plssvm::kokkos::determine_default_execution_space(); - const plssvm::target_platform target = plssvm::kokkos::detail::determine_default_target_platform_from_execution_space(space); - const std::vector devices = plssvm::kokkos::detail::get_device_list(space, target); +struct device_name_test { + template + void operator()() const { + // get the device name of the default Kokkos execution space + const std::string name = 
plssvm::kokkos::detail::get_device_name(plssvm::kokkos::detail::device_wrapper{ ExecutionSpace{} }); + SCOPED_TRACE(name); - // check the number of returned devices - if (space == plssvm::kokkos::execution_space::cuda || space == plssvm::kokkos::execution_space::hip || space == plssvm::kokkos::execution_space::sycl) { - // for the device execution spaces AT LEAST ONE device must be found - EXPECT_GE(devices.size(), 1); - } else { - // for all other execution spaces EXACTLY ONE device must be found - EXPECT_EQ(devices.size(), 1); + // the returned device name may not be empty or unknown + EXPECT_FALSE(name.empty()); + EXPECT_NE(name, std::string{ "unknown" }); } -} +}; TEST(KokkosUtility, get_device_name) { - // get the device name of the default Kokkos execution space - const plssvm::kokkos::execution_space space = plssvm::kokkos::determine_default_execution_space(); - const std::string name = plssvm::kokkos::detail::get_device_name(space, Kokkos::DefaultExecutionSpace{}); - - // the returned device name may not be empty or unknown - EXPECT_FALSE(name.empty()); - EXPECT_NE(name, std::string{ "unknown" }); + using variant_type = typename plssvm::kokkos::detail::impl::create_device_variant_type::type; + util::for_each_variant_type(device_name_test{}); } TEST(KokkosUtility, get_kokkos_version) { diff --git a/tests/backends/Kokkos/execution_space.cpp b/tests/backends/Kokkos/execution_space.cpp index c0cec6f45..2073d1fd4 100644 --- a/tests/backends/Kokkos/execution_space.cpp +++ b/tests/backends/Kokkos/execution_space.cpp @@ -68,12 +68,74 @@ TEST(KokkosExecutionSpace, from_string_unknown) { EXPECT_TRUE(input.fail()); } -TEST(KokkosExecutionSpace, determine_execution_space) { - // check that "unreachable" is never reached - EXPECT_THAT(plssvm::kokkos::determine_default_execution_space(), ::testing::AnyOf(plssvm::kokkos::execution_space::cuda, plssvm::kokkos::execution_space::hip, plssvm::kokkos::execution_space::sycl, plssvm::kokkos::execution_space::hpx, 
plssvm::kokkos::execution_space::openmp, plssvm::kokkos::execution_space::openmp_target, plssvm::kokkos::execution_space::openacc, plssvm::kokkos::execution_space::threads, plssvm::kokkos::execution_space::serial)); +TEST(KokkosExecutionSpace, execution_space_to_kokkos_type) { + // check conversions +#if defined(KOKKOS_ENABLE_CUDA) + ::testing::StaticAssertTypeEq, Kokkos::Cuda>(); +#endif +#if defined(KOKKOS_ENABLE_HIP) + ::testing::StaticAssertTypeEq, Kokkos::HIP>(); +#endif +#if defined(KOKKOS_ENABLE_SYCL) + ::testing::StaticAssertTypeEq, Kokkos::SYCL>(); +#endif +#if defined(KOKKOS_ENABLE_HPX) + ::testing::StaticAssertTypeEq, Kokkos::Experimental::HPX>(); +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + ::testing::StaticAssertTypeEq, Kokkos::OpenMP>(); +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + ::testing::StaticAssertTypeEq, Kokkos::OpenMPTarget>(); +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + ::testing::StaticAssertTypeEq, Kokkos::OpenACC>(); +#endif +#if defined(KOKKOS_ENABLE_THREADS) + ::testing::StaticAssertTypeEq, Kokkos::Threads>(); +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + ::testing::StaticAssertTypeEq, Kokkos::Serial>(); +#endif } -TEST(KokkosExecutionSpace, available_execution_spaces) { +TEST(KokkosExecutionSpace, kokkos_type_to_execution_space) { + // check conversions +#if defined(KOKKOS_ENABLE_CUDA) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::cuda); +#endif +#if defined(KOKKOS_ENABLE_HIP) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::hip); +#endif +#if defined(KOKKOS_ENABLE_SYCL) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::sycl); +#endif +#if defined(KOKKOS_ENABLE_HPX) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::hpx); +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, 
plssvm::kokkos::execution_space::openmp); +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openmp_target); +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openacc); +#endif +#if defined(KOKKOS_ENABLE_THREADS) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::threads); +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::serial); +#endif +} + +TEST(KokkosExecutionSpace, constexpr_available_execution_spaces) { + // at least one execution space must always be available + EXPECT_FALSE(plssvm::kokkos::detail::constexpr_available_execution_spaces().empty()); +} + +TEST(KokkosExecutionSpace, list_available_execution_spaces) { // at least one execution space must always be available - EXPECT_FALSE(plssvm::kokkos::available_execution_spaces().empty()); + EXPECT_FALSE(plssvm::kokkos::list_available_execution_spaces().empty()); } diff --git a/tests/backends/generic_csvm_tests.hpp b/tests/backends/generic_csvm_tests.hpp index 562785728..4c5d59738 100644 --- a/tests/backends/generic_csvm_tests.hpp +++ b/tests/backends/generic_csvm_tests.hpp @@ -36,6 +36,7 @@ #include "tests/utility.hpp" // util::{redirect_output, generate_specific_matrix, construct_from_tuple, flatten, generate_random_matrix} #include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" #include "gmock/gmock.h" // ::testing::HasSubstr #include "gtest/gtest.h" // TYPED_TEST_SUITE_P, TYPED_TEST_P, REGISTER_TYPED_TEST_SUITE_P, EXPECT_EQ, EXPECT_NE, EXPECT_GT, EXPECT_TRUE, EXPECT_DEATH, // ASSERT_EQ, GTEST_SKIP, SUCCEED, ::testing::Test @@ -891,7 +892,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunction, assemble_kernel_matrix_minimal) { const mock_csvm_type svm = util::construct_from_tuple(params, 
csvm_test_type::additional_arguments); const std::size_t num_devices = svm.num_available_devices(); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows() - 1, 1); + svm.data_distribution_ = std::make_unique(data.num_rows() - 1, num_devices); // automatic solver type not permitted if constexpr (solver == plssvm::solver_type::automatic) { @@ -1001,7 +1002,7 @@ TYPED_TEST_P(GenericCSVMSolverKernelFunction, assemble_kernel_matrix) { const mock_csvm_type svm = util::construct_from_tuple(params, csvm_test_type::additional_arguments); const std::size_t num_devices = svm.num_available_devices(); // be sure to use the correct data distribution - svm.data_distribution_ = std::make_unique(data.num_rows() - 1, 1); + svm.data_distribution_ = std::make_unique(data.num_rows() - 1, num_devices); // automatic solver type not permitted if constexpr (solver == plssvm::solver_type::automatic) { diff --git a/tests/backends/generic_gpu_csvm_tests.hpp b/tests/backends/generic_gpu_csvm_tests.hpp index dea31b85c..38264f507 100644 --- a/tests/backends/generic_gpu_csvm_tests.hpp +++ b/tests/backends/generic_gpu_csvm_tests.hpp @@ -156,7 +156,7 @@ TYPED_TEST_P(GenericGPUCSVM, run_blas_level_3_kernel_explicit) { ground_truth::device_specific_gemm(alpha, full_kernel_matrix, B, correct_C, *svm.data_distribution_, device_id); // check C for correctness - EXPECT_FLOATING_POINT_MATRIX_NEAR(C_res, correct_C); + EXPECT_FLOATING_POINT_MATRIX_NEAR_EPS(C_res, correct_C, 1e6); } } diff --git a/tests/utility.hpp b/tests/utility.hpp index 8e4f51e4f..61a20451d 100644 --- a/tests/utility.hpp +++ b/tests/utility.hpp @@ -46,7 +46,8 @@ #include // std::string #include // std::tuple, std::make_tuple, std::get, std::tuple_size #include // std::is_floating_point_v, std::is_same_v, std::is_signed_v, std::is_unsigned_v, std::decay_t -#include // std::pair, std::make_pair, std::move, std::make_index_sequence, std::index_sequence +#include // std::pair, 
std::make_pair, std::move, std::make_index_sequence, std::index_sequence, std::forward +#include // std::variant_size_v, std::variant_alternative_t #include // std::vector namespace util { @@ -694,6 +695,23 @@ template return count; } +/** + * @brief Call the function @p func for each type in the @p Variant. + * @details The function @p func must have a templated overload of the `operator()()` function. + * @tparam Variant the type of the std::variant + * @tparam Func the type of the function to apply + * @tparam Index the current index of the type the function should be applied to + * @param[in] func the function + */ +template +constexpr void for_each_variant_type(Func &&func) { + if constexpr (Index < std::variant_size_v) { + using T = std::variant_alternative_t; + func.template operator()(); // Call function with current type + for_each_variant_type(std::forward(func)); + } +} + } // namespace util #endif // PLSSVM_TESTS_UTILITY_HPP_ From 2b1f0a48a6ab4a73f141683aa2ae78f8d6c97124 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 28 Oct 2024 15:13:14 +0100 Subject: [PATCH 027/123] Update documentation. --- docs/CMakeLists.txt | 1 + docs/resources/dirs.dox | 66 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 3bf366b62..ec8c0c40f 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -32,6 +32,7 @@ set(DOXYGEN_PROJECT_LOGO "${PROJECT_SOURCE_DIR}/docs/resources/logo_90x55.png") set(DOXYGEN_EXCLUDE_SYMBOLS "*_HPP_") set(DOXYGEN_DOT_IMAGE_FORMAT "svg") +set(DOXYGEN_DOT_GRAPH_MAX_NODES "100") set(DOXYGEN_INTERACTIVE_SVG "YES") set(DOXYGEN_INCLUDE_GRAPH "NO") set(DOXYGEN_EXTRACT_PRIVATE "YES") diff --git a/docs/resources/dirs.dox b/docs/resources/dirs.dox index 8c3119aab..de0ce2d6a 100644 --- a/docs/resources/dirs.dox +++ b/docs/resources/dirs.dox @@ -153,6 +153,72 @@ * @brief Directory containing kernel implementations for utility functions using the HIP backend.
*/ +/** + * @dir include/plssvm/backends/Kokkos + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing the implementation for the Kokkos backend. + */ + +/** + * @dir include/plssvm/backends/Kokkos/detail + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing implementation details for the Kokkos backend. + */ + +/** + * @dir include/plssvm/backends/Kokkos/kernel + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing all kernels for the Kokkos backend. + */ + +/** + * @dir include/plssvm/backends/Kokkos/kernel/cg_explicit + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing kernel implementations for the explicit CG algorithm using the Kokkos backend. 
+ */ + +/** + * @dir include/plssvm/backends/Kokkos/kernel/cg_implicit + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing kernel implementations for the implicit CG algorithm using the Kokkos backend. + */ + +/** + * @dir include/plssvm/backends/Kokkos/kernel/detail + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Directory containing kernel implementations for utility functions using the Kokkos backend. + */ + /** * @dir include/plssvm/backends/OpenCL * @author Alexander Van Craen From c13a4200913c04627e38d6423586fcfd1efe7345 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 29 Oct 2024 09:58:15 +0100 Subject: [PATCH 028/123] Fix compilation error. 
--- .../Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp index b3d46112d..8e42e8b41 100644 --- a/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp +++ b/include/plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp @@ -151,7 +151,7 @@ class device_kernel_assembly { temp_ij += cost_; } // update the kernel matrix - kernel_matrix_d_[device_global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - device_global_j * (device_global_j + std::size_t{ 1 }) / std::size_t{ 2 }; + device_global_i] = temp_ij; + kernel_matrix_d_[device_global_j * (num_rows_ - row_offset_ + PADDING_SIZE_sz) - device_global_j * (device_global_j + std::size_t{ 1 }) / std::size_t{ 2 } + device_global_i] = temp_ij; } } } From f4c744107147d85dbc2462ae30c35fcf02b954f7 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 29 Oct 2024 10:35:28 +0100 Subject: [PATCH 029/123] Fix Kokkos warning regarding implicit conversions. --- src/plssvm/backends/Kokkos/detail/device_ptr.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp index dcd0f98d3..167ec027f 100644 --- a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp @@ -60,7 +60,6 @@ void device_ptr::memset(const int pattern, const size_type pos, const size_ty } const size_type rnum_bytes = std::min(num_bytes, (this->size_padded() - pos) * sizeof(value_type)); - // TODO: use Kokkos ZeroMemset specialization? 
data_.execute([&](const auto &data) { using kokkos_execution_space_type = typename ::plssvm::detail::remove_cvref_t::execution_space; @@ -69,7 +68,7 @@ void device_ptr::memset(const int pattern, const size_type pos, const size_ty auto p = static_cast(pattern); // memset subview Kokkos::parallel_for("device_ptr_memset", - Kokkos::RangePolicy(0, rnum_bytes), + Kokkos::RangePolicy(size_type{ 0 }, rnum_bytes), device_memset_kernel{ data_ptr, p }); detail::device_synchronize(queue_); From d72f8a67fdaf0428ab17fc901e61bf78a4dd3710 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 29 Oct 2024 12:40:42 +0100 Subject: [PATCH 030/123] Add missing header. --- include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp index 3c6f9c8aa..0ca70c2de 100644 --- a/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp +++ b/include/plssvm/backends/Kokkos/kernel/kernel_functions.hpp @@ -19,6 +19,7 @@ #include "Kokkos_MathematicalFunctions.hpp" // KOKKOS_INLINE_FUNCTION, Kokkos::pow, Kokkos::exp, Kokkos::tanh, Kokkos::abs +#include // FLT_MIN, DBL_MIN #include // std::is_same_v namespace plssvm::kokkos::detail { From fba8361c58544ea388116f790dcaf5e3a8c625c9 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 31 Oct 2024 16:09:42 +0100 Subject: [PATCH 031/123] Fix problems when using multiple GPUs.
--- .../Kokkos/detail/device_view_wrapper.hpp | 61 +++---------------- .../backends/Kokkos/detail/device_ptr.cpp | 53 ++++++++++------ .../Kokkos/detail/device_view_wrapper.cpp | 2 + 3 files changed, 44 insertions(+), 72 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp index 1baddcec6..ad8f0ddcf 100644 --- a/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp +++ b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp @@ -13,7 +13,9 @@ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_ #include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, execution_space_to_kokkos_type_t}, plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable #include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::ExecutionSpace @@ -167,62 +169,17 @@ class device_view_wrapper { /** * @brief Given a execution @p space and the number of elements @p size, creates a Kokkos::View in the respective memory space. * @tparam T the value type of the underlying Kokkos::View - * @param[in] space the specific execution space + * @param[in] device the device for which this view should be allocated * @param[in] size the size of the Kokkos::View (number of elements **not** byte!) 
* @return a Kokkos::View wrapper where the active member of the internal `std::variant` corresponds to the Kokkos::View in the Kokkos::ExecutionSpace specified by @p space (`[[nodiscard]]`) */ template -[[nodiscard]] device_view_wrapper make_device_view_wrapper(const execution_space &space, const std::size_t size) { - switch (space) { - case execution_space::cuda: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(([&]() { - return device_view_wrapper{ Kokkos::View{ "cuda_device_ptr_view", size } }; - })); - break; - case execution_space::hip: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(([&]() { - return device_view_wrapper{ Kokkos::View{ "hip_device_ptr_view", size } }; - })); - break; - case execution_space::sycl: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(([&]() { - return device_view_wrapper{ Kokkos::View{ "sycl_device_ptr_view", size } }; - })); - break; - case execution_space::hpx: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX(([&]() { - return device_view_wrapper{ Kokkos::View{ "hpx_device_ptr_view", size } }; - })); - break; - case execution_space::openmp: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP(([&]() { - return device_view_wrapper{ Kokkos::View{ "openmp_device_ptr_view", size } }; - })); - break; - case execution_space::openmp_target: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET(([&]() { - return device_view_wrapper{ Kokkos::View{ "openmptarget_device_ptr_view", size } }; - })); - break; - case execution_space::openacc: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC(([&]() { - return device_view_wrapper{ Kokkos::View{ "openacc_device_ptr_view", size } }; - })); - break; - case execution_space::threads: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS(([&]() { - return device_view_wrapper{ Kokkos::View{ "threads_device_ptr_view", size } }; - })); - break; - case execution_space::serial: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL(([&]() { - return device_view_wrapper{ Kokkos::View{ "serial_device_ptr_view", size } }; - })); - break; - } - // all possible cases should be handled by the previous 
switch - // -> silence missing return statement compiler warnings due to throw statement - ::plssvm::detail::unreachable(); +[[nodiscard]] device_view_wrapper make_device_view_wrapper(const device_wrapper &device, const std::size_t size) { + return device.execute_and_return([&](const auto &value) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + + return device_view_wrapper{ Kokkos::View{ Kokkos::view_alloc(value, "device_ptr_view"), size } }; + }); } } // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp index 167ec027f..6c5640870 100644 --- a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp @@ -47,7 +47,7 @@ device_ptr::device_ptr(const plssvm::shape shape, const device_wrapper &devic template device_ptr::device_ptr(const plssvm::shape shape, const plssvm::shape padding, const device_wrapper &device) : base_type{ shape, padding, device } { - data_ = make_device_view_wrapper(device.get_execution_space(), this->size_padded()); + data_ = make_device_view_wrapper(device, this->size_padded()); this->memset(0); } @@ -61,18 +61,25 @@ void device_ptr::memset(const int pattern, const size_type pos, const size_ty const size_type rnum_bytes = std::min(num_bytes, (this->size_padded() - pos) * sizeof(value_type)); data_.execute([&](const auto &data) { - using kokkos_execution_space_type = typename ::plssvm::detail::remove_cvref_t::execution_space; - // create subview of the device data auto *data_ptr = reinterpret_cast(data.data() + pos); auto p = static_cast(pattern); // memset subview - Kokkos::parallel_for("device_ptr_memset", - Kokkos::RangePolicy(size_type{ 0 }, rnum_bytes), - device_memset_kernel{ data_ptr, p }); - - detail::device_synchronize(queue_); + // TODO: warning? + // TODO: if possible, use fill(0) kernel? 
+ queue_.execute([&](const auto &exec) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + + // create the execution policy + const Kokkos::RangePolicy policy{ exec, size_type{ 0 }, rnum_bytes }; + // launch the memset kernel + Kokkos::parallel_for("device_ptr_memset", + policy, + device_memset_kernel{ data_ptr, p }); + }); }); + + detail::device_synchronize(queue_); } template @@ -87,11 +94,13 @@ void device_ptr::fill(const value_type value, const size_type pos, const size data_.execute([&](const auto &data) { // create subview of the device data auto data_subview = Kokkos::subview(data, std::make_pair(pos, pos + rcount)); - // fill subview with constant data - Kokkos::deep_copy(data_subview, value); - - detail::device_synchronize(queue_); + queue_.execute([&](const auto &exec) { + // fill subview with constant data + Kokkos::deep_copy(exec, data_subview, value); + }); }); + + detail::device_synchronize(queue_); } template @@ -106,11 +115,13 @@ void device_ptr::copy_to_device(const_host_pointer_type data_to_copy, const s const host_view_type host_view{ data_to_copy, rcount }; // create subview of the device data auto data_subview = Kokkos::subview(data, std::make_pair(pos, pos + rcount)); - // copy the data to the device subview - Kokkos::deep_copy(data_subview, host_view); - - detail::device_synchronize(queue_); + queue_.execute([&](const auto &exec) { + // fill subview with constant data + Kokkos::deep_copy(exec, data_subview, host_view); + }); }); + + detail::device_synchronize(queue_); } template @@ -151,11 +162,13 @@ void device_ptr::copy_to_host(host_pointer_type buffer, const size_type pos, const host_view_type host_view{ buffer, rcount }; // create subview of the device data auto data_subview = Kokkos::subview(data, std::make_pair(pos, pos + rcount)); - // copy the data to the host - Kokkos::deep_copy(host_view, data_subview); - - detail::device_synchronize(queue_); + queue_.execute([&](const auto &exec) { + // fill subview with 
constant data + Kokkos::deep_copy(exec, host_view, data_subview); + }); }); + + detail::device_synchronize(queue_); } template diff --git a/tests/backends/Kokkos/detail/device_view_wrapper.cpp b/tests/backends/Kokkos/detail/device_view_wrapper.cpp index 026daaf1e..c794072b4 100644 --- a/tests/backends/Kokkos/detail/device_view_wrapper.cpp +++ b/tests/backends/Kokkos/detail/device_view_wrapper.cpp @@ -75,3 +75,5 @@ TEST(KokkosDeviceViewWrapper, inequality) { // should not be unequal EXPECT_FALSE(view1 != view2); } + +// TODO: make_device_view_wrapper \ No newline at end of file From 1c75216135b61bb6a85489fa30ddd7dc38444cdb Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 31 Oct 2024 16:10:08 +0100 Subject: [PATCH 032/123] First try implementing map for Kokkos's SYCL backend. --- src/plssvm/backends/Kokkos/detail/utility.cpp | 54 +++++++++++++++++-- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index f8521523b..eb02660a6 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -21,9 +21,10 @@ #include "fmt/core.h" // fmt::format -#include // std::map -#include // std::string -#include // std::vector +#include // std::map +#include // std::string +#include // std::unordered_set +#include // std::vector namespace plssvm::kokkos::detail { @@ -39,12 +40,54 @@ std::map> available_target_platfor available_map[target_platform::gpu_nvidia].push_back(execution_space::cuda); break; case execution_space::hip: - // NVIDIA and AMD GPUs possible - available_map[target_platform::gpu_nvidia].push_back(execution_space::hip); + // NVIDIA or AMD GPUs possible (both simultaneously are unsupported) +#if defined(KOKKOS_ENABLE_HIP) + #if defined(__HIP_PLATFORM_AMD__) available_map[target_platform::gpu_amd].push_back(execution_space::hip); + #elif defined(__HIP_PLATFORM_NVIDIA__) + 
available_map[target_platform::gpu_nvidia].push_back(execution_space::hip); + #else + #error "Unknown HIP platform" + #endif +#endif break; case execution_space::sycl: + // list all potential target platforms currently available in SYCL +#if defined(KOKKOS_ENABLE_SYCL) + { + std::unordered_set targets{}; + for (const auto &platform : sycl::platform::get_platforms()) { + for (const auto &device : platform.get_devices()) { + // Note: Kokkos is Intel LLVM/DPC++/icpx only -> we can use the specific implementation defined enum values + if (device.is_cpu()) { + targets.insert(target_platform::cpu); + } else if (device.is_gpu()) { + // the current device is a GPU + // get vendor string and convert it to all lower case + const std::string vendor_string = ::plssvm::detail::as_lower_case(device.get_info<::sycl::info::device::vendor>()); + // get platform name of current GPU device and convert it to all lower case + const std::string platform_string = ::plssvm::detail::as_lower_case(platform.get_info<::sycl::info::platform::name>()); + + // check vendor string and insert to correct target platform + if (::plssvm::detail::contains(vendor_string, "nvidia")) { + targets.insert(target_platform::gpu_nvidia); + } else if (::plssvm::detail::contains(vendor_string, "amd") || ::plssvm::detail::contains(vendor_string, "advanced micro devices")) { + targets.insert(target_platform::gpu_amd); + } else if (::plssvm::detail::contains(vendor_string, "intel")) { + targets.insert(target_platform::gpu_intel); + } + } + } + } + // now we know which target platforms are available in SYCL -> add them to our mapping + for (const target_platform target : targets) { + available_map[target].push_back(execution_space::sycl); + } + } +#endif + break; case execution_space::openacc: + // TODO: restrict to available devices // all GPUs and CPU possible available_map[target_platform::gpu_nvidia].push_back(execution_space::sycl); available_map[target_platform::gpu_amd].push_back(execution_space::sycl); @@ 
-52,6 +95,7 @@ std::map> available_target_platfor available_map[target_platform::cpu].push_back(execution_space::sycl); break; case execution_space::openmp_target: + // TODO: restrict to available devices // all GPUs available_map[target_platform::gpu_nvidia].push_back(execution_space::openmp_target); available_map[target_platform::gpu_amd].push_back(execution_space::openmp_target); From bb89a07a37d005cf2e02146835c005ef93671452 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 31 Oct 2024 16:50:04 +0100 Subject: [PATCH 033/123] Also set Kokkos backend to be OFF per default. --- cmake/presets/common.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/presets/common.json b/cmake/presets/common.json index 3d3d9c4df..85934b051 100644 --- a/cmake/presets/common.json +++ b/cmake/presets/common.json @@ -11,7 +11,8 @@ "PLSSVM_ENABLE_CUDA_BACKEND": "OFF", "PLSSVM_ENABLE_HIP_BACKEND": "OFF", "PLSSVM_ENABLE_OPENCL_BACKEND": "OFF", - "PLSSVM_ENABLE_SYCL_BACKEND": "OFF" + "PLSSVM_ENABLE_SYCL_BACKEND": "OFF", + "PLSSVM_ENABLE_KOKKOS_BACKEND": "OFF" } }, { From 61b38339366e7622c07d458d4353784c6ebb9be8 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 2 Nov 2024 22:55:18 +0100 Subject: [PATCH 034/123] Add missing device (i.e., Kokkos::ExecutionSpace instance) to Kokkos::TeamPolicy constructor. 
--- src/plssvm/backends/Kokkos/csvm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 0a1903c16..b097d42ef 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -546,7 +546,7 @@ auto csvm::run_w_kernel(const std::size_t device_id, const ::plssvm::detail::exe for (const auto &[partial_grid, offsets] : exec.grids) { // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; Kokkos::parallel_for("w_kernel", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_w_linear{ w_d.get().get(), alpha_d.get().get(), sv_d.get().get(), num_classes, num_sv, device_specific_num_sv, sv_offset, offsets.x, offsets.y, partial_grid.x }); } From 8064363b1283f0813766dd962bae138b9f7e13ee Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 2 Nov 2024 23:11:58 +0100 Subject: [PATCH 035/123] Remove unused include. 
--- include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp index ad8f0ddcf..018edc48e 100644 --- a/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp +++ b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp @@ -16,7 +16,6 @@ #include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, execution_space_to_kokkos_type_t}, plssvm::kokkos::detail::constexpr_available_execution_spaces #include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t -#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable #include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::ExecutionSpace From 3cb4a9940f478931e5705705133bdf60903eb607 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 2 Nov 2024 23:14:48 +0100 Subject: [PATCH 036/123] Use Kokkos::ScopeGuard to be sure that Kokkos::finalize is called correctly even in case of an exception. 
--- src/main_predict.cpp | 17 ++++++++--------- src/main_train.cpp | 18 ++++++++---------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/main_predict.cpp b/src/main_predict.cpp index 1fe40d102..bc83ffcfa 100644 --- a/src/main_predict.cpp +++ b/src/main_predict.cpp @@ -38,12 +38,19 @@ #include // std::mem_fn #include // std::cerr, std::endl #include // std::pair +#include // std::unique_ptr, std::make_unique #include // std::visit #include // std::vector using namespace std::chrono_literals; int main(int argc, char *argv[]) { +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + // create std::unique_ptr containing a Kokkos::ScopeGuard + // -> used to automatically handle Kokkos::finalize + std::unique_ptr kokkos_guard{}; +#endif + try { const std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME(start_time); @@ -82,7 +89,7 @@ int main(int argc, char *argv[]) { // initialize Kokkos if necessary if (use_kokkos_as_backend) { - Kokkos::initialize(argc, argv); // TODO: set device? + kokkos_guard = std::make_unique(argc, argv); PLSSVM_ASSERT(Kokkos::is_initialized(), "Something went wrong initializing the Kokkos environment!"); } #endif @@ -161,14 +168,6 @@ int main(int argc, char *argv[]) { PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "accuracy", "num_correct", report.accuracy().num_correct })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "accuracy", "num_total", report.accuracy().num_total })); } - - // finalize Kokkos if necessary -#if defined(PLSSVM_HAS_KOKKOS_BACKEND) - if (use_kokkos_as_backend) { // TODO: what if an exception occurred? 
- Kokkos::finalize(); - PLSSVM_ASSERT(Kokkos::is_finalized(), "Something went wrong finalizing the Kokkos environment!"); - } -#endif }; std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser)); diff --git a/src/main_train.cpp b/src/main_train.cpp index 1d18d2744..14cf8941b 100644 --- a/src/main_train.cpp +++ b/src/main_train.cpp @@ -32,7 +32,7 @@ #include // std::exception #include // std::mem_fn #include // std::cerr, std::endl -#include // std::unique_ptr +#include // std::unique_ptr, std::make_unique #include // std::remove_reference_t #include // std::pair #include // std::visit @@ -41,6 +41,12 @@ using namespace std::chrono_literals; int main(int argc, char *argv[]) { +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + // create std::unique_ptr containing a Kokkos::ScopeGuard + // -> used to automatically handle Kokkos::finalize + std::unique_ptr kokkos_guard{}; +#endif + try { const std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SET_REFERENCE_TIME(start_time); @@ -79,7 +85,7 @@ int main(int argc, char *argv[]) { // initialize Kokkos if necessary if (use_kokkos_as_backend) { - Kokkos::initialize(argc, argv); // TODO: set device? 
+ kokkos_guard = std::make_unique(argc, argv); PLSSVM_ASSERT(Kokkos::is_initialized(), "Something went wrong initializing the Kokkos environment!"); } #endif @@ -102,14 +108,6 @@ int main(int argc, char *argv[]) { plssvm::solver = cmd_parser.solver); // save model to file model.save(cmd_parser.model_filename); - - // finalize Kokkos if necessary -#if defined(PLSSVM_HAS_KOKKOS_BACKEND) - if (use_kokkos_as_backend) { // TODO: what if an exception occurred - Kokkos::finalize(); - PLSSVM_ASSERT(Kokkos::is_finalized(), "Something went wrong finalizing the Kokkos environment!"); - } -#endif }; std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser)); From cf52ebc2b4f57157b501507b7fa514033c58adc6 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 2 Nov 2024 23:22:35 +0100 Subject: [PATCH 037/123] Improve memset implementation. --- .../Kokkos/kernel/detail/memset_kernel.hpp | 56 ------------------- .../backends/Kokkos/detail/device_ptr.cpp | 31 ++++------ 2 files changed, 11 insertions(+), 76 deletions(-) delete mode 100644 include/plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp diff --git a/include/plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp b/include/plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp deleted file mode 100644 index 584b1afdd..000000000 --- a/include/plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/** - * @file - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Defines a Kokkos function object for memsetting a device pointer with a specific value. 
- */ - -#ifndef PLSSVM_BACKENDS_KOKKOS_KERNEL_DETAIL_MEMSET_KERNEL_HPP_ -#define PLSSVM_BACKENDS_KOKKOS_KERNEL_DETAIL_MEMSET_KERNEL_HPP_ -#pragma once - -#include "plssvm/constants.hpp" // plssvm::real_type - -#include "Kokkos_Core.hpp" // KOKKOS_INLINE_FUNCTION - -#include // std::size_t - -namespace plssvm::kokkos::detail { - -/** - * @brief A kernel to perform a memset-like operation on a Kokkos::View - */ -class device_memset_kernel { - public: - /** - * @brief Memset all bytes in @p data to the provided @p pattern. - * @param[out] data the array to memset - * @param[in] pattern the memset pattern - */ - device_memset_kernel(unsigned char* data, const unsigned char pattern) : - data_{ data }, - pattern_{ pattern } { } - - /** - * @brief Function call operator overload performing the actual calculation. - * @param[in] idx the index representing the current point in the execution space - */ - KOKKOS_INLINE_FUNCTION - void operator()(const std::size_t idx) const { - data_[idx] = pattern_; - } - - private: - /// @cond Doxygen_suppress - unsigned char* data_; - const unsigned char pattern_; - /// @endcond -}; - -} // namespace plssvm::kokkos::detail - -#endif // PLSSVM_BACKENDS_KOKKOS_KERNEL_DETAIL_MEMSET_KERNEL_HPP_ diff --git a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp index 6c5640870..0dfe9adc0 100644 --- a/src/plssvm/backends/Kokkos/detail/device_ptr.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_ptr.cpp @@ -8,14 +8,13 @@ #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" -#include "plssvm/backends/Kokkos/detail/device_view_wrapper.hpp" // plssvm::kokkos::detail::{device_view_wrapper, make_device_view_wrapper} -#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper -#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::detail::device_synchronize -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include 
"plssvm/backends/Kokkos/kernel/detail/memset_kernel.hpp" // plssvm::kokkos::detail::device_fill_array -#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT -#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t -#include "plssvm/shape.hpp" // plssvm::shape +#include "plssvm/backends/Kokkos/detail/device_view_wrapper.hpp" // plssvm::kokkos::detail::{device_view_wrapper, make_device_view_wrapper} +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::detail::device_synchronize +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/shape.hpp" // plssvm::shape #include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::HostSpace, Kokkos::MemoryUnmanaged, Kokkos::subview, Kokkos::parallel_for, Kokkos::deep_copy @@ -61,21 +60,13 @@ void device_ptr::memset(const int pattern, const size_type pos, const size_ty const size_type rnum_bytes = std::min(num_bytes, (this->size_padded() - pos) * sizeof(value_type)); data_.execute([&](const auto &data) { - // create subview of the device data - auto *data_ptr = reinterpret_cast(data.data() + pos); - auto p = static_cast(pattern); - // memset subview - // TODO: warning? - // TODO: if possible, use fill(0) kernel? 
queue_.execute([&](const auto &exec) { using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; - // create the execution policy - const Kokkos::RangePolicy policy{ exec, size_type{ 0 }, rnum_bytes }; - // launch the memset kernel - Kokkos::parallel_for("device_ptr_memset", - policy, - device_memset_kernel{ data_ptr, p }); + // create view of the device data cast to unsigned char + const Kokkos::View view{ reinterpret_cast(data.data() + pos), rnum_bytes }; + // fill the view with the pattern -> acts like a memset + Kokkos::deep_copy(exec, view, static_cast(pattern)); }); }); From 275da8493a5107aac896a5ed66e406ae50717ade Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 2 Nov 2024 23:33:09 +0100 Subject: [PATCH 038/123] Handle the Kokkos backend in some more tests. --- tests/backend_types.cpp | 8 +++++++- tests/csvm_factory.cpp | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/backend_types.cpp b/tests/backend_types.cpp index 9975fbbfc..3cd9fa5bc 100644 --- a/tests/backend_types.cpp +++ b/tests/backend_types.cpp @@ -38,11 +38,12 @@ TEST(BackendType, to_string) { EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::hip, "hip"); EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::opencl, "opencl"); EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::sycl, "sycl"); + EXPECT_CONVERSION_TO_STRING(plssvm::backend_type::kokkos, "kokkos"); } TEST(BackendType, to_string_unknown) { // check conversions to std::string from unknown backend_type - EXPECT_CONVERSION_TO_STRING(static_cast(7), "unknown"); + EXPECT_CONVERSION_TO_STRING(static_cast(8), "unknown"); } // check whether the std::string -> plssvm::backend_type conversions are correct @@ -64,6 +65,8 @@ TEST(BackendType, from_string) { EXPECT_CONVERSION_FROM_STRING("OpenCL", plssvm::backend_type::opencl); EXPECT_CONVERSION_FROM_STRING("sycl", plssvm::backend_type::sycl); EXPECT_CONVERSION_FROM_STRING("SYCL", plssvm::backend_type::sycl); + EXPECT_CONVERSION_FROM_STRING("Kokkos", 
plssvm::backend_type::kokkos); + EXPECT_CONVERSION_FROM_STRING("KOKKOS", plssvm::backend_type::kokkos); } TEST(BackendType, from_string_unknown) { @@ -127,6 +130,7 @@ INSTANTIATE_TEST_SUITE_P(BackendType, BackendTypeSupportedCombination, ::testing supported_combination_type{ { plssvm::backend_type::hip }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::hip }, supported_combination_type{ { plssvm::backend_type::opencl }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::opencl }, supported_combination_type{ { plssvm::backend_type::sycl }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::sycl }, + supported_combination_type{ { plssvm::backend_type::kokkos }, { plssvm::target_platform::cpu, plssvm::target_platform::gpu_nvidia, plssvm::target_platform::gpu_amd, plssvm::target_platform::gpu_intel }, plssvm::backend_type::kokkos }, supported_combination_type{ { plssvm::backend_type::openmp, plssvm::backend_type::cuda, plssvm::backend_type::hip, plssvm::backend_type::opencl, plssvm::backend_type::sycl }, { plssvm::target_platform::cpu }, plssvm::backend_type::sycl }, supported_combination_type{ { plssvm::backend_type::openmp, plssvm::backend_type::cuda, plssvm::backend_type::hip, plssvm::backend_type::opencl, plssvm::backend_type::sycl }, { plssvm::target_platform::gpu_nvidia }, plssvm::backend_type::cuda }, supported_combination_type{ { plssvm::backend_type::openmp, plssvm::backend_type::cuda, plssvm::backend_type::hip, plssvm::backend_type::opencl, plssvm::backend_type::sycl }, { plssvm::target_platform::gpu_amd }, plssvm::backend_type::hip }, @@ -144,6 +148,7 @@ TEST(BackendType, csvm_to_backend_type) { 
EXPECT_EQ(plssvm::csvm_to_backend_type::value, plssvm::backend_type::sycl); EXPECT_EQ(plssvm::csvm_to_backend_type::value, plssvm::backend_type::sycl); EXPECT_EQ(plssvm::csvm_to_backend_type::value, plssvm::backend_type::sycl); + EXPECT_EQ(plssvm::csvm_to_backend_type::value, plssvm::backend_type::kokkos); EXPECT_EQ(plssvm::csvm_to_backend_type::impl, plssvm::sycl::implementation_type::adaptivecpp); EXPECT_EQ(plssvm::csvm_to_backend_type::impl, plssvm::sycl::implementation_type::dpcpp); @@ -159,4 +164,5 @@ TEST(BackendType, csvm_to_backend_type_v) { EXPECT_EQ(plssvm::csvm_to_backend_type_v, plssvm::backend_type::sycl); EXPECT_EQ(plssvm::csvm_to_backend_type_v, plssvm::backend_type::sycl); EXPECT_EQ(plssvm::csvm_to_backend_type_v, plssvm::backend_type::sycl); + EXPECT_EQ(plssvm::csvm_to_backend_type_v, plssvm::backend_type::kokkos); } diff --git a/tests/csvm_factory.cpp b/tests/csvm_factory.cpp index cb06f6b68..dc365293c 100644 --- a/tests/csvm_factory.cpp +++ b/tests/csvm_factory.cpp @@ -60,6 +60,9 @@ std::string GetTypeName std::string GetTypeName, util::value_list<>>>() { return "sycl_adaptivecpp_csvm"; } + +template <> +std::string GetTypeName, util::value_list<>>>() { return "kokkos_csvm"; } } // namespace testing::internal template From 4d44b56c965c2ccaf313b67fa470cfd778ecf691 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 2 Nov 2024 23:33:59 +0100 Subject: [PATCH 039/123] Move the Kokkos initialization to the general test main.cpp file (guarded behind ifdef). Remove Kokkos specific main file. 
--- tests/backends/Kokkos/CMakeLists.txt | 2 +- tests/kokkos_main.cpp | 38 ---------------------- tests/main.cpp | 41 ++++++++++++++++++++++-- tests/main.hpp | 47 ---------------------------- 4 files changed, 40 insertions(+), 88 deletions(-) delete mode 100644 tests/kokkos_main.cpp delete mode 100644 tests/main.hpp diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index e0686c2e4..142f72a37 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -23,7 +23,7 @@ set(PLSSVM_KOKKOS_TEST_SOURCES find_package(Kokkos REQUIRED) # add test executable -add_executable(${PLSSVM_KOKKOS_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../kokkos_main.cpp ${PLSSVM_KOKKOS_TEST_SOURCES}) +add_executable(${PLSSVM_KOKKOS_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../main.cpp ${PLSSVM_KOKKOS_TEST_SOURCES}) # link against test library target_link_libraries(${PLSSVM_KOKKOS_TEST_NAME} PRIVATE ${PLSSVM_BASE_TEST_LIBRARY_NAME}) diff --git a/tests/kokkos_main.cpp b/tests/kokkos_main.cpp deleted file mode 100644 index 1edfbb9fe..000000000 --- a/tests/kokkos_main.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/** - * @file - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Contains the googletest main function. Sets the DeathTest to "threadsafe" execution instead of "fast". 
- */ - -#include "Kokkos_Core.hpp" // Kokkos::initialize, Kokkos::finalize - -#include "gtest/gtest.h" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST, RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG} - -#include "main.hpp" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST definitions - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - - // initialize Kokkos - Kokkos::initialize(argc, argv); - - // prevent problems with fork() in the presence of multiple threads - // https://github.com/google/googletest/blob/main/docs/advanced.md#death-tests-and-threads - // NOTE: may reduce performance of the (death) tests -#if !defined(_WIN32) - ::testing::GTEST_FLAG(death_test_style) = "threadsafe"; -#endif - - // run all tests - const int return_code = RUN_ALL_TESTS(); - - // finalize Kokkos - Kokkos::finalize(); - - return return_code; -} diff --git a/tests/main.cpp b/tests/main.cpp index 944ad9318..614b38cff 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -9,12 +9,49 @@ * @brief Contains the googletest main function. Sets the DeathTest to "threadsafe" execution instead of "fast". 
*/ -#include "main.hpp" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST definitions +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + #include "Kokkos_Core.hpp" // Kokkos::ScopeGuard +#endif + +#include "gtest/gtest.h" // RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG},GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST definitions + +// silence GTest warnings/test errors -#include "gtest/gtest.h" // RUN_ALL_TESTS, ::testing::{InitGoogleTest, GTEST_FLAG} +// generic CSVM tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVM); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunction); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolver); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunction); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionClassification); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionClassification); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionDeathTest); +// generic GPU CSVM tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVM); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMKernelFunction); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMDeathTest); +// pinned memory tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemory); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryLayout); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryDeathTest); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryLayoutDeathTest); +// device pointer tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtr); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrLayout); 
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest); +// exception tests +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception); int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); + +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + // initialize Kokkos using a Kokkos::ScopeGuard + const Kokkos::ScopeGuard guard{ argc, argv }; +#endif + // prevent problems with fork() in the presence of multiple threads // https://github.com/google/googletest/blob/main/docs/advanced.md#death-tests-and-threads // NOTE: may reduce performance of the (death) tests diff --git a/tests/main.hpp b/tests/main.hpp deleted file mode 100644 index ddb4ea590..000000000 --- a/tests/main.hpp +++ /dev/null @@ -1,47 +0,0 @@ -/** - * @file - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Header file for the GoogleTest main files to reduce code duplication. 
- */ - -#ifndef PLSSVM_TESTS_MAIN_HPP_ -#define PLSSVM_TESTS_MAIN_HPP_ -#pragma once - -#include "gtest/gtest.h" // GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST - -// silence GTest warnings/test errors - -// generic CSVM tests -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVM); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunction); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolver); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunction); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionClassification); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionClassification); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMKernelFunctionDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericCSVMSolverKernelFunctionDeathTest); -// generic GPU CSVM tests -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVM); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMKernelFunction); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(GenericGPUCSVMDeathTest); -// pinned memory tests -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemory); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryLayout); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryDeathTest); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PinnedMemoryLayoutDeathTest); -// device pointer tests -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtr); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrLayout); -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(DevicePtrDeathTest); -// exception tests -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Exception); - -#endif // PLSSVM_TESTS_MAIN_HPP_ From 7ad8d1ccf61cd3da67fee6487076a962a4ccf842 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 4 Nov 
2024 16:40:42 +0100 Subject: [PATCH 040/123] Remove now unused directory documentation. --- docs/resources/dirs.dox | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/docs/resources/dirs.dox b/docs/resources/dirs.dox index de0ce2d6a..7e9a5491d 100644 --- a/docs/resources/dirs.dox +++ b/docs/resources/dirs.dox @@ -208,17 +208,6 @@ * @brief Directory containing kernel implementations for the implicit CG algorithm using the Kokkos backend. */ -/** - * @dir include/plssvm/backends/Kokkos/kernel/detail - * @author Alexander Van Craen - * @author Marcel Breyer - * @copyright 2018-today The PLSSVM project - All Rights Reserved - * @license This file is part of the PLSSVM project which is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Directory containing kernel implementations for utility functions using the Kokkos backend. - */ - /** * @dir include/plssvm/backends/OpenCL * @author Alexander Van Craen From f551364e544b515fab9a89af863fed90a2fdfeb2 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 10:58:45 +0100 Subject: [PATCH 041/123] Fix failing test. --- tests/csvm_factory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/csvm_factory.cpp b/tests/csvm_factory.cpp index dc365293c..a80b705c5 100644 --- a/tests/csvm_factory.cpp +++ b/tests/csvm_factory.cpp @@ -234,7 +234,7 @@ TEST(CSVMFactory, factory_named_parameter) { } TEST(CSVMFactory, invalid_backend) { - EXPECT_THROW_WHAT(std::ignore = plssvm::make_csvm(static_cast(7)), + EXPECT_THROW_WHAT(std::ignore = plssvm::make_csvm(static_cast(8)), plssvm::unsupported_backend_exception, "Unrecognized backend provided!"); } From b064677dfd1c191c3eb9d5f0459e76702933a740 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 10:59:12 +0100 Subject: [PATCH 042/123] Add new conditional execution macro. 
--- .../Kokkos/detail/conditional_execution.hpp | 118 ++++++++++++++++-- .../Kokkos/detail/device_view_wrapper.hpp | 7 +- src/plssvm/backends/Kokkos/csvm.cpp | 26 ++-- .../backends/Kokkos/detail/device_wrapper.cpp | 21 ++-- src/plssvm/backends/Kokkos/detail/utility.cpp | 28 ++--- 5 files changed, 143 insertions(+), 57 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp index 6ed8c3421..95d4c8300 100644 --- a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp +++ b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp @@ -24,111 +24,211 @@ namespace plssvm::kokkos::detail { +//***************************************************// +// Kokkos::Cuda // +//***************************************************// + /** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA` macro if `KOKKOS_ENABLE_CUDA` is defined, i.e., the Kokkos CUDA ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_CUDA` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA` macro if `KOKKOS_ENABLE_CUDA` is defined, i.e., the Kokkos CUDA ExecutionSpace is available. * @details If `KOKKOS_ENABLE_CUDA` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
*/ #if defined(KOKKOS_ENABLE_CUDA) - #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(func) std::invoke(func) #else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::cuda) } #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(func) \ throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::cuda) } #endif +//***************************************************// +// Kokkos::Hip // +//***************************************************// + /** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP` macro if `KOKKOS_ENABLE_HIP` is defined, i.e., the Kokkos HIP ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_HIP` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP` macro if `KOKKOS_ENABLE_HIP` is defined, i.e., the Kokkos HIP ExecutionSpace is available. * @details If `KOKKOS_ENABLE_HIP` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
*/ #if defined(KOKKOS_ENABLE_HIP) - #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(func) std::invoke(func) #else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hip) } #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(func) \ throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hip) } #endif +//***************************************************// +// Kokkos::SYCL // +//***************************************************// + /** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL` macro if `KOKKOS_ENABLE_SYCL` is defined, i.e., the Kokkos SYCL ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_SYCL` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL` macro if `KOKKOS_ENABLE_SYCL` is defined, i.e., the Kokkos SYCL ExecutionSpace is available. * @details If `KOKKOS_ENABLE_SYCL` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
*/ #if defined(KOKKOS_ENABLE_SYCL) - #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(func) std::invoke(func) #else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::sycl) } #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(func) \ throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::sycl) } #endif +//***************************************************// +// Kokkos::HPX // +//***************************************************// + /** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HPX + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HPX` macro if `KOKKOS_ENABLE_HPX` is defined, i.e., the Kokkos HPX ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_HPX` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX` macro if `KOKKOS_ENABLE_HPX` is defined, i.e., the Kokkos HPX ExecutionSpace is available. * @details If `KOKKOS_ENABLE_HPX` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
*/ #if defined(KOKKOS_ENABLE_HPX) - #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HPX(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX(func) std::invoke(func) #else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HPX(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hpx) } #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX(func) \ throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::hpx) } #endif +//***************************************************// +// Kokkos::OpenMP // +//***************************************************// + /** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMP + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMP` macro if `KOKKOS_ENABLE_OPENMP` is defined, i.e., the Kokkos OpenMP ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENMP` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP` macro if `KOKKOS_ENABLE_OPENMP` is defined, i.e., the Kokkos OpenMP ExecutionSpace is available. * @details If `KOKKOS_ENABLE_OPENMP` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
*/ #if defined(KOKKOS_ENABLE_OPENMP) - #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMP(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP(func) std::invoke(func) #else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMP(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp) } #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP(func) \ throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp) } #endif +//***************************************************// +// Kokkos::OpenMPTarget // +//***************************************************// + /** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMPTARGET + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMPTARGET` macro if `KOKKOS_ENABLE_OPENMPTARGET` is defined, i.e., the Kokkos OpenMP target offloading ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENMPTARGET` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET` macro if `KOKKOS_ENABLE_OPENMPTARGET` is defined, i.e., the Kokkos OpenMP target offloading ExecutionSpace is available. * @details If `KOKKOS_ENABLE_OPENMPTARGET` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
*/ #if defined(KOKKOS_ENABLE_OPENMPTARGET) - #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMPTARGET(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET(func) std::invoke(func) #else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMPTARGET(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp_target) } #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET(func) \ throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openmp_target) } #endif +//***************************************************// +// Kokkos::OpenACC // +//***************************************************// + /** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENACC + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENACC` macro if `KOKKOS_ENABLE_OPENACC` is defined, i.e., the Kokkos OpenACC ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_OPENACC` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC` macro if `KOKKOS_ENABLE_OPENACC` is defined, i.e., the Kokkos OpenACC ExecutionSpace is available. * @details If `KOKKOS_ENABLE_OPENACC` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
*/ #if defined(KOKKOS_ENABLE_OPENACC) - #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENACC(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC(func) std::invoke(func) #else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENACC(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openacc) } #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC(func) \ throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::openacc) } #endif +//***************************************************// +// Kokkos::Threads // +//***************************************************// + /** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_THREADS + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_THREADS` macro if `KOKKOS_ENABLE_THREADS` is defined, i.e., the Kokkos std::thread ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_THREADS` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS` macro if `KOKKOS_ENABLE_THREADS` is defined, i.e., the Kokkos std::thread ExecutionSpace is available. * @details If `KOKKOS_ENABLE_THREADS` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. 
*/ #if defined(KOKKOS_ENABLE_THREADS) - #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_THREADS(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS(func) std::invoke(func) #else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_THREADS(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::threads) } #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS(func) \ throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::threads) } #endif +//***************************************************// +// Kokkos::Serial // +//***************************************************// + /** + * @def PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SERIAL + * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SERIAL` macro if `KOKKOS_ENABLE_SERIAL` is defined, i.e., the Kokkos serial ExecutionSpace is available. + * @details If `KOKKOS_ENABLE_SERIAL` is enabled, invokes the provided function (normally a lambda function) and returns the return value, otherwise throws an exception. + * @note This ExecutionSpace *should* always be available! + * * @def PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL * @brief Defines the `PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL` macro if `KOKKOS_ENABLE_SERIAL` is defined, i.e., the Kokkos serial ExecutionSpace is available. * @details If `KOKKOS_ENABLE_SERIAL` is enabled, invokes the provided function (normally a lambda function), otherwise throws an exception. * @note This ExecutionSpace *should* always be available! 
*/ #if defined(KOKKOS_ENABLE_SERIAL) - #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SERIAL(func) return std::invoke(func) + #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL(func) std::invoke(func) #else + #define PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SERIAL(func) \ + throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::serial) } #define PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL(func) \ throw backend_exception { fmt::format("The Kokkos ExecutionSpace {} is not available!", execution_space::serial) } #endif diff --git a/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp index 018edc48e..a3019829e 100644 --- a/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp +++ b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp @@ -12,10 +12,9 @@ #ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_ -#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* -#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, execution_space_to_kokkos_type_t}, plssvm::kokkos::detail::constexpr_available_execution_spaces -#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, execution_space_to_kokkos_type_t}, plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "Kokkos_Core.hpp" // Kokkos::View, 
Kokkos::ExecutionSpace diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index b097d42ef..6e0335e56 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -9,7 +9,7 @@ #include "plssvm/backends/Kokkos/csvm.hpp" #include "plssvm/backends/execution_range.hpp" // plssvm::detail::{execution_range, dim_type} -#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* +#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_*, PLSSVM_KOKKOS_BACKEND_INVOKE_IF_ #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr #include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::{device_wrapper, get_device_list} #include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::get_runtime_version // TODO: docu @@ -170,21 +170,18 @@ std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().cuda_device_prop().totalGlobalMem) }; } - return res; }); case execution_space::hip: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().hip_device_prop().totalGlobalMem) }; } - return res; }); case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::global_mem_size>()) }; } - return res; }); case execution_space::openmp: case 
execution_space::hpx: @@ -195,12 +192,11 @@ std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { case execution_space::openacc: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } - // all possible cases should be handled by the previous switch - // -> silence missing return statement compiler warnings due to throw statement - ::plssvm::detail::unreachable(); + return res; } std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const { + [[maybe_unused]] std::vector<::plssvm::detail::memory_size> res(this->num_available_devices()); // TODO: implement for other execution spaces switch (space_) { case execution_space::cuda: @@ -211,8 +207,8 @@ std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::max_mem_alloc_size>()) }; } - return res; }); + break; case execution_space::openmp: case execution_space::hpx: case execution_space::threads: @@ -222,9 +218,7 @@ std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const case execution_space::openacc: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } - // all possible cases should be handled by the previous switch - // -> silence missing return statement compiler warnings due to throw statement - ::plssvm::detail::unreachable(); + return res; } std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { @@ -233,15 +227,15 @@ std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { // TODO: implement for other execution spaces switch (space_) { case execution_space::cuda: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { + 
PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA([&]() { return static_cast(devices_[device_id].get().cuda_device_prop().maxThreadsPerBlock); }); case execution_space::hip: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP([&]() { return static_cast(devices_[device_id].get().hip_device_prop().maxThreadsPerBlock); }); case execution_space::sycl: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL([&]() { return devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::max_work_group_size>(); }); case execution_space::openmp: @@ -266,14 +260,14 @@ ::plssvm::detail::dim_type csvm::get_max_grid_size(const std::size_t device_id) // TODO: implement for other execution spaces switch (space_) { case execution_space::cuda: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA(([&]() -> ::plssvm::detail::dim_type { + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA(([&]() -> ::plssvm::detail::dim_type { // TODO: Kokkos only uses maxGridSize[0] const cudaDeviceProp &prop = devices_[device_id].get().cuda_device_prop(); return { static_cast(prop.maxGridSize[0]), static_cast(prop.maxGridSize[1]), static_cast(prop.maxGridSize[2]) }; })); case execution_space::hip: // TODO: Kokkos only uses maxGridSize[0] - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP(([&]() -> ::plssvm::detail::dim_type { + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP(([&]() -> ::plssvm::detail::dim_type { const hipDeviceProp &prop = devices_[device_id].get().hip_device_prop(); return { static_cast(prop.maxGridSize[0]), static_cast(prop.maxGridSize[1]), static_cast(prop.maxGridSize[2]) }; })); diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index add12def4..5fa580aae 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -35,8 +35,8 @@ std::vector get_device_list(const execution_space space, [[maybe 
// Note: it is important to pass the cudaStream_t lifetime to be managed by Kokkos devices.emplace_back(Kokkos::Cuda(stream, Kokkos::Impl::ManageStream::yes)); } - return devices; }); + break; case execution_space::hip: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { for (int device = 0; device < Kokkos::num_devices(); ++device) { @@ -48,20 +48,20 @@ std::vector get_device_list(const execution_space space, [[maybe // Note: it is important to pass the hipStream_t lifetime to be managed by Kokkos devices.emplace_back(Kokkos::Hip(stream, Kokkos::Impl::ManageStream::yes)); } - return devices; }); + break; case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { // TODO: use all available devices -> not that trivial // TODO: handle target <- if provide queue -> managed? devices.emplace_back(Kokkos::SYCL{}); - return devices; }); + break; case execution_space::hpx: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX([&]() { devices.emplace_back(Kokkos::Hpx{}); - return devices; }); + break; case execution_space::openmp: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMP([&]() { // Note: if OpenMP should be used as device must be set in order for it to work! @@ -73,34 +73,31 @@ std::vector get_device_list(const execution_space space, [[maybe omp_set_nested(1); } devices.emplace_back(Kokkos::OpenMP{}); - return devices; }); + break; case execution_space::openmp_target: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET([&]() { // TODO: multi-GPU? devices.emplace_back(Kokkos::OpenMPTarget{}); - return devices; }); + break; case execution_space::openacc: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC([&]() { // TODO: multi-GPU? 
devices.emplace_back(Kokkos::OpenACC{}); - return devices; }); + break; case execution_space::threads: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS([&]() { devices.emplace_back(Kokkos::Threads{}); - return devices; }); case execution_space::serial: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL([&]() { devices.emplace_back(Kokkos::Serial{}); - return devices; }); + break; } - // all possible cases should be handled by the previous switch - // -> silence missing return statement compiler warnings due to throw statement - ::plssvm::detail::unreachable(); + return devices; } } // namespace plssvm::kokkos::detail diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index eb02660a6..8619471a9 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -8,7 +8,7 @@ #include "plssvm/backends/Kokkos/detail/utility.hpp" -#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* +#include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_* #include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT @@ -41,20 +41,17 @@ std::map> available_target_platfor break; case execution_space::hip: // NVIDIA or AMD GPUs possible (both simultaneously are unsupported) -#if defined(KOKKOS_ENABLE_HIP) - #if defined(__HIP_PLATFORM_AMD__) - available_map[target_platform::gpu_amd].push_back(execution_space::hip); - #elif defined(__HIP_PLATFORM_NVIDIA__) - available_map[target_platform::gpu_nvidia].push_back(execution_space::hip); - #else - #error "Unknown HIP platform" - #endif + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { +#if defined(__HIP_PLATFORM_AMD__) + 
available_map[target_platform::gpu_amd].push_back(execution_space::hip); +#elif defined(__HIP_PLATFORM_NVIDIA__) + available_map[target_platform::gpu_nvidia].push_back(execution_space::hip); #endif + }); break; case execution_space::sycl: // list all potential target platforms currently available in SYCL -#if defined(KOKKOS_ENABLE_SYCL) - { + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { std::unordered_set targets{}; for (const auto &platform : sycl::platform::get_platforms()) { for (const auto &device : platform.get_devices()) { @@ -83,8 +80,7 @@ std::map> available_target_platfor for (const target_platform target : targets) { available_map[target].push_back(execution_space::sycl); } - } -#endif + }); break; case execution_space::openacc: // TODO: restrict to available devices @@ -122,15 +118,15 @@ std::map> available_target_platfor std::string get_device_name([[maybe_unused]] const device_wrapper &dev) { switch (dev.get_execution_space()) { case execution_space::cuda: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA([&]() { return std::string{ dev.get().cuda_device_prop().name }; }); case execution_space::hip: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP([&]() { return std::string{ dev.get().hip_device_prop().name }; }); case execution_space::sycl: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL([&]() { return dev.get().sycl_queue.get_device().get_info(); }); case execution_space::hpx: From b5174df96016665fdc47656c950131ee93c2fccc Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 11:04:48 +0100 Subject: [PATCH 043/123] Add missing break statements. 
--- src/plssvm/backends/Kokkos/csvm.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 6e0335e56..0fa73773e 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -171,18 +171,21 @@ std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().cuda_device_prop().totalGlobalMem) }; } }); + break; case execution_space::hip: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().hip_device_prop().totalGlobalMem) }; } }); + break; case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::global_mem_size>()) }; } }); + break; case execution_space::openmp: case execution_space::hpx: case execution_space::threads: From 25105ebc1cf84693dc10287d17ed23b195a29dc8 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 11:24:15 +0100 Subject: [PATCH 044/123] Add missing make_device_view_wrapper test. 
--- .../Kokkos/detail/device_view_wrapper.cpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/backends/Kokkos/detail/device_view_wrapper.cpp b/tests/backends/Kokkos/detail/device_view_wrapper.cpp index c794072b4..28dc97cba 100644 --- a/tests/backends/Kokkos/detail/device_view_wrapper.cpp +++ b/tests/backends/Kokkos/detail/device_view_wrapper.cpp @@ -10,12 +10,15 @@ #include "plssvm/backends/Kokkos/detail/device_view_wrapper.hpp" -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} #include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace, Kokkos::View #include "gtest/gtest.h" // TEST, EXPECT_EQ, EXPECT_TRUE, EXPECT_FALSE +#include // std::size_t + TEST(KokkosDeviceViewWrapper, default_construct) { // default construct a device view wrapper const plssvm::kokkos::detail::device_view_wrapper view{}; @@ -76,4 +79,17 @@ TEST(KokkosDeviceViewWrapper, inequality) { EXPECT_FALSE(view1 != view2); } -// TODO: make_device_view_wrapper \ No newline at end of file +TEST(KokkosDeviceViewWrapper, make_device_view_wrapper) { + // create a device wrapper for the Kokkos::DefaultExecutionSpace + const plssvm::kokkos::detail::device_wrapper device{ Kokkos::DefaultExecutionSpace{} }; + + // create device view wrapper + const plssvm::kokkos::detail::device_view_wrapper view = plssvm::kokkos::detail::make_device_view_wrapper(device, 42); + + // check that the returned Kokkos::View has the correct type + constexpr plssvm::kokkos::execution_space space = plssvm::kokkos::kokkos_type_to_execution_space_v; + ::testing::StaticAssertTypeEq()), const Kokkos::View &>(); + + // check the number of elements + EXPECT_EQ(view.get().size(), std::size_t{ 42 
}); +} From a1a2fdb5904a475117bc2b7e196ceceb53d91ac8 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 15:34:11 +0100 Subject: [PATCH 045/123] Remove dim_type::total_size in favor of Kokkos specific dim_type_to_native function. --- .../plssvm/backends/Kokkos/detail/utility.hpp | 8 ++ include/plssvm/backends/execution_range.hpp | 12 +-- src/plssvm/backends/Kokkos/csvm.cpp | 73 +++++++++++++------ src/plssvm/backends/Kokkos/detail/utility.cpp | 5 ++ tests/backends/Kokkos/detail/utility.cpp | 12 +++ tests/backends/execution_range.cpp | 14 ---- 6 files changed, 76 insertions(+), 48 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/utility.hpp b/include/plssvm/backends/Kokkos/detail/utility.hpp index fe8b0367f..9bbc9b172 100644 --- a/include/plssvm/backends/Kokkos/detail/utility.hpp +++ b/include/plssvm/backends/Kokkos/detail/utility.hpp @@ -13,6 +13,7 @@ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_UTILITY_HPP_ #pragma once +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type #include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES @@ -54,6 +55,13 @@ inline constexpr bool is_type_in_variant_v = is_type_in_variant::val } // namespace impl +/** + * @brief Convert a `plssvm::detail::dim_type` to a Kokkos native one-dimensional value. + * @param[in] dims the dimensional value to convert + * @return the native one-dimensional value (`[[nodiscard]]`) + */ +[[nodiscard]] int dim_type_to_native(const ::plssvm::detail::dim_type &dims); + /** * @brief Return a `std::map` containing a mapping from all available target platforms to the available Kokkos::ExecutionSpace that supports said target platform. 
* @details If a target platform is supported by multiple Kokkos::ExecutionSpace, the order is determined by the order as returned by `list_available_execution_spaces`. diff --git a/include/plssvm/backends/execution_range.hpp b/include/plssvm/backends/execution_range.hpp index c44aab7b6..5be842f9a 100644 --- a/include/plssvm/backends/execution_range.hpp +++ b/include/plssvm/backends/execution_range.hpp @@ -12,6 +12,8 @@ #ifndef PLSSVM_BACKENDS_EXECUTION_RANGE_HPP_ #define PLSSVM_BACKENDS_EXECUTION_RANGE_HPP_ +#include "plssvm/backend_types.hpp" // plssvm::backend_type + #include "fmt/base.h" // fmt::formatter #include "fmt/ostream.h" // fmt::ostream_formatter @@ -77,15 +79,6 @@ struct [[nodiscard]] dim_type { swap_ull(z, other.z); } - /** - * @brief Return the total number of elements in the dimensional type. - * @details Equal to: `x * y * z`. - * @return the total number of elements (`[[nodiscard]]`) - */ - [[nodiscard]] constexpr unsigned long long total_size() const noexcept { - return x * y * z; - } - /// The dimensional size in x direction. unsigned long long x{ 1 }; /// The dimensional size in y direction. @@ -170,7 +163,6 @@ struct execution_range { /// The grids. Multiple grids are used, if the grid sizes would exceed the maximum allowed number. Also stores the offsets for the respective grids used in the kernels. /// Note: no default initialization due to a linker error occurring with NVIDIA's nvhpc! 
std::vector grids; - }; /** diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 0fa73773e..6c7807527 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -312,16 +312,20 @@ auto csvm::run_assemble_kernel_matrix_explicit(const std::size_t device_id, cons const real_type cost_factor = real_type{ 1.0 } / params.cost; const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); return devices_[device_id].execute_and_return([&](auto &device) { using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; constexpr execution_space space = kokkos_type_to_execution_space_v; for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()) }; + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; + // TODO: test MDRangeTeamPolicy?! 
switch (params.kernel_type) { case kernel_function_type::linear: @@ -383,25 +387,31 @@ void csvm::run_blas_level_3_kernel_explicit(const std::size_t device_id, const : // the necessary amount of scratch memory for the kernels const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; Kokkos::parallel_for("blas_level_3_kernel_explicit", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm{ num_rows, num_rhs, device_specific_num_rows, row_offset, alpha, A_d.get().get(), B_d.get().get(), beta, C_d.get().get(), offsets.x, offsets.y, partial_grid.x }); } - // save the mirror team sizes - const ::plssvm::detail::dim_type mirror_team_sizes = mirror_exec.block; + // save the team size + const int mirror_team_size = detail::dim_type_to_native(mirror_exec.block); for (const auto &[partial_grid, offsets] : mirror_exec.grids) { const unsigned long long num_mirror_rows = num_rows - row_offset - device_specific_num_rows; if (num_mirror_rows > 0) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), 
static_cast(mirror_team_sizes.total_size()), Kokkos::AUTO }; + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, mirror_team_size }; Kokkos::parallel_for("blas_level_3_kernel_explicit_mirror", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_symm_mirror{ num_rows, num_rhs, num_mirror_rows, device_specific_num_rows, row_offset, alpha, A_d.get().get(), B_d.get().get(), beta, C_d.get().get(), offsets.x, offsets.y, partial_grid.x }); } @@ -417,12 +427,15 @@ void csvm::run_inplace_matrix_addition(const std::size_t device_id, const ::plss using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; constexpr execution_space space = kokkos_type_to_execution_space_v; - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; Kokkos::parallel_for("inplace_matrix_addition", team_policy, detail::device_kernel_inplace_matrix_add{ num_rhs, lhs_d.get().get(), rhs_d.get().get(), offsets.x, offsets.y, partial_grid.x }); } @@ -437,12 +450,15 @@ void csvm::run_inplace_matrix_scale(const std::size_t device_id, const ::plssvm: using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; constexpr execution_space space = kokkos_type_to_execution_space_v; - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); for (const auto 
&[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; Kokkos::parallel_for("inplace_matrix_scale", team_policy, detail::device_kernel_inplace_matrix_scale{ num_rhs, lhs_d.get().get(), scale, offsets.x, offsets.y, partial_grid.x }); } @@ -467,12 +483,15 @@ void csvm::run_assemble_kernel_matrix_implicit_blas_level_3(const std::size_t de const real_type cost_factor = real_type{ 1.0 } / params.cost; const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; switch (params.kernel_type) { case kernel_function_type::linear: @@ -534,16 +553,19 @@ auto csvm::run_w_kernel(const std::size_t device_id, const ::plssvm::detail::exe const std::size_t scratch_memory_size = static_cast(2u * THREAD_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + // save the team size + const int team_size = 
detail::dim_type_to_native(exec.block); return devices_[device_id].execute_and_return([&](auto &device) { using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; constexpr execution_space space = kokkos_type_to_execution_space_v; for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; Kokkos::parallel_for("w_kernel", team_policy.set_scratch_size(0, Kokkos::PerTeam(scratch_memory_size)), detail::device_kernel_w_linear{ w_d.get().get(), alpha_d.get().get(), sv_d.get().get(), num_classes, num_sv, device_specific_num_sv, sv_offset, offsets.x, offsets.y, partial_grid.x }); } @@ -563,16 +585,19 @@ auto csvm::run_predict_kernel(const std::size_t device_id, const ::plssvm::detai const std::size_t scratch_memory_size = static_cast(2u * FEATURE_BLOCK_SIZE * THREAD_BLOCK_SIZE * INTERNAL_BLOCK_SIZE) * sizeof(real_type); - // save the team sizes - const ::plssvm::detail::dim_type team_sizes = exec.block; + // save the team size + const int team_size = detail::dim_type_to_native(exec.block); return devices_[device_id].execute_and_return([&](auto &device) { using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; constexpr execution_space space = kokkos_type_to_execution_space_v; for (const auto &[partial_grid, offsets] : exec.grids) { + // convert execution range partial_grid to Kokkos' native one-dimensional size + const int native_partial_grid = detail::dim_type_to_native(partial_grid); + // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, static_cast(partial_grid.total_size()), static_cast(team_sizes.total_size()), Kokkos::AUTO }; + 
Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; switch (params.kernel_type) { case kernel_function_type::linear: diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index 8619471a9..b5451bc2c 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -8,6 +8,7 @@ #include "plssvm/backends/Kokkos/detail/utility.hpp" +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type #include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_* #include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space @@ -28,6 +29,10 @@ namespace plssvm::kokkos::detail { +int dim_type_to_native(const ::plssvm::detail::dim_type &dims) { + return static_cast(dims.x * dims.y * dims.z); +} + std::map> available_target_platform_to_execution_space_mapping() { std::map> available_map{}; diff --git a/tests/backends/Kokkos/detail/utility.cpp b/tests/backends/Kokkos/detail/utility.cpp index 7c6d491d5..ec18a977b 100644 --- a/tests/backends/Kokkos/detail/utility.cpp +++ b/tests/backends/Kokkos/detail/utility.cpp @@ -10,6 +10,7 @@ #include "plssvm/backends/Kokkos/detail/utility.hpp" +#include "plssvm/backends/execution_range.hpp" // plssvm::detail::dim_type #include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper #include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, kokkos_type_to_execution_space_v} @@ -43,6 +44,17 @@ TEST(KokkosUtility, is_type_in_variant) { EXPECT_FALSE((plssvm::kokkos::detail::impl::is_type_in_variant_v) ); } +TEST(KokkosUtility, dim_type_to_native) { + // create a dim_type + constexpr 
plssvm::detail::dim_type dim{ 128ull, 64ull, 32ull }; + + // convert it to a Kokkos one-dimensional execution range + const int native_dim = plssvm::kokkos::detail::dim_type_to_native(dim); + + // check values for correctness + EXPECT_EQ(native_dim, 262'144); // = 128 * 64 * 32 +} + TEST(KokkosUtility, available_target_platform_to_execution_space_mapping) { // get the target_platform <-> execution_space mappings const std::map> mapping = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); diff --git a/tests/backends/execution_range.cpp b/tests/backends/execution_range.cpp index 866dae83a..75fe16ef2 100644 --- a/tests/backends/execution_range.cpp +++ b/tests/backends/execution_range.cpp @@ -94,20 +94,6 @@ TEST(DimType, swap_free_function) { EXPECT_EQ(dim2.z, 1ull); } -TEST(DimType, total_size) { - // create dim types - constexpr plssvm::detail::dim_type dim1{}; - constexpr plssvm::detail::dim_type dim2{ 64ull }; - constexpr plssvm::detail::dim_type dim3{ 64ull, 32ull }; - constexpr plssvm::detail::dim_type dim4{ 64ull, 32ull, 16ull }; - - // test total_size function - EXPECT_EQ(dim1.total_size(), 1ull); - EXPECT_EQ(dim2.total_size(), 64ull); - EXPECT_EQ(dim3.total_size(), 2048ull); - EXPECT_EQ(dim4.total_size(), 32768ull); -} - TEST(DimType, equality) { // create dim types constexpr plssvm::detail::dim_type dim1{}; From 208055ebabb7cf7d70d3ba15d6c93d24d076fcd9 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 15:35:08 +0100 Subject: [PATCH 046/123] Fix maximum grid size problems when using Kokkos (since Kokkos only supports a one-dimensional execution range). 
--- src/plssvm/backends/Kokkos/csvm.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 6c7807527..157e08685 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -38,6 +38,7 @@ #include "fmt/core.h" // fmt::format #include "fmt/format.h" // fmt::format +#include // std::sqrt #include // std::size_t #include // std::terminate #include // std::cout, std::endl @@ -260,22 +261,24 @@ std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { ::plssvm::detail::dim_type csvm::get_max_grid_size(const std::size_t device_id) const { PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); + // NOTE: Kokkos only supports one-dimensional execution ranges! + // NOTE: we only use two-dimensional kernels! // TODO: implement for other execution spaces switch (space_) { case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA(([&]() -> ::plssvm::detail::dim_type { - // TODO: Kokkos only uses maxGridSize[0] const cudaDeviceProp &prop = devices_[device_id].get().cuda_device_prop(); - return { static_cast(prop.maxGridSize[0]), static_cast(prop.maxGridSize[1]), static_cast(prop.maxGridSize[2]) }; + const auto max_grid_size = static_cast(std::sqrt(prop.maxGridSize[0])); + return { max_grid_size, max_grid_size, std::size_t{ 1 } }; })); case execution_space::hip: - // TODO: Kokkos only uses maxGridSize[0] PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP(([&]() -> ::plssvm::detail::dim_type { const hipDeviceProp &prop = devices_[device_id].get().hip_device_prop(); - return { static_cast(prop.maxGridSize[0]), static_cast(prop.maxGridSize[1]), static_cast(prop.maxGridSize[2]) }; + const auto max_grid_size = static_cast(std::sqrt(prop.maxGridSize[0])); + return { max_grid_size, max_grid_size, std::size_t{ 1 } }; })); case execution_space::openmp: - return { 16, 16, 16 }; 
// TODO: correct values + return { 16, 16, 1 }; // TODO: correct values case execution_space::serial: return { 1, 1, 1 }; // TODO: correct values case execution_space::sycl: From 2d82a248dcd95a45a5872f1d35801c1189f972e8 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 15:46:19 +0100 Subject: [PATCH 047/123] Fix TODO. --- src/plssvm/backends/Kokkos/CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt index 20ae3c0a6..d2bf6addf 100644 --- a/src/plssvm/backends/Kokkos/CMakeLists.txt +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -39,11 +39,6 @@ target_link_libraries(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC Kokkos::kokko # link base library against Kokkos library target_link_libraries(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC ${PLSSVM_BASE_LIBRARY_NAME}) -# set whether the kernel source should be compiled with fast math enabled or not # TODO: enable fast-math -#if (PLSSVM_ENABLE_FAST_MATH) -# target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE PLSSVM_ENABLE_FAST_MATH) -#endif () - # set compile definition that the Kokkos backend is available target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_HAS_KOKKOS_BACKEND) target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC PLSSVM_HAS_KOKKOS_BACKEND) From b130e15d68abfae5af2677b9c5716f7fa92b5ac9 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 20:57:29 +0100 Subject: [PATCH 048/123] Enable the Kokkos CSVM in the Python bindings. 
--- bindings/Python/CMakeLists.txt | 3 + bindings/Python/README.md | 18 +++++- bindings/Python/backends/kokkos_csvm.cpp | 72 ++++++++++++++++++++++++ bindings/Python/main.cpp | 4 ++ 4 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 bindings/Python/backends/kokkos_csvm.cpp diff --git a/bindings/Python/CMakeLists.txt b/bindings/Python/CMakeLists.txt index 0ae329356..8dfba2e04 100644 --- a/bindings/Python/CMakeLists.txt +++ b/bindings/Python/CMakeLists.txt @@ -94,6 +94,9 @@ endif () if (TARGET ${PLSSVM_SYCL_BACKEND_DPCPP_LIBRARY_NAME}) list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/dpcpp_csvm.cpp) endif () +if (TARGET ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + list(APPEND PLSSVM_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/backends/kokkos_csvm.cpp) +endif () # create pybind11 module set(PLSSVM_PYTHON_BINDINGS_LIBRARY_NAME plssvm) diff --git a/bindings/Python/README.md b/bindings/Python/README.md index b7825de52..7d6140bc3 100644 --- a/bindings/Python/README.md +++ b/bindings/Python/README.md @@ -10,7 +10,7 @@ - [plssvm.Parameter](#plssvmparameter) - [plssvm.DataSet](#plssvmdataset) - [plssvm.CSVM](#plssvmcsvm) - - [plssvm.openmp.CSVM, plssvm.stdpar.CSVM, plssvm.cuda.CSVM, plssvm.hip.CSVM, plssvm.opencl.CSVM, plssvm.sycl.CSVM, plssvm.dpcpp.CSVM, plssvm.adaptivecpp.CSVM](#plssvmopenmpcsvm-plssvmcudacsvm-plssvmhipcsvm-plssvmopenclcsvm-plssvmsyclcsvm-plssvmdpcppcsvm-plssvmadaptivecppcsvm) + - [plssvm.openmp.CSVM, plssvm.stdpar.CSVM, plssvm.cuda.CSVM, plssvm.hip.CSVM, plssvm.opencl.CSVM, plssvm.sycl.CSVM, plssvm.dpcpp.CSVM, plssvm.adaptivecpp.CSVM, plssvm.kokkos.CSVM](#plssvmopenmpcsvm-plssvmstdparcsvm-plssvmcudacsvm-plssvmhipcsvm-plssvmopenclcsvm-plssvmsyclcsvm-plssvmdpcppcsvm-plssvmadaptivecppcsvm-plssvmkokkoscsvm) - [plssvm.Model](#plssvmmodel) - [plssvm.Version](#plssvmversion) - [plssvm.detail.tracking.PerformanceTracker](#plssvmdetailtrackingperformancetracker) @@ -211,6 +211,12 @@ If the stdpar backend is available, an additional enumeration is 
available: |----------------------|---------------------------------------------------------------|-------------------------------------------------| | `ImplementationType` | `NVHPC`, `ROC_STDPAR`, `INTEL_LLVM`, `ADAPTIVECPP`, `GNU_TBB` | The different supported stdpar implementations. | +If the Kokkos backend is available, an additional enumeration is available: + +| enumeration | values | description | +|------------------|----------------------------------------------------------------------------------------|--------------------------------------------------| +| `ExecutionSpace` | `CUDA`, `HIP`, `SYCL`, `HPX`, `OPENMP`, `OPENMPTARGET`, `OPENACC`, `THREADS`, `SERIAL` | The different supported Kokkos execution spaces. | + ### Classes and submodules The following tables list all PLSSVM classes exposed on the Python side: @@ -347,7 +353,7 @@ and `sycl_kernel_invocation_type` to choose between the two different SYCL kerne | `score(model)` | Score the model with respect to itself returning its accuracy. | | `score(model, data_set)` | Score the model given the provided data set returning its accuracy. | -#### `plssvm.openmp.CSVM`, `plssvm.stdpar.CSVM`, plssvm.cuda.CSVM`, `plssvm.hip.CSVM`, `plssvm.opencl.CSVM`, `plssvm.sycl.CSVM`, `plssvm.dpcpp.CSVM`, `plssvm.adaptivecpp.CSVM` +#### `plssvm.openmp.CSVM`, `plssvm.stdpar.CSVM`, `plssvm.cuda.CSVM`, `plssvm.hip.CSVM`, `plssvm.opencl.CSVM`, `plssvm.sycl.CSVM`, `plssvm.dpcpp.CSVM`, `plssvm.adaptivecpp.CSVM`, `plssvm.kokkos.CSVM` These classes represent the backend specific CSVMs. **Note**: they are only available if the respective backend has been enabled during PLSSVM's build step. @@ -385,6 +391,14 @@ CSVM. |-----------------------------|---------------------------------------------| | `get_implementation_type()` | Return the used stdpar implementation type. | +In case of the Kokkos CSVM (`plssvm.kokkos.CSVM`) the following method is additionally available for the backend specific +CSVM. 
+ + +| methods | description | +|-------------------------|-----------------------------------------| +| `get_execution_space()` | Return the used Kokkos execution space. | + #### `plssvm.Model` A class encapsulating a model learned during a call to `plssvm.CSVM.fit()`. diff --git a/bindings/Python/backends/kokkos_csvm.cpp b/bindings/Python/backends/kokkos_csvm.cpp new file mode 100644 index 000000000..ea0dc17e6 --- /dev/null +++ b/bindings/Python/backends/kokkos_csvm.cpp @@ -0,0 +1,72 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/csvm.hpp" // plssvm::csvm +#include "plssvm/exceptions/exceptions.hpp" // plssvm::exception +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform + +#include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception + +#include "pybind11/pybind11.h" // py::module_, py::class_, py::init +#include "pybind11/stl.h" // support for STL types + +#include // std::make_unique + +namespace py = pybind11; + +void init_kokkos_csvm(py::module_ &m, const py::exception &base_exception) { + // use its own submodule for the Kokkos CSVM bindings + py::module_ kokkos_module = m.def_submodule("kokkos", "a module containing all Kokkos backend specific functionality"); + + // bind the CSVM using the Kokkos backend + py::class_(kokkos_module, "CSVM") + .def(py::init<>(), "create an SVM with the automatic target platform 
and default parameter object") + .def(py::init(), "create an SVM with the automatic target platform and provided parameter object") + .def(py::init(), "create an SVM with the provided target platform and default parameter object") + .def(py::init(), "create an SVM with the provided target platform and parameter object") + .def(py::init([](const py::kwargs &args) { + // check for valid keys + check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + // if one of the value keyword parameter is provided, set the respective value + const plssvm::parameter params = convert_kwargs_to_parameter(args); + // create CSVM with the default target platform + return std::make_unique(params); + }), + "create an SVM with the default target platform and keyword arguments") + .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { + // check for valid keys + check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + // if one of the value keyword parameter is provided, set the respective value + const plssvm::parameter params = convert_kwargs_to_parameter(args); + // create CSVM with the provided target platform + return std::make_unique(target, params); + }), + "create an SVM with the provided target platform and keyword arguments") + .def("get_execution_space", &plssvm::kokkos::csvm::get_execution_space, "get the Kokkos execution space used in this Kokkos SVM"); + + // register Kokkos backend specific exceptions + register_py_exception(kokkos_module, "BackendError", base_exception); + + // bind the execution space enum classes + py::enum_(kokkos_module, "ExecutionSpace") + .value("CUDA", plssvm::kokkos::execution_space::cuda, "execution space representing execution on a CUDA device") + .value("HIP", plssvm::kokkos::execution_space::hip, "execution space representing execution on a device supported by HIP") + .value("SYCL", plssvm::kokkos::execution_space::sycl, "execution space 
representing execution on a device supported by SYCL") + .value("HPX", plssvm::kokkos::execution_space::hpx, "execution space representing execution with the HPX runtime system") + .value("OPENMP", plssvm::kokkos::execution_space::openmp, "execution space representing execution with the OpenMP runtime system") + .value("OPENMPTARGET", plssvm::kokkos::execution_space::openmp_target, "execution space representing execution using the target offloading feature of the OpenMP runtime system") + .value("OPENACC", plssvm::kokkos::execution_space::openacc, "execution space representing execution with the OpenACC runtime system") + .value("THREADS", plssvm::kokkos::execution_space::threads, "execution space representing parallel execution with std::threads") + .value("SERIAL", plssvm::kokkos::execution_space::serial, "execution space representing serial execution on the CPU; should always be available"); + + kokkos_module.def("list_available_execution_spaces", &plssvm::kokkos::list_available_execution_spaces, "list all available Kokkos execution spaces"); +} diff --git a/bindings/Python/main.cpp b/bindings/Python/main.cpp index 10fbafbef..c49d57092 100644 --- a/bindings/Python/main.cpp +++ b/bindings/Python/main.cpp @@ -39,6 +39,7 @@ void init_cuda_csvm(py::module_ &, const py::exception &); void init_hip_csvm(py::module_ &, const py::exception &); void init_opencl_csvm(py::module_ &, const py::exception &); void init_sycl(py::module_ &, const py::exception &); +void init_kokkos_csvm(py::module_ &, const py::exception &); void init_sklearn(py::module_ &); PYBIND11_MODULE(plssvm, m) { @@ -99,6 +100,9 @@ PYBIND11_MODULE(plssvm, m) { #if defined(PLSSVM_HAS_SYCL_BACKEND) init_sycl(m, base_exception); #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + init_kokkos_csvm(m, base_exception); +#endif init_sklearn(m); } From f0fc6aefe38a450131dcb3c19630cecb26f33206 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 20:57:47 +0100 Subject: [PATCH 049/123] Improve 
documentation. --- include/plssvm/backends/Kokkos/execution_space.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/plssvm/backends/Kokkos/execution_space.hpp b/include/plssvm/backends/Kokkos/execution_space.hpp index bb37a39a7..07ecadf24 100644 --- a/include/plssvm/backends/Kokkos/execution_space.hpp +++ b/include/plssvm/backends/Kokkos/execution_space.hpp @@ -44,7 +44,7 @@ enum class execution_space { openacc, /** Execution space representing parallel execution with std::threads. */ threads, - /** Execution space representing serial execution on the CPU. Always available. */ + /** Execution space representing serial execution on the CPU. Should always be available. */ serial }; From 2b69199d4b50f040070dff159c7fed7bb384398d Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 5 Nov 2024 21:52:34 +0100 Subject: [PATCH 050/123] Add missing const. --- src/plssvm/backends/Kokkos/csvm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 157e08685..806e908c0 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -438,7 +438,7 @@ void csvm::run_inplace_matrix_addition(const std::size_t device_id, const ::plss const int native_partial_grid = detail::dim_type_to_native(partial_grid); // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; + const Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; Kokkos::parallel_for("inplace_matrix_addition", team_policy, detail::device_kernel_inplace_matrix_add{ num_rhs, lhs_d.get().get(), rhs_d.get().get(), offsets.x, offsets.y, partial_grid.x }); } @@ -461,7 +461,7 @@ void csvm::run_inplace_matrix_scale(const std::size_t device_id, const ::plssvm: const int native_partial_grid = detail::dim_type_to_native(partial_grid); // create a Kokkos TeamPolicy - Kokkos::TeamPolicy team_policy{ device, 
native_partial_grid, team_size }; + const Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; Kokkos::parallel_for("inplace_matrix_scale", team_policy, detail::device_kernel_inplace_matrix_scale{ num_rhs, lhs_d.get().get(), scale, offsets.x, offsets.y, partial_grid.x }); } From 0f5663f285d638a6427e6682e5535e5f127ae1f9 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 6 Nov 2024 14:50:51 +0100 Subject: [PATCH 051/123] Change wrong Kokkos::Hip to Kokkos::HIP. --- include/plssvm/backends/Kokkos/detail/conditional_execution.hpp | 2 +- src/plssvm/backends/Kokkos/detail/device_wrapper.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp index 95d4c8300..752405f76 100644 --- a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp +++ b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp @@ -48,7 +48,7 @@ namespace plssvm::kokkos::detail { #endif //***************************************************// -// Kokkos::Hip // +// Kokkos::HIP // //***************************************************// /** diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index 5fa580aae..10013d227 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -46,7 +46,7 @@ std::vector get_device_list(const execution_space space, [[maybe hipStreamCreate(&stream); // create Kokkos execution space for the specific device // Note: it is important to pass the hipStream_t lifetime to be managed by Kokkos - devices.emplace_back(Kokkos::Hip(stream, Kokkos::Impl::ManageStream::yes)); + devices.emplace_back(Kokkos::HIP(stream, Kokkos::Impl::ManageStream::yes)); } }); break; From 0bcd6d74a147c2191ea1281bab729bbecaaf2607 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 6 
Nov 2024 14:51:46 +0100 Subject: [PATCH 052/123] Add missing namespace qualifiers and function call parenthesis. --- src/plssvm/backends/Kokkos/detail/utility.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index b5451bc2c..bd07e4fab 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -58,7 +58,7 @@ std::map> available_target_platfor // list all potential target platforms currently available in SYCL PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { std::unordered_set targets{}; - for (const auto &platform : sycl::platform::get_platforms()) { + for (const auto &platform : ::sycl::platform::get_platforms()) { for (const auto &device : platform.get_devices()) { // Note: Kokkos is Intel LLVM/DPC++/icpx only -> we can use the specific implementation defined enum values if (device.is_cpu()) { @@ -132,7 +132,7 @@ std::string get_device_name([[maybe_unused]] const device_wrapper &dev) { }); case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL([&]() { - return dev.get().sycl_queue.get_device().get_info(); + return dev.get().sycl_queue().get_device().get_info<::sycl::info::device::name>(); }); case execution_space::hpx: return "HPX CPU host device"; From f38fe3777f65917d55df96acce342905a0037cf4 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 6 Nov 2024 14:52:02 +0100 Subject: [PATCH 053/123] Add missing switch break. 
--- src/plssvm/backends/Kokkos/detail/device_wrapper.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index 10013d227..b232a3316 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -91,6 +91,7 @@ std::vector get_device_list(const execution_space space, [[maybe PLSSVM_KOKKOS_BACKEND_INVOKE_IF_THREADS([&]() { devices.emplace_back(Kokkos::Threads{}); }); + break; case execution_space::serial: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SERIAL([&]() { devices.emplace_back(Kokkos::Serial{}); From c9cd1c5b7d5b9f617ed31164db64ac0c13cf4727 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 6 Nov 2024 16:50:43 +0100 Subject: [PATCH 054/123] Fix usage of wrong Kokkos HPX namespace. --- include/plssvm/backends/Kokkos/detail/conditional_execution.hpp | 2 +- src/plssvm/backends/Kokkos/detail/device_wrapper.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp index 752405f76..20fc118f6 100644 --- a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp +++ b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp @@ -94,7 +94,7 @@ namespace plssvm::kokkos::detail { #endif //***************************************************// -// Kokkos::HPX // +// Kokkos::Experimental::HPX // //***************************************************// /** diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index b232a3316..2daa03b6d 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -59,7 +59,7 @@ std::vector get_device_list(const execution_space space, [[maybe break; case execution_space::hpx: 
PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX([&]() { - devices.emplace_back(Kokkos::Hpx{}); + devices.emplace_back(Kokkos::Experimental::HPX{}); }); break; case execution_space::openmp: From eb5cd36e5e67225993f1d662c53cb8ce729a4ecc Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 6 Nov 2024 17:43:33 +0100 Subject: [PATCH 055/123] Implement SYCL specific device selection (supporting potentially multi-GPU). --- .../backends/Kokkos/detail/device_wrapper.cpp | 41 ++++++++++++++++--- src/plssvm/backends/Kokkos/detail/utility.cpp | 2 +- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index 2daa03b6d..e65031538 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -11,7 +11,8 @@ #include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked -#include "plssvm/detail/utility.hpp" // plssvm::detail::unreachable +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::as_lower_case +#include "plssvm/detail/utility.hpp" // plssvm::detail::contains #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -51,11 +52,39 @@ std::vector get_device_list(const execution_space space, [[maybe }); break; case execution_space::sycl: - PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { - // TODO: use all available devices -> not that trivial - // TODO: handle target <- if provide queue -> managed? 
- devices.emplace_back(Kokkos::SYCL{}); - }); + PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(([&]() { + // all user provided sycl::queues must be in-order queues + ::sycl::property_list props{ ::sycl::property::queue::in_order{} }; + static ::sycl::queue q; + + for (const auto &platform : ::sycl::platform::get_platforms()) { + for (const auto &device : platform.get_devices()) { + // Note: Kokkos is IntelLLVM/DPC++/icpx only + if (device.is_cpu() && target == target_platform::cpu) { + q = ::sycl::queue{ device, props }; + devices.emplace_back(Kokkos::SYCL{ q }); + } else if (device.is_gpu()) { + // the current device is a GPU + // get vendor string and convert it to all lower case + const std::string vendor_string = ::plssvm::detail::as_lower_case(device.get_info<::sycl::info::device::vendor>()); + // get platform name of current GPU device and convert it to all lower case + const std::string platform_string = ::plssvm::detail::as_lower_case(platform.get_info<::sycl::info::platform::name>()); + + // check vendor string and insert to correct target platform + if (::plssvm::detail::contains(vendor_string, "nvidia") && target == target_platform::gpu_nvidia) { + q = ::sycl::queue{ device, props }; + devices.emplace_back(Kokkos::SYCL{ q }); + } else if ((::plssvm::detail::contains(vendor_string, "amd") || ::plssvm::detail::contains(vendor_string, "advanced micro devices")) && target == target_platform::gpu_amd) { + q = ::sycl::queue{ device, props }; + devices.emplace_back(Kokkos::SYCL{ q }); + } else if (::plssvm::detail::contains(vendor_string, "intel") && target == target_platform::gpu_intel) { + q = ::sycl::queue{ device, props }; + devices.emplace_back(Kokkos::SYCL{ q }); + } + } + } + } + })); break; case execution_space::hpx: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HPX([&]() { diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index bd07e4fab..b922b9a1d 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ 
b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -60,7 +60,7 @@ std::map> available_target_platfor std::unordered_set targets{}; for (const auto &platform : ::sycl::platform::get_platforms()) { for (const auto &device : platform.get_devices()) { - // Note: Kokkos is Intel LLVM/DPC++/icpx only -> we can use the specific implementation defined enum values + // Note: Kokkos is Intel LLVM/DPC++/icpx only if (device.is_cpu()) { targets.insert(target_platform::cpu); } else if (device.is_gpu()) { From c88d156edc97a8f8780b50c935c6ba0aee4f6e81 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 6 Nov 2024 17:44:10 +0100 Subject: [PATCH 056/123] Use icpx specific compilation flags. --- src/plssvm/backends/Kokkos/CMakeLists.txt | 79 +++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt index d2bf6addf..271c32527 100644 --- a/src/plssvm/backends/Kokkos/CMakeLists.txt +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -36,6 +36,85 @@ set_local_and_parent(PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME plssvm-Kokkos) add_library(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} SHARED ${PLSSVM_KOKKOS_SOURCES}) target_link_libraries(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC Kokkos::kokkos) +if (Kokkos_ENABLE_SYCL) + # set SYCL (icpx) specific compilation flags + if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "IntelLLVM") + message(FATAL_ERROR "For Kokkos::SYCL to work, the compiler must be IntelLLVM, but is ${CMAKE_CXX_COMPILER}!") + endif () + + # set icpx specific compiler flags based on the provided PLSSVM_TARGET_PLATFORMS + set(PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "") + # cpu targets + if (DEFINED PLSSVM_CPU_TARGET_ARCHS) + # assemble -fsycl-targets + list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "spir64_x86_64") + endif () + # nvidia targets + if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # assemble -fsycl-targets + list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "nvptx64-nvidia-cuda") + endif () + # amd 
targets + if (DEFINED PLSSVM_AMD_TARGET_ARCHS) + # assemble -fsycl-targets + list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "amdgcn-amd-amdhsa") + # add target specific flags for AOT -> must always be specified for amd targets + if (NOT PLSSVM_NUM_AMD_TARGET_ARCHS EQUAL 1) + message(SEND_ERROR "IntelLLVM currently only supports a single AMD architecture specification but ${PLSSVM_NUM_AMD_TARGET_ARCHS} were provided!") + endif () + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${PLSSVM_AMD_TARGET_ARCHS}) + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${PLSSVM_AMD_TARGET_ARCHS}) + endif () + # intel targets + if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) + # assemble -fsycl-targets + list(APPEND PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "spir64_gen") + endif () + # set -fsycl-targets + list(JOIN PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "," PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING) + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -sycl-std=2020 -fsycl -fsycl-targets=${PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING}) + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fsycl -fsycl-targets=${PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING}) +endif () + +# add option for IntelLLVM Ahead-of-Time (AOT) compilation +option(PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT "Enables Ahead-of-Time compilation for the Kokkos::SYCL execution space using IntelLLVM." 
ON) +if (PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) + message(STATUS "Enabled Ahead-of-Time (AOT) compilation for the Kokkos::SYCL execution space using IntelLLVM.") + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) + target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) + ## set AOT compiler flags + # cpu targets + if (DEFINED PLSSVM_CPU_TARGET_ARCHS) + # add target specific flags for AOT + if (PLSSVM_NUM_CPU_TARGET_ARCHS EQUAL 1) + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") + endif () + endif () + # nvidia targets + if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # add target specific flags for AOT + if (NOT PLSSVM_NUM_NVIDIA_TARGET_ARCHS EQUAL 1) + message(SEND_ERROR "IntelLLVM currently only supports a single NVIDIA architecture specification for AOT but ${PLSSVM_NUM_NVIDIA_TARGET_ARCHS} were provided!") + endif () + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS}) + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS}) + endif () + # intel targets + if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) + # add target specific flags for AOT + list(JOIN PLSSVM_INTEL_TARGET_ARCHS "," PLSSVM_INTEL_TARGET_ARCHS_STRING) + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") + endif () 
+endif () + +if (Kokkos_ENABLE_HWLOC) + message(STATUS "Kokkos was built with hwloc support.") +else() + message(STATUS "Kokkos was NOT built with hwloc support.") +endif() + # link base library against Kokkos library target_link_libraries(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC ${PLSSVM_BASE_LIBRARY_NAME}) From ff1bb2442578a3d4cf1dbac2ce2656f789b71849 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 6 Nov 2024 17:59:42 +0100 Subject: [PATCH 057/123] Add return code checks. --- .../backends/Kokkos/detail/device_wrapper.cpp | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index e65031538..f79eb396f 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -9,6 +9,7 @@ #include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" #include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/string_utility.hpp" // plssvm::detail::as_lower_case @@ -18,6 +19,20 @@ #include "Kokkos_Core.hpp" // Kokkos::num_devices, Kokkos::ExecutionSpace +#if defined(KOKKOS_ENABLE_CUDA) + #define PLSSVM_CUDA_ERROR_CHECK(err) \ + if ((err) != cudaSuccess) { \ + throw plssvm::kokkos::backend_exception{ fmt::format("Kokkos::Cuda assert '{}': {}", cudaGetErrorName(err), cudaGetErrorString(err)) }; \ + } +#endif + +#if defined(KOKKOS_ENABLE_HIP) + #define PLSSVM_HIP_ERROR_CHECK(err) \ + if ((err) != hipSuccess) { \ + throw plssvm::kokkos::backend_exception{ fmt::format("HIP assert '{}': {}", hipGetErrorName(err), hipGetErrorString(err)) 
}; \ + } +#endif + #include // std::vector namespace plssvm::kokkos::detail { @@ -29,9 +44,9 @@ std::vector get_device_list(const execution_space space, [[maybe PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { for (int device = 0; device < Kokkos::num_devices(); ++device) { // create CUDA stream using the CUDA specific functions - cudaSetDevice(device); + PLSSVM_CUDA_ERROR_CHECK(cudaSetDevice(device)); cudaStream_t stream{}; - cudaStreamCreate(&stream); + PLSSVM_CUDA_ERROR_CHECK(cudaStreamCreate(&stream)); // create Kokkos execution space for the specific device // Note: it is important to pass the cudaStream_t lifetime to be managed by Kokkos devices.emplace_back(Kokkos::Cuda(stream, Kokkos::Impl::ManageStream::yes)); @@ -42,9 +57,9 @@ std::vector get_device_list(const execution_space space, [[maybe PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { for (int device = 0; device < Kokkos::num_devices(); ++device) { // HIP CUDA stream using the HIP specific functions - hipSetDevice(device); + PLSSVM_HIP_ERROR_CHECK(hipSetDevice(device)); hipStream_t stream{}; - hipStreamCreate(&stream); + PLSSVM_HIP_ERROR_CHECK(hipStreamCreate(&stream)); // create Kokkos execution space for the specific device // Note: it is important to pass the hipStream_t lifetime to be managed by Kokkos devices.emplace_back(Kokkos::HIP(stream, Kokkos::Impl::ManageStream::yes)); From 0ab2f64319d735a36c61101ccbd9957ad73574e3 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 6 Nov 2024 18:00:13 +0100 Subject: [PATCH 058/123] Move CMake functionality inside correct if. 
--- src/plssvm/backends/Kokkos/CMakeLists.txt | 68 +++++++++++------------ 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt index 271c32527..b66927fa9 100644 --- a/src/plssvm/backends/Kokkos/CMakeLists.txt +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -41,7 +41,7 @@ if (Kokkos_ENABLE_SYCL) if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "IntelLLVM") message(FATAL_ERROR "For Kokkos::SYCL to work, the compiler must be IntelLLVM, but is ${CMAKE_CXX_COMPILER}!") endif () - + # set icpx specific compiler flags based on the provided PLSSVM_TARGET_PLATFORMS set(PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "") # cpu targets @@ -74,47 +74,41 @@ if (Kokkos_ENABLE_SYCL) list(JOIN PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS "," PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING) target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -sycl-std=2020 -fsycl -fsycl-targets=${PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING}) target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -fsycl -fsycl-targets=${PLSSVM_KOKKOS_SYCL_FSYCL_TARGETS_STRING}) -endif () - -# add option for IntelLLVM Ahead-of-Time (AOT) compilation -option(PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT "Enables Ahead-of-Time compilation for the Kokkos::SYCL execution space using IntelLLVM." 
ON) -if (PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) - message(STATUS "Enabled Ahead-of-Time (AOT) compilation for the Kokkos::SYCL execution space using IntelLLVM.") - target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) - target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) - ## set AOT compiler flags - # cpu targets - if (DEFINED PLSSVM_CPU_TARGET_ARCHS) - # add target specific flags for AOT - if (PLSSVM_NUM_CPU_TARGET_ARCHS EQUAL 1) - target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") - target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") + + # add option for IntelLLVM Ahead-of-Time (AOT) compilation + option(PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT "Enables Ahead-of-Time compilation for the Kokkos::SYCL execution space using IntelLLVM." 
ON) + if (PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) + message(STATUS "Enabled Ahead-of-Time (AOT) compilation for the Kokkos::SYCL execution space using IntelLLVM.") + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PRIVATE PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) + target_compile_definitions(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT) + ## set AOT compiler flags + # cpu targets + if (DEFINED PLSSVM_CPU_TARGET_ARCHS) + # add target specific flags for AOT + if (PLSSVM_NUM_CPU_TARGET_ARCHS EQUAL 1) + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_x86_64 "-march=${PLSSVM_CPU_TARGET_ARCHS}") + endif () endif () - endif () - # nvidia targets - if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) - # add target specific flags for AOT - if (NOT PLSSVM_NUM_NVIDIA_TARGET_ARCHS EQUAL 1) - message(SEND_ERROR "IntelLLVM currently only supports a single NVIDIA architecture specification for AOT but ${PLSSVM_NUM_NVIDIA_TARGET_ARCHS} were provided!") + # nvidia targets + if (DEFINED PLSSVM_NVIDIA_TARGET_ARCHS) + # add target specific flags for AOT + if (NOT PLSSVM_NUM_NVIDIA_TARGET_ARCHS EQUAL 1) + message(SEND_ERROR "IntelLLVM currently only supports a single NVIDIA architecture specification for AOT but ${PLSSVM_NUM_NVIDIA_TARGET_ARCHS} were provided!") + endif () + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS}) + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS}) + endif () + # intel targets + if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) + # add target specific flags for AOT + list(JOIN PLSSVM_INTEL_TARGET_ARCHS "," 
PLSSVM_INTEL_TARGET_ARCHS_STRING) + target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") + target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") endif () - target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS}) - target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=${PLSSVM_NVIDIA_TARGET_ARCHS}) - endif () - # intel targets - if (DEFINED PLSSVM_INTEL_TARGET_ARCHS) - # add target specific flags for AOT - list(JOIN PLSSVM_INTEL_TARGET_ARCHS "," PLSSVM_INTEL_TARGET_ARCHS_STRING) - target_compile_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") - target_link_options(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PRIVATE -Xsycl-target-backend=spir64_gen "-device ${PLSSVM_INTEL_TARGET_ARCHS_STRING}") endif () endif () -if (Kokkos_ENABLE_HWLOC) - message(STATUS "Kokkos was built with hwloc support.") -else() - message(STATUS "Kokkos was NOT built with hwloc support.") -endif() - # link base library against Kokkos library target_link_libraries(${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME} PUBLIC ${PLSSVM_BASE_LIBRARY_NAME}) From 9b7736e635cf4ca7c7892b22b680fc7365923d72 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 6 Nov 2024 18:50:18 +0100 Subject: [PATCH 059/123] Remove old test implementation. 
--- .../backends/Kokkos/detail/device_wrapper.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index f79eb396f..eb82ec0d4 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -70,14 +70,12 @@ std::vector get_device_list(const execution_space space, [[maybe PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL(([&]() { // all user provided sycl::queues must be in-order queues ::sycl::property_list props{ ::sycl::property::queue::in_order{} }; - static ::sycl::queue q; for (const auto &platform : ::sycl::platform::get_platforms()) { for (const auto &device : platform.get_devices()) { // Note: Kokkos is IntelLLVM/DPC++/icpx only if (device.is_cpu() && target == target_platform::cpu) { - q = ::sycl::queue{ device, props }; - devices.emplace_back(Kokkos::SYCL{ q }); + devices.emplace_back(Kokkos::SYCL{ ::sycl::queue{ device, props } }); } else if (device.is_gpu()) { // the current device is a GPU // get vendor string and convert it to all lower case @@ -87,14 +85,11 @@ std::vector get_device_list(const execution_space space, [[maybe // check vendor string and insert to correct target platform if (::plssvm::detail::contains(vendor_string, "nvidia") && target == target_platform::gpu_nvidia) { - q = ::sycl::queue{ device, props }; - devices.emplace_back(Kokkos::SYCL{ q }); + devices.emplace_back(Kokkos::SYCL{ ::sycl::queue{ device, props } }); } else if ((::plssvm::detail::contains(vendor_string, "amd") || ::plssvm::detail::contains(vendor_string, "advanced micro devices")) && target == target_platform::gpu_amd) { - q = ::sycl::queue{ device, props }; - devices.emplace_back(Kokkos::SYCL{ q }); + devices.emplace_back(Kokkos::SYCL{ ::sycl::queue{ device, props } }); } else if (::plssvm::detail::contains(vendor_string, "intel") && target == target_platform::gpu_intel) { - q = 
::sycl::queue{ device, props }; - devices.emplace_back(Kokkos::SYCL{ q }); + devices.emplace_back(Kokkos::SYCL{ ::sycl::queue{ device, props } }); } } } From a1d13d9333a997eaea955360cc31fe40fbd6b40b Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 11:05:46 +0100 Subject: [PATCH 060/123] Fix TODO in documentation. --- include/plssvm/backends/Kokkos/detail/pinned_memory.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp b/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp index dffb0d1c7..cb328e6d3 100644 --- a/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp +++ b/include/plssvm/backends/Kokkos/detail/pinned_memory.hpp @@ -6,7 +6,7 @@ * @license This file is part of the PLSSVM project which is released under the MIT license. * See the LICENSE.md file in the project root for full license information. * - * @brief Small wrapper around RAII enabled TODO. + * @brief Small wrapper around RAII for registering memory as pinned memory. */ #ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_PINNED_MEMORY_HPP_ From a0af87c328998c9a755adf3e483c31893d378f52 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 11:46:02 +0100 Subject: [PATCH 061/123] Correctly use the Kokkos::Experimental::OpenMPTarget namespace. 
--- .../backends/Kokkos/detail/conditional_execution.hpp | 2 +- include/plssvm/backends/Kokkos/execution_space.hpp | 8 ++++---- src/plssvm/backends/Kokkos/detail/device_wrapper.cpp | 2 +- tests/backends/Kokkos/execution_space.cpp | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp index 20fc118f6..f981fb2bd 100644 --- a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp +++ b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp @@ -140,7 +140,7 @@ namespace plssvm::kokkos::detail { #endif //***************************************************// -// Kokkos::OpenMPTarget // +// Kokkos::Experimental::OpenMPTarget // //***************************************************// /** diff --git a/include/plssvm/backends/Kokkos/execution_space.hpp b/include/plssvm/backends/Kokkos/execution_space.hpp index 07ecadf24..abf8b227d 100644 --- a/include/plssvm/backends/Kokkos/execution_space.hpp +++ b/include/plssvm/backends/Kokkos/execution_space.hpp @@ -126,11 +126,11 @@ struct execution_space_to_kokkos_type { #if defined(KOKKOS_ENABLE_OPENMPTARGET) /** - * @brief Convert an `execution_space::openmp_target` enum value to a `Kokkos::OpenMPTarget` Kokkos::ExecutionSpace type. + * @brief Convert an `execution_space::openmp_target` enum value to a `Kokkos::Experimental::OpenMPTarget` Kokkos::ExecutionSpace type. */ template <> struct execution_space_to_kokkos_type { - using type = Kokkos::OpenMPTarget; + using type = Kokkos::Experimental::OpenMPTarget; }; #endif @@ -233,10 +233,10 @@ struct kokkos_type_to_execution_space { #if defined(KOKKOS_ENABLE_OPENMPTARGET) /** - * @brief Convert a `Kokkos::OpenMPTarget` Kokkos::ExecutionSpace type to an `execution_space::openmp_target` enum value. 
+ * @brief Convert a `Kokkos::Experimental::OpenMPTarget` Kokkos::ExecutionSpace type to an `execution_space::openmp_target` enum value. */ template <> -struct kokkos_type_to_execution_space { +struct kokkos_type_to_execution_space { constexpr static execution_space value = execution_space::openmp_target; }; #endif diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index eb82ec0d4..a020454ef 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -117,7 +117,7 @@ std::vector get_device_list(const execution_space space, [[maybe case execution_space::openmp_target: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET([&]() { // TODO: multi-GPU? - devices.emplace_back(Kokkos::OpenMPTarget{}); + devices.emplace_back(Kokkos::Experimental::OpenMPTarget{}); }); break; case execution_space::openacc: diff --git a/tests/backends/Kokkos/execution_space.cpp b/tests/backends/Kokkos/execution_space.cpp index 2073d1fd4..24c3135de 100644 --- a/tests/backends/Kokkos/execution_space.cpp +++ b/tests/backends/Kokkos/execution_space.cpp @@ -86,7 +86,7 @@ TEST(KokkosExecutionSpace, execution_space_to_kokkos_type) { ::testing::StaticAssertTypeEq, Kokkos::OpenMP>(); #endif #if defined(KOKKOS_ENABLE_OPENMPTARGET) - ::testing::StaticAssertTypeEq, Kokkos::OpenMPTarget>(); + ::testing::StaticAssertTypeEq, Kokkos::Experimental::OpenMPTarget>(); #endif #if defined(KOKKOS_ENABLE_OPENACC) ::testing::StaticAssertTypeEq, Kokkos::OpenACC>(); @@ -117,7 +117,7 @@ TEST(KokkosExecutionSpace, kokkos_type_to_execution_space) { EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openmp); #endif #if defined(KOKKOS_ENABLE_OPENMPTARGET) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openmp_target); + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, 
plssvm::kokkos::execution_space::openmp_target); #endif #if defined(KOKKOS_ENABLE_OPENACC) EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openacc); From 39e447cdac14361f2d4d662906692ffc268df875 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 13:44:59 +0100 Subject: [PATCH 062/123] Add support for more Kokkos execution spaces. NOTE: OpenMPTarget and OpenACC are currently NOT supported. --- src/plssvm/backends/Kokkos/csvm.cpp | 164 +++++++++++------- .../backends/Kokkos/detail/device_wrapper.cpp | 4 +- .../backends/Kokkos/detail/pinned_memory.cpp | 2 - src/plssvm/backends/Kokkos/detail/utility.cpp | 3 - 4 files changed, 107 insertions(+), 66 deletions(-) diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 806e908c0..28823fd83 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -12,9 +12,9 @@ #include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_*, PLSSVM_KOKKOS_BACKEND_INVOKE_IF_ #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr #include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::{device_wrapper, get_device_list} -#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::get_runtime_version // TODO: docu +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::{available_target_platform_to_execution_space_mapping, get_kokkos_version, dim_type_to_native, get_device_name, device_synchronize} #include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, list_available_execution_spaces} #include "plssvm/backends/Kokkos/kernel/cg_explicit/blas.hpp" 
// plssvm::kokkos::detail::{device_kernel_symm, device_kernel_symm_mirror, device_kernel_inplace_matrix_add, device_kernel_inplace_matrix_scale} #include "plssvm/backends/Kokkos/kernel/cg_explicit/kernel_matrix_assembly.hpp" // plssvm::kokkos::detail::device_kernel_assembly #include "plssvm/backends/Kokkos/kernel/cg_implicit/kernel_matrix_assembly_blas.hpp" // plssvm::kokkos::detail::device_kernel_assembly_symm @@ -33,7 +33,8 @@ #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level -#include "Kokkos_Core.hpp" // TODO: docu +#include "Kokkos_Core.hpp" // Kokkos::TeamPolicy, Kokkos::ParallelForTag, Kokkos::parallel_for, Kokkos::PerTeam + // Kokkos::Experimental::HPX::impl_max_hardware_threads, Kokkos::OpenMP::impl_max_hardware_threads, Kokkos::Threads::impl_max_hardware_threads #include "fmt/core.h" // fmt::format #include "fmt/format.h" // fmt::format @@ -42,11 +43,19 @@ #include // std::size_t #include // std::terminate #include // std::cout, std::endl +#include // std::numeric_limits::max #include // std::map #include // std::string #include // std::move #include // std::vector +// a dummy class used as functor to the team_size_max function +template +struct dummy { + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type &) const { } +}; + namespace plssvm::kokkos { csvm::csvm(parameter params) : @@ -109,6 +118,11 @@ void csvm::init(const target_platform target) { } } + // Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC currently not supported! 
+ if (space_ == execution_space::openmp_target || space_ == execution_space::openacc) { + throw backend_exception{ fmt::format("The Kokkos execution space {} is currently not supported!", space_) }; + } + plssvm::detail::log(verbosity_level::full, "\nUsing Kokkos ({}) as backend with the Kokkos::ExecutionSpace \"{}\".\n", plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_version", detail::get_kokkos_version() }, @@ -163,129 +177,162 @@ csvm::~csvm() { } std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { - // TODO: implement for other execution spaces - std::vector<::plssvm::detail::memory_size> res(this->num_available_devices()); + std::vector<::plssvm::detail::memory_size> device_memory(this->num_available_devices()); switch (space_) { case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { - res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().cuda_device_prop().totalGlobalMem) }; + device_memory[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().cuda_device_prop().totalGlobalMem) }; } }); break; case execution_space::hip: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_HIP([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { - res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().hip_device_prop().totalGlobalMem) }; + device_memory[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().hip_device_prop().totalGlobalMem) }; } }); break; case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { - res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::global_mem_size>()) }; + 
device_memory[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::global_mem_size>()) }; } }); break; - case execution_space::openmp: case execution_space::hpx: + case execution_space::openmp: case execution_space::threads: case execution_space::serial: - return std::vector<::plssvm::detail::memory_size>(this->num_available_devices(), ::plssvm::detail::get_system_memory()); + // NOTE: for these execution spaces, this->num_available_devices will always return 1 + PLSSVM_ASSERT(this->num_available_devices() == 1, "The host side Kokkos execution spaces should always only be represented using a single device!"); + device_memory[0] = ::plssvm::detail::get_system_memory(); + break; + // TODO: implement for Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC case execution_space::openmp_target: case execution_space::openacc: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } - return res; + return device_memory; } std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const { - [[maybe_unused]] std::vector<::plssvm::detail::memory_size> res(this->num_available_devices()); - // TODO: implement for other execution spaces + std::vector<::plssvm::detail::memory_size> max_mem_alloc_size(this->num_available_devices()); switch (space_) { case execution_space::cuda: case execution_space::hip: - return this->get_device_memory(); + max_mem_alloc_size = this->get_device_memory(); + break; case execution_space::sycl: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_SYCL([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { - res[device_id] = ::plssvm::detail::memory_size{ static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::max_mem_alloc_size>()) }; + max_mem_alloc_size[device_id] = ::plssvm::detail::memory_size{ 
static_cast(devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::max_mem_alloc_size>()) }; } }); break; - case execution_space::openmp: case execution_space::hpx: + case execution_space::openmp: case execution_space::threads: case execution_space::serial: - return this->get_device_memory(); + max_mem_alloc_size = this->get_device_memory(); + break; + // TODO: implement for Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC case execution_space::openmp_target: case execution_space::openacc: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } - return res; + return max_mem_alloc_size; } std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); - // TODO: implement for other execution spaces - switch (space_) { - case execution_space::cuda: - PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA([&]() { - return static_cast(devices_[device_id].get().cuda_device_prop().maxThreadsPerBlock); - }); - case execution_space::hip: - PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP([&]() { - return static_cast(devices_[device_id].get().hip_device_prop().maxThreadsPerBlock); - }); - case execution_space::sycl: - PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL([&]() { - return devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::info::device::max_work_group_size>(); - }); - case execution_space::openmp: - return 16; // TODO: most likely dependent on the number of cores in Kokkos... 
- case execution_space::serial: - // only one thread allowed in serial execution - return 1; - case execution_space::openmp_target: - case execution_space::openacc: - case execution_space::hpx: - case execution_space::threads: - throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; - } - // all possible cases should be handled by the previous switch - // -> silence missing return statement compiler warnings due to throw statement - ::plssvm::detail::unreachable(); + // NOTE: the maximum theoretical work-group size, may be additionally limited by the amount of used scratch memory + return devices_[device_id].execute_and_return([](const auto &device) { + using kokkos_execution_space_type = ::plssvm::detail::remove_cvref_t; + // NOTE: CUDA + HIP + SYCL: returns the maximum possible number of threads, due to no further limitations in the dummy functor (like, e.g., scratch memory) + // NOTE: HPX + Serial: hardcoded to 1 + // NOTE: OpenMP: should be 1-2; most likely 1 + // NOTE: Threads: should be equal to number of hardware threads IF hwloc is enabled; otherwise 1 + // NOTE: OpenMPTarget: hardcoded to 256 + // NOTE: OpenACC: hardcoded to 512 + + // NOTE: the functor types doesn't matter -> the dummy class + return Kokkos::TeamPolicy{}.team_size_max(dummy{}, Kokkos::ParallelForTag{}); + }); } -::plssvm::detail::dim_type csvm::get_max_grid_size(const std::size_t device_id) const { +::plssvm::detail::dim_type csvm::get_max_grid_size([[maybe_unused]] const std::size_t device_id) const { PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); // NOTE: Kokkos only supports one-dimensional execution ranges! // NOTE: we only use two-dimensional kernels! 
- // TODO: implement for other execution spaces switch (space_) { case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA(([&]() -> ::plssvm::detail::dim_type { const cudaDeviceProp &prop = devices_[device_id].get().cuda_device_prop(); - const auto max_grid_size = static_cast(std::sqrt(prop.maxGridSize[0])); - return { max_grid_size, max_grid_size, std::size_t{ 1 } }; + const auto max_grid_size = static_cast(std::sqrt(prop.maxGridSize[0])); + return { max_grid_size, max_grid_size, 1ull }; })); case execution_space::hip: PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HIP(([&]() -> ::plssvm::detail::dim_type { - const hipDeviceProp &prop = devices_[device_id].get().hip_device_prop(); - const auto max_grid_size = static_cast(std::sqrt(prop.maxGridSize[0])); - return { max_grid_size, max_grid_size, std::size_t{ 1 } }; + const hipDeviceProp_t &prop = devices_[device_id].get().hip_device_prop(); + const auto max_grid_size = static_cast(std::sqrt(prop.maxGridSize[0])); + return { max_grid_size, max_grid_size, 1ull }; + })); + case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SYCL(([&]() -> ::plssvm::detail::dim_type { + // TODO: replace with standardized function if there will be one in the future +#if defined(SYCL_EXT_ONEAPI_MAX_WORK_GROUP_QUERY) + const ::sycl::id<3> native_range = devices_[device_id].get().sycl_queue().get_device().get_info<::sycl::ext::oneapi::experimental::info::device::max_work_groups<3>>(); +#else + // fallback to maximum theoretical value, may break at runtime! + ::sycl::id<3> native_range{}; + const std::size_t max_int32 = std::numeric_limits::max(); + const std::size_t max_uint16 = std::numeric_limits::max(); + if (target_ == target_platform::cpu) { + native_range = ::sycl::id<3>{ max_int32, max_int32, max_int32 }; + } else { + native_range = ::sycl::id<3>{ max_int32, max_uint16, max_uint16 }; + } +#endif + // note: account for SYCL's different iteration range! 
+ return { native_range[2], native_range[1], native_range[0] }; + })); + case execution_space::hpx: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_HPX(([&]() -> ::plssvm::detail::dim_type { + // get the total number of threads + const std::size_t num_threads = Kokkos::Experimental::HPX::impl_max_hardware_threads(); + // set the maximum league size to twice the number of available hardware threads + // NOTE: this is just an estimate and can or should be changed depending on the performance + const auto league_size = static_cast(std::ceil(std::sqrt(num_threads * 2))); + return { league_size, league_size, 1ull }; })); case execution_space::openmp: - return { 16, 16, 1 }; // TODO: correct values + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_OPENMP(([&]() -> ::plssvm::detail::dim_type { + // get the total number of threads + const std::size_t num_threads = Kokkos::OpenMP::impl_max_hardware_threads(); + // set the maximum league size to twice the number of available hardware threads + // NOTE: this is just an estimate and can or should be changed depending on the performance + const auto league_size = static_cast(std::ceil(std::sqrt(num_threads * 2))); + return { league_size, league_size, 1ull }; + })); + case execution_space::threads: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_THREADS(([&]() -> ::plssvm::detail::dim_type { + // get the total number of threads + const std::size_t num_threads = Kokkos::Threads::impl_max_hardware_threads(); + // set the maximum league size to twice the number of available hardware threads + // NOTE: this is just an estimate and can or should be changed depending on the performance + const auto league_size = static_cast(std::ceil(std::sqrt(num_threads * 2))); + return { league_size, league_size, 1ull }; + })); case execution_space::serial: - return { 1, 1, 1 }; // TODO: correct values - case execution_space::sycl: + PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_SERIAL(([&]() -> ::plssvm::detail::dim_type { + return { std::numeric_limits::max(), 
std::numeric_limits::max(), 1ull }; + })); + // TODO: implement for Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC case execution_space::openmp_target: case execution_space::openacc: - case execution_space::hpx: - case execution_space::threads: throw backend_exception{ fmt::format("Currently not implemented for the execution space: {}!", space_) }; } // all possible cases should be handled by the previous switch @@ -328,7 +375,6 @@ auto csvm::run_assemble_kernel_matrix_explicit(const std::size_t device_id, cons // create a Kokkos TeamPolicy Kokkos::TeamPolicy team_policy{ device, native_partial_grid, team_size }; - // TODO: test MDRangeTeamPolicy?! switch (params.kernel_type) { case kernel_function_type::linear: diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index a020454ef..6d1c950dd 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -116,13 +116,13 @@ std::vector get_device_list(const execution_space space, [[maybe break; case execution_space::openmp_target: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENMPTARGET([&]() { - // TODO: multi-GPU? + // TODO: implement multi-GPU support? devices.emplace_back(Kokkos::Experimental::OpenMPTarget{}); }); break; case execution_space::openacc: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC([&]() { - // TODO: multi-GPU? + // TODO: implement multi-GPU support? devices.emplace_back(Kokkos::OpenACC{}); }); break; diff --git a/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp b/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp index dfae19661..919cbdaa1 100644 --- a/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp +++ b/src/plssvm/backends/Kokkos/detail/pinned_memory.cpp @@ -40,8 +40,6 @@ pinned_memory::~pinned_memory() { } } -// TODO: check if implementable via Kokkos? 
- template class pinned_memory; template class pinned_memory; diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index b922b9a1d..45392e509 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -36,7 +36,6 @@ int dim_type_to_native(const ::plssvm::detail::dim_type &dims) { std::map> available_target_platform_to_execution_space_mapping() { std::map> available_map{}; - // TODO: only return really POSSIBLE target platforms? // iterate over all available execution spaces for (const execution_space space : list_available_execution_spaces()) { switch (space) { @@ -139,10 +138,8 @@ std::string get_device_name([[maybe_unused]] const device_wrapper &dev) { case execution_space::openmp: return "OpenMP CPU host device"; case execution_space::openmp_target: - // TODO: device name? return "OpenMP target device"; case execution_space::openacc: - // TODO: device name? return "OpenACC target device"; case execution_space::threads: return "std::threads CPU host device"; From 63d38144d44651eaf7728ea8754d59a750d51970 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 13:54:27 +0100 Subject: [PATCH 063/123] Correctly use Kokkos::Experimental::OpenACC instead of Kokkos::OpenACC. 
--- include/plssvm/backends/Kokkos/detail/conditional_execution.hpp | 2 +- src/plssvm/backends/Kokkos/detail/device_wrapper.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp index f981fb2bd..559c9e75c 100644 --- a/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp +++ b/include/plssvm/backends/Kokkos/detail/conditional_execution.hpp @@ -163,7 +163,7 @@ namespace plssvm::kokkos::detail { #endif //***************************************************// -// Kokkos::OpenACC // +// Kokkos::Experimental::OpenACC // //***************************************************// /** diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index 6d1c950dd..bfd79d9d2 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -123,7 +123,7 @@ std::vector get_device_list(const execution_space space, [[maybe case execution_space::openacc: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_OPENACC([&]() { // TODO: implement multi-GPU support? - devices.emplace_back(Kokkos::OpenACC{}); + devices.emplace_back(Kokkos::Experimental::OpenACC{}); }); break; case execution_space::threads: From b673f2834acac44a6469ed742e4ebba75d4c7718 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 14:18:25 +0100 Subject: [PATCH 064/123] Split execution_space header in multiple headers such that the Kokkos_Core.hpp header must not be included in execution_space.hpp. 
--- .../constexpr_available_execution_spaces.hpp | 65 +++++ .../Kokkos/detail/device_view_wrapper.hpp | 8 +- .../backends/Kokkos/detail/device_wrapper.hpp | 6 +- .../backends/Kokkos/execution_space.hpp | 264 ------------------ .../Kokkos/execution_space_type_traits.hpp | 238 ++++++++++++++++ .../backends/Kokkos/execution_space.cpp | 3 +- tests/backends/Kokkos/CMakeLists.txt | 2 + .../constexpr_available_execution_spaces.cpp | 18 ++ tests/backends/Kokkos/execution_space.cpp | 70 +---- .../Kokkos/execution_space_type_traits.cpp | 75 +++++ 10 files changed, 410 insertions(+), 339 deletions(-) create mode 100644 include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp create mode 100644 include/plssvm/backends/Kokkos/execution_space_type_traits.hpp create mode 100644 tests/backends/Kokkos/detail/constexpr_available_execution_spaces.cpp create mode 100644 tests/backends/Kokkos/execution_space_type_traits.cpp diff --git a/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp b/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp new file mode 100644 index 000000000..5d964f66a --- /dev/null +++ b/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp @@ -0,0 +1,65 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Function to list all available execution spaces at compile time. + * @note Must be a separate file such that the Kokkos header must not be included in the "execution_space.hpp" file. 
+ */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_EXECUTION_SPACES_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_EXECUTION_SPACES_HPP_ + +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space + +#include "Kokkos_Core.hpp" // Kokkos macros, Kokkos ExecutionSpace types + +#include // std::array + +namespace plssvm::kokkos::detail { + +/** + * @brief List all available Kokkos::ExecutionSpaces at compile time. + * @details At least one execution space must **always** be available! + * @return a `std::array` containing all available execution spaces (`[[nodiscard]]`) + */ +[[nodiscard]] inline constexpr auto constexpr_available_execution_spaces() noexcept { + // Note: the trailing comma is explicitly allowed by the standard + // Note: the order is intentionally chosen this way -> the order of the entries determines the priority when using a backend to run our code + return std::array{ +#if defined(KOKKOS_ENABLE_CUDA) + execution_space::cuda, +#endif +#if defined(KOKKOS_ENABLE_HIP) + execution_space::hip, +#endif +#if defined(KOKKOS_ENABLE_SYCL) + execution_space::sycl, +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + execution_space::openmp_target, +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + execution_space::openacc, +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + execution_space::openmp, +#endif +#if defined(KOKKOS_ENABLE_THREADS) + execution_space::threads, +#endif +#if defined(KOKKOS_ENABLE_HPX) + execution_space::hpx, +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + execution_space::serial, +#endif + }; +} + +} // namespace plssvm::kokkos::detail + +#endif // PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_EXECUTION_SPACES_HPP_ diff --git a/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp index a3019829e..ea60bb1fd 100644 --- a/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp +++ 
b/include/plssvm/backends/Kokkos/detail/device_view_wrapper.hpp @@ -12,9 +12,11 @@ #ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_VIEW_WRAPPER_HPP_ -#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, execution_space_to_kokkos_type_t}, plssvm::kokkos::detail::constexpr_available_execution_spaces -#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t +#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" // plssvm::kokkos::execution_space_to_kokkos_type_t +#include "plssvm/detail/type_traits.hpp" // plssvm::detail::remove_cvref_t #include "Kokkos_Core.hpp" // Kokkos::View, Kokkos::ExecutionSpace diff --git a/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp b/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp index 30b2a91be..da0aaf755 100644 --- a/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp +++ b/include/plssvm/backends/Kokkos/detail/device_wrapper.hpp @@ -12,8 +12,10 @@ #ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_WRAPPER_HPP_ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_DEVICE_WRAPPER_HPP_ -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{execution_space, execution_space_to_kokkos_type_t}, plssvm::kokkos::detail::constexpr_available_execution_spaces -#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // 
plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" // plssvm::kokkos::execution_space_to_kokkos_type_t +#include "plssvm/target_platforms.hpp" // plssvm::target_platform #include // std::array #include // std::size_t diff --git a/include/plssvm/backends/Kokkos/execution_space.hpp b/include/plssvm/backends/Kokkos/execution_space.hpp index abf8b227d..d77ae845b 100644 --- a/include/plssvm/backends/Kokkos/execution_space.hpp +++ b/include/plssvm/backends/Kokkos/execution_space.hpp @@ -13,12 +13,9 @@ #define PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_HPP_ #pragma once -#include "Kokkos_Core.hpp" // Kokkos macros, Kokkos ExecutionSpace types - #include "fmt/base.h" // fmt::formatter #include "fmt/ostream.h" // fmt::ostream_formatter -#include // std::array #include // std::ostream forward declaration #include // std::vector @@ -64,267 +61,6 @@ std::ostream &operator<<(std::ostream &out, execution_space space); */ std::istream &operator>>(std::istream &in, execution_space &space); -//***************************************************// -// execution_space_to_kokkos_type // -//***************************************************// - -/** - * @brief Uninstantiated base type to convert an `execution_space` enum value to a Kokkos::ExecutionSpace type. - */ -template -struct execution_space_to_kokkos_type; - -#if defined(KOKKOS_ENABLE_CUDA) -/** - * @brief Convert an `execution_space::cuda` enum value to a `Kokkos::Cuda` Kokkos::ExecutionSpace type. - */ -template <> -struct execution_space_to_kokkos_type { - using type = Kokkos::Cuda; -}; -#endif - -#if defined(KOKKOS_ENABLE_HIP) -/** - * @brief Convert an `execution_space::hip` enum value to a `Kokkos::HIP` Kokkos::ExecutionSpace type. 
- */ -template <> -struct execution_space_to_kokkos_type { - using type = Kokkos::HIP; -}; -#endif - -#if defined(KOKKOS_ENABLE_SYCL) -/** - * @brief Convert an `execution_space::sycl` enum value to a `Kokkos::SYCL` Kokkos::ExecutionSpace type. - */ -template <> -struct execution_space_to_kokkos_type { - using type = Kokkos::SYCL; -}; -#endif - -#if defined(KOKKOS_ENABLE_HPX) -/** - * @brief Convert an `execution_space::hpx` enum value to a `Kokkos::Experimental::HPX` Kokkos::ExecutionSpace type. - */ -template <> -struct execution_space_to_kokkos_type { - using type = Kokkos::Experimental::HPX; -}; -#endif - -#if defined(KOKKOS_ENABLE_OPENMP) -/** - * @brief Convert an `execution_space::openmp` enum value to a `Kokkos::OpenMP` Kokkos::ExecutionSpace type. - */ -template <> -struct execution_space_to_kokkos_type { - using type = Kokkos::OpenMP; -}; -#endif - -#if defined(KOKKOS_ENABLE_OPENMPTARGET) -/** - * @brief Convert an `execution_space::openmp_target` enum value to a `Kokkos::Experimental::OpenMPTarget` Kokkos::ExecutionSpace type. - */ -template <> -struct execution_space_to_kokkos_type { - using type = Kokkos::Experimental::OpenMPTarget; -}; -#endif - -#if defined(KOKKOS_ENABLE_OPENACC) -/** - * @brief Convert an `execution_space::openacc` enum value to a `Kokkos::Experimental::OpenACC` Kokkos::ExecutionSpace type. - */ -template <> -struct execution_space_to_kokkos_type { - using type = Kokkos::Experimental::OpenACC; -}; -#endif - -#if defined(KOKKOS_ENABLE_THREADS) -/** - * @brief Convert an `execution_space::threads` enum value to a `Kokkos::Threads` Kokkos::ExecutionSpace type. - */ -template <> -struct execution_space_to_kokkos_type { - using type = Kokkos::Threads; -}; -#endif - -#if defined(KOKKOS_ENABLE_SERIAL) -/** - * @brief Convert an `execution_space::serial` enum value to a `Kokkos::Serial` Kokkos::ExecutionSpace type. 
- */ -template <> -struct execution_space_to_kokkos_type { - using type = Kokkos::Serial; -}; -#endif - -/** - * @brief Convert the `execution_space` @p space to the corresponding Kokkos::ExecutionSpace type. - * @tparam space the enum value to convert - */ -template -using execution_space_to_kokkos_type_t = typename execution_space_to_kokkos_type::type; - -//***************************************************// -// kokkos_type_to_execution_space // -//***************************************************// - -/** - * @brief Uninstantiated base type to convert a Kokkos::ExecutionSpace type to a `execution_space` enum value. - */ -template -struct kokkos_type_to_execution_space; - -#if defined(KOKKOS_ENABLE_CUDA) -/** - * @brief Convert a `Kokkos::Cuda` Kokkos::ExecutionSpace type to an `execution_space::cuda` enum value. - */ -template <> -struct kokkos_type_to_execution_space { - constexpr static execution_space value = execution_space::cuda; -}; -#endif - -#if defined(KOKKOS_ENABLE_HIP) -/** - * @brief Convert a `Kokkos::HIP` Kokkos::ExecutionSpace type to an `execution_space::hip` enum value. - */ -template <> -struct kokkos_type_to_execution_space { - constexpr static execution_space value = execution_space::hip; -}; -#endif - -#if defined(KOKKOS_ENABLE_SYCL) -/** - * @brief Convert a `Kokkos::SYCL` Kokkos::ExecutionSpace type to an `execution_space::sycl` enum value. - */ -template <> -struct kokkos_type_to_execution_space { - constexpr static execution_space value = execution_space::sycl; -}; -#endif - -#if defined(KOKKOS_ENABLE_HPX) -/** - * @brief Convert a `Kokkos::Experimental::HPX` Kokkos::ExecutionSpace type to an `execution_space::hpx` enum value. - */ -template <> -struct kokkos_type_to_execution_space { - constexpr static execution_space value = execution_space::hpx; -}; -#endif - -#if defined(KOKKOS_ENABLE_OPENMP) -/** - * @brief Convert a `Kokkos::OpenMP` Kokkos::ExecutionSpace type to an `execution_space::openmp` enum value. 
- */ -template <> -struct kokkos_type_to_execution_space { - constexpr static execution_space value = execution_space::openmp; -}; -#endif - -#if defined(KOKKOS_ENABLE_OPENMPTARGET) -/** - * @brief Convert a `Kokkos::Experimental::OpenMPTarget` Kokkos::ExecutionSpace type to an `execution_space::openmp_target` enum value. - */ -template <> -struct kokkos_type_to_execution_space { - constexpr static execution_space value = execution_space::openmp_target; -}; -#endif - -#if defined(KOKKOS_ENABLE_OPENACC) -/** - * @brief Convert a `Kokkos::Experimental::OpenACC` Kokkos::ExecutionSpace type to an `execution_space::openacc` enum value. - */ -template <> -struct kokkos_type_to_execution_space { - constexpr static execution_space value = execution_space::openacc; -}; -#endif - -#if defined(KOKKOS_ENABLE_THREADS) -/** - * @brief Convert a `Kokkos::Threads` Kokkos::ExecutionSpace type to an `execution_space::threads` enum value. - */ -template <> -struct kokkos_type_to_execution_space { - constexpr static execution_space value = execution_space::threads; -}; -#endif - -#if defined(KOKKOS_ENABLE_SERIAL) -/** - * @brief Convert a `Kokkos::Serial` Kokkos::ExecutionSpace type to an `execution_space::serial` enum value. - */ -template <> -struct kokkos_type_to_execution_space { - constexpr static execution_space value = execution_space::serial; -}; -#endif - -/** - * @brief Convert the Kokkos::ExecutionSpace type @p ExecutionSpace to the corresponding `execution_space` enum value. - * @tparam ExecutionSpace the Kokkos::ExecutionSpace type to convert - */ -template -inline constexpr execution_space kokkos_type_to_execution_space_v = kokkos_type_to_execution_space::value; - -//***************************************************// -// other functions // -//***************************************************// - -namespace detail { - -/** - * @brief List all available Kokkos::ExecutionSpaces at compile time. - * @details At least one execution space must **always** be available! 
- * @return a `std::array` containing all available execution spaces (`[[nodiscard]]`) - */ -[[nodiscard]] inline constexpr auto constexpr_available_execution_spaces() noexcept { - // Note: the trailing comma is explicitly allowed by the standard - // Note: the order is intentionally chosen this way -> the order of the entries determines the priority when using a backend to run our code - return std::array{ -#if defined(KOKKOS_ENABLE_CUDA) - execution_space::cuda, -#endif -#if defined(KOKKOS_ENABLE_HIP) - execution_space::hip, -#endif -#if defined(KOKKOS_ENABLE_SYCL) - execution_space::sycl, -#endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) - execution_space::openmp_target, -#endif -#if defined(KOKKOS_ENABLE_OPENACC) - execution_space::openacc, -#endif -#if defined(KOKKOS_ENABLE_OPENMP) - execution_space::openmp, -#endif -#if defined(KOKKOS_ENABLE_THREADS) - execution_space::threads, -#endif -#if defined(KOKKOS_ENABLE_HPX) - execution_space::hpx, -#endif -#if defined(KOKKOS_ENABLE_SERIAL) - execution_space::serial, -#endif - }; -} - -} // namespace detail - /** * @brief List all available Kokkos::ExecutionSpaces. * @details Only Kokkos::ExecutionSpaces that were enabled during the CMake configuration are available. diff --git a/include/plssvm/backends/Kokkos/execution_space_type_traits.hpp b/include/plssvm/backends/Kokkos/execution_space_type_traits.hpp new file mode 100644 index 000000000..aa5e31751 --- /dev/null +++ b/include/plssvm/backends/Kokkos/execution_space_type_traits.hpp @@ -0,0 +1,238 @@ +/** + * @file + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Execution space type traits for the ExecutionSpaces in Kokkos.
+ */ + +#ifndef PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_TYPE_TRAITS_HPP_ +#define PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_TYPE_TRAITS_HPP_ +#pragma once + +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space + +#include "Kokkos_Core.hpp" // Kokkos macros, Kokkos ExecutionSpace types + +namespace plssvm::kokkos { + +//***************************************************// +// execution_space_to_kokkos_type // +//***************************************************// + +/** + * @brief Uninstantiated base type to convert an `execution_space` enum value to a Kokkos::ExecutionSpace type. + */ +template +struct execution_space_to_kokkos_type; + +#if defined(KOKKOS_ENABLE_CUDA) +/** + * @brief Convert an `execution_space::cuda` enum value to a `Kokkos::Cuda` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Cuda; +}; +#endif + +#if defined(KOKKOS_ENABLE_HIP) +/** + * @brief Convert an `execution_space::hip` enum value to a `Kokkos::HIP` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::HIP; +}; +#endif + +#if defined(KOKKOS_ENABLE_SYCL) +/** + * @brief Convert an `execution_space::sycl` enum value to a `Kokkos::SYCL` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::SYCL; +}; +#endif + +#if defined(KOKKOS_ENABLE_HPX) +/** + * @brief Convert an `execution_space::hpx` enum value to a `Kokkos::Experimental::HPX` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Experimental::HPX; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +/** + * @brief Convert an `execution_space::openmp` enum value to a `Kokkos::OpenMP` Kokkos::ExecutionSpace type. 
+ */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::OpenMP; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMPTARGET) +/** + * @brief Convert an `execution_space::openmp_target` enum value to a `Kokkos::Experimental::OpenMPTarget` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Experimental::OpenMPTarget; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) +/** + * @brief Convert an `execution_space::openacc` enum value to a `Kokkos::Experimental::OpenACC` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Experimental::OpenACC; +}; +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +/** + * @brief Convert an `execution_space::threads` enum value to a `Kokkos::Threads` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Threads; +}; +#endif + +#if defined(KOKKOS_ENABLE_SERIAL) +/** + * @brief Convert an `execution_space::serial` enum value to a `Kokkos::Serial` Kokkos::ExecutionSpace type. + */ +template <> +struct execution_space_to_kokkos_type { + using type = Kokkos::Serial; +}; +#endif + +/** + * @brief Convert the `execution_space` @p space to the corresponding Kokkos::ExecutionSpace type. + * @tparam space the enum value to convert + */ +template +using execution_space_to_kokkos_type_t = typename execution_space_to_kokkos_type::type; + +//***************************************************// +// kokkos_type_to_execution_space // +//***************************************************// + +/** + * @brief Uninstantiated base type to convert a Kokkos::ExecutionSpace type to a `execution_space` enum value. + */ +template +struct kokkos_type_to_execution_space; + +#if defined(KOKKOS_ENABLE_CUDA) +/** + * @brief Convert a `Kokkos::Cuda` Kokkos::ExecutionSpace type to an `execution_space::cuda` enum value. 
+ */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::cuda; +}; +#endif + +#if defined(KOKKOS_ENABLE_HIP) +/** + * @brief Convert a `Kokkos::HIP` Kokkos::ExecutionSpace type to an `execution_space::hip` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::hip; +}; +#endif + +#if defined(KOKKOS_ENABLE_SYCL) +/** + * @brief Convert a `Kokkos::SYCL` Kokkos::ExecutionSpace type to an `execution_space::sycl` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::sycl; +}; +#endif + +#if defined(KOKKOS_ENABLE_HPX) +/** + * @brief Convert a `Kokkos::Experimental::HPX` Kokkos::ExecutionSpace type to an `execution_space::hpx` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::hpx; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +/** + * @brief Convert a `Kokkos::OpenMP` Kokkos::ExecutionSpace type to an `execution_space::openmp` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::openmp; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENMPTARGET) +/** + * @brief Convert a `Kokkos::Experimental::OpenMPTarget` Kokkos::ExecutionSpace type to an `execution_space::openmp_target` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::openmp_target; +}; +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) +/** + * @brief Convert a `Kokkos::Experimental::OpenACC` Kokkos::ExecutionSpace type to an `execution_space::openacc` enum value. 
+ */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::openacc; +}; +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +/** + * @brief Convert a `Kokkos::Threads` Kokkos::ExecutionSpace type to an `execution_space::threads` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::threads; +}; +#endif + +#if defined(KOKKOS_ENABLE_SERIAL) +/** + * @brief Convert a `Kokkos::Serial` Kokkos::ExecutionSpace type to an `execution_space::serial` enum value. + */ +template <> +struct kokkos_type_to_execution_space { + constexpr static execution_space value = execution_space::serial; +}; +#endif + +/** + * @brief Convert the Kokkos::ExecutionSpace type @p ExecutionSpace to the corresponding `execution_space` enum value. + * @tparam ExecutionSpace the Kokkos::ExecutionSpace type to convert + */ +template +inline constexpr execution_space kokkos_type_to_execution_space_v = kokkos_type_to_execution_space::value; + +} // namespace plssvm::kokkos + +#endif // PLSSVM_BACKENDS_KOKKOS_EXECUTION_SPACE_TYPE_TRAITS_HPP_ diff --git a/src/plssvm/backends/Kokkos/execution_space.cpp b/src/plssvm/backends/Kokkos/execution_space.cpp index 6179c496d..2e0c08a01 100644 --- a/src/plssvm/backends/Kokkos/execution_space.cpp +++ b/src/plssvm/backends/Kokkos/execution_space.cpp @@ -8,7 +8,8 @@ #include "plssvm/backends/Kokkos/execution_space.hpp" -#include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case +#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case #include // std::array #include // std::ios::failbit diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index 142f72a37..34ce3881f 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ 
b/tests/backends/Kokkos/CMakeLists.txt @@ -9,6 +9,7 @@ set(PLSSVM_KOKKOS_TEST_NAME Kokkos_tests) # list all necessary sources set(PLSSVM_KOKKOS_TEST_SOURCES + ${CMAKE_CURRENT_LIST_DIR}/detail/constexpr_available_execution_spaces.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/device_ptr.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/device_view_wrapper.cpp ${CMAKE_CURRENT_LIST_DIR}/detail/device_wrapper.cpp @@ -18,6 +19,7 @@ set(PLSSVM_KOKKOS_TEST_SOURCES ${CMAKE_CURRENT_LIST_DIR}/kokkos_csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp ${CMAKE_CURRENT_LIST_DIR}/execution_space.cpp + ${CMAKE_CURRENT_LIST_DIR}/execution_space_type_traits.cpp ) find_package(Kokkos REQUIRED) diff --git a/tests/backends/Kokkos/detail/constexpr_available_execution_spaces.cpp b/tests/backends/Kokkos/detail/constexpr_available_execution_spaces.cpp new file mode 100644 index 000000000..2e8f064e7 --- /dev/null +++ b/tests/backends/Kokkos/detail/constexpr_available_execution_spaces.cpp @@ -0,0 +1,18 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for the Kokkos `constexpr_available_execution_spaces()` function. 
+ */ + +#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" + +#include "gtest/gtest.h" // TEST, EXPECT_TRUE, EXPECT_FALSE + +TEST(KokkosConstexprAvailableExecutionSpaces, constexpr_available_execution_spaces) { + // at least one execution space must always be available + EXPECT_FALSE(plssvm::kokkos::detail::constexpr_available_execution_spaces().empty()); +} diff --git a/tests/backends/Kokkos/execution_space.cpp b/tests/backends/Kokkos/execution_space.cpp index 24c3135de..679ccb240 100644 --- a/tests/backends/Kokkos/execution_space.cpp +++ b/tests/backends/Kokkos/execution_space.cpp @@ -12,8 +12,7 @@ #include "tests/custom_test_macros.hpp" // EXPECT_CONVERSION_TO_STRING, EXPECT_CONVERSION_FROM_STRING -#include "gmock/gmock.h" // EXPECT_THAT; ::testing::AnyOf -#include "gtest/gtest.h" // TEST, EXPECT_TRUE +#include "gtest/gtest.h" // TEST, EXPECT_TRUE, EXPECT_FALSE #include // std::istringstream @@ -68,73 +67,6 @@ TEST(KokkosExecutionSpace, from_string_unknown) { EXPECT_TRUE(input.fail()); } -TEST(KokkosExecutionSpace, execution_space_to_kokkos_type) { - // check conversions -#if defined(KOKKOS_ENABLE_CUDA) - ::testing::StaticAssertTypeEq, Kokkos::Cuda>(); -#endif -#if defined(KOKKOS_ENABLE_HIP) - ::testing::StaticAssertTypeEq, Kokkos::HIP>(); -#endif -#if defined(KOKKOS_ENABLE_SYCL) - ::testing::StaticAssertTypeEq, Kokkos::SYCL>(); -#endif -#if defined(KOKKOS_ENABLE_HPX) - ::testing::StaticAssertTypeEq, Kokkos::Experimental::HPX>(); -#endif -#if defined(KOKKOS_ENABLE_OPENMP) - ::testing::StaticAssertTypeEq, Kokkos::OpenMP>(); -#endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) - ::testing::StaticAssertTypeEq, Kokkos::Experimental::OpenMPTarget>(); -#endif -#if defined(KOKKOS_ENABLE_OPENACC) - ::testing::StaticAssertTypeEq, Kokkos::OpenACC>(); -#endif -#if defined(KOKKOS_ENABLE_THREADS) - ::testing::StaticAssertTypeEq, Kokkos::Threads>(); -#endif -#if defined(KOKKOS_ENABLE_SERIAL) - ::testing::StaticAssertTypeEq, Kokkos::Serial>(); 
-#endif -} - -TEST(KokkosExecutionSpace, kokkos_type_to_execution_space) { - // check conversions -#if defined(KOKKOS_ENABLE_CUDA) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::cuda); -#endif -#if defined(KOKKOS_ENABLE_HIP) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::hip); -#endif -#if defined(KOKKOS_ENABLE_SYCL) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::sycl); -#endif -#if defined(KOKKOS_ENABLE_HPX) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::hpx); -#endif -#if defined(KOKKOS_ENABLE_OPENMP) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openmp); -#endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openmp_target); -#endif -#if defined(KOKKOS_ENABLE_OPENACC) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openacc); -#endif -#if defined(KOKKOS_ENABLE_THREADS) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::threads); -#endif -#if defined(KOKKOS_ENABLE_SERIAL) - EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::serial); -#endif -} - -TEST(KokkosExecutionSpace, constexpr_available_execution_spaces) { - // at least one execution space must always be available - EXPECT_FALSE(plssvm::kokkos::detail::constexpr_available_execution_spaces().empty()); -} - TEST(KokkosExecutionSpace, list_available_execution_spaces) { // at least one execution space must always be available EXPECT_FALSE(plssvm::kokkos::list_available_execution_spaces().empty()); diff --git a/tests/backends/Kokkos/execution_space_type_traits.cpp b/tests/backends/Kokkos/execution_space_type_traits.cpp new file mode 100644 index 
000000000..f813fa836 --- /dev/null +++ b/tests/backends/Kokkos/execution_space_type_traits.cpp @@ -0,0 +1,75 @@ +/** + * @author Alexander Van Craen + * @author Marcel Breyer + * @copyright 2018-today The PLSSVM project - All Rights Reserved + * @license This file is part of the PLSSVM project which is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Tests for functions related to the different Kokkos execution spaces. + */ + +#include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" + +#include "gtest/gtest.h" // TEST, EXPECT_EQ, ::testing::StaticAssertTypeEq + +TEST(KokkosExecutionSpaceTypeTraits, execution_space_to_kokkos_type) { + // check conversions +#if defined(KOKKOS_ENABLE_CUDA) + ::testing::StaticAssertTypeEq, Kokkos::Cuda>(); +#endif +#if defined(KOKKOS_ENABLE_HIP) + ::testing::StaticAssertTypeEq, Kokkos::HIP>(); +#endif +#if defined(KOKKOS_ENABLE_SYCL) + ::testing::StaticAssertTypeEq, Kokkos::SYCL>(); +#endif +#if defined(KOKKOS_ENABLE_HPX) + ::testing::StaticAssertTypeEq, Kokkos::Experimental::HPX>(); +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + ::testing::StaticAssertTypeEq, Kokkos::OpenMP>(); +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + ::testing::StaticAssertTypeEq, Kokkos::Experimental::OpenMPTarget>(); +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + ::testing::StaticAssertTypeEq, Kokkos::Experimental::OpenACC>(); +#endif +#if defined(KOKKOS_ENABLE_THREADS) + ::testing::StaticAssertTypeEq, Kokkos::Threads>(); +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + ::testing::StaticAssertTypeEq, Kokkos::Serial>(); +#endif +} + +TEST(KokkosExecutionSpaceTypeTraits, kokkos_type_to_execution_space) { + // check conversions +#if defined(KOKKOS_ENABLE_CUDA) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::cuda); +#endif +#if defined(KOKKOS_ENABLE_HIP) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, 
plssvm::kokkos::execution_space::hip); +#endif +#if defined(KOKKOS_ENABLE_SYCL) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::sycl); +#endif +#if defined(KOKKOS_ENABLE_HPX) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::hpx); +#endif +#if defined(KOKKOS_ENABLE_OPENMP) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openmp); +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openmp_target); +#endif +#if defined(KOKKOS_ENABLE_OPENACC) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::openacc); +#endif +#if defined(KOKKOS_ENABLE_THREADS) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::threads); +#endif +#if defined(KOKKOS_ENABLE_SERIAL) + EXPECT_EQ(plssvm::kokkos::kokkos_type_to_execution_space_v, plssvm::kokkos::execution_space::serial); +#endif +} From 29feaeadd5034249447ca0940f4dfcdd2c28394f Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 14:43:07 +0100 Subject: [PATCH 065/123] Add new automatic execution space. 
--- .../constexpr_available_execution_spaces.hpp | 1 + include/plssvm/backends/Kokkos/execution_space.hpp | 2 ++ src/plssvm/backends/Kokkos/csvm.cpp | 12 ++++++++++++ .../backends/Kokkos/detail/device_wrapper.cpp | 9 +++++++-- src/plssvm/backends/Kokkos/detail/utility.cpp | 5 +++++ src/plssvm/backends/Kokkos/execution_space.cpp | 12 ++++++++++-- tests/backends/Kokkos/execution_space.cpp | 14 +++++++++++--- 7 files changed, 48 insertions(+), 7 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp b/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp index 5d964f66a..ea5dafb02 100644 --- a/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp +++ b/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp @@ -27,6 +27,7 @@ namespace plssvm::kokkos::detail { * @return a `std::array` containing all available execution spaces (`[[nodiscard]]`) */ [[nodiscard]] inline constexpr auto constexpr_available_execution_spaces() noexcept { + // Note: The execution_space::automatic value may NEVER be added here! // Note: the trailing comma is explicitly allowed by the standard // Note: the order is intentionally chosen this way -> the order of the entries determines the priority when using a backend to run our code return std::array{ diff --git a/include/plssvm/backends/Kokkos/execution_space.hpp b/include/plssvm/backends/Kokkos/execution_space.hpp index d77ae845b..cc9114412 100644 --- a/include/plssvm/backends/Kokkos/execution_space.hpp +++ b/include/plssvm/backends/Kokkos/execution_space.hpp @@ -25,6 +25,8 @@ namespace plssvm::kokkos { * @brief Enum class for all execution spaces supported by [Kokkos](https://github.com/kokkos/kokkos). */ enum class execution_space { + /** Automatically determine the used Kokkos execution space. Note: this does not necessarily correspond to Kokkos::DefaultExecutionSpace! 
*/ + automatic, /** Execution space representing execution on a CUDA device. */ cuda, /** Execution space representing execution on a device supported by HIP. */ diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 28823fd83..58e4e24fe 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -177,8 +177,12 @@ csvm::~csvm() { } std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { + PLSSVM_ASSERT(space_ != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); + std::vector<::plssvm::detail::memory_size> device_memory(this->num_available_devices()); switch (space_) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" }; case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { for (std::size_t device_id = 0; device_id < this->num_available_devices(); ++device_id) { @@ -217,8 +221,12 @@ std::vector<::plssvm::detail::memory_size> csvm::get_device_memory() const { } std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const { + PLSSVM_ASSERT(space_ != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); + std::vector<::plssvm::detail::memory_size> max_mem_alloc_size(this->num_available_devices()); switch (space_) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" 
}; case execution_space::cuda: case execution_space::hip: max_mem_alloc_size = this->get_device_memory(); @@ -246,6 +254,7 @@ std::vector<::plssvm::detail::memory_size> csvm::get_max_mem_alloc_size() const std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); + PLSSVM_ASSERT(space_ != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); // NOTE: the maximum theoretical work-group size, may be additionally limited by the amount of used scratch memory return devices_[device_id].execute_and_return([](const auto &device) { @@ -264,10 +273,13 @@ std::size_t csvm::get_max_work_group_size(const std::size_t device_id) const { ::plssvm::detail::dim_type csvm::get_max_grid_size([[maybe_unused]] const std::size_t device_id) const { PLSSVM_ASSERT(device_id < this->num_available_devices(), "Invalid device {} requested!", device_id); + PLSSVM_ASSERT(space_ != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); // NOTE: Kokkos only supports one-dimensional execution ranges! // NOTE: we only use two-dimensional kernels! switch (space_) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" 
}; case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA(([&]() -> ::plssvm::detail::dim_type { const cudaDeviceProp &prop = devices_[device_id].get().cuda_device_prop(); diff --git a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp index bfd79d9d2..35dd6c2e9 100644 --- a/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp +++ b/src/plssvm/backends/Kokkos/detail/device_wrapper.cpp @@ -11,6 +11,7 @@ #include "plssvm/backends/Kokkos/detail/conditional_execution.hpp" // PLSSVM_KOKKOS_BACKEND_INVOKE_IF_* #include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked #include "plssvm/detail/string_utility.hpp" // plssvm::detail::as_lower_case #include "plssvm/detail/utility.hpp" // plssvm::detail::contains @@ -19,6 +20,8 @@ #include "Kokkos_Core.hpp" // Kokkos::num_devices, Kokkos::ExecutionSpace +#include // std::vector + #if defined(KOKKOS_ENABLE_CUDA) #define PLSSVM_CUDA_ERROR_CHECK(err) \ if ((err) != cudaSuccess) { \ @@ -33,13 +36,15 @@ } #endif -#include // std::vector - namespace plssvm::kokkos::detail { std::vector get_device_list(const execution_space space, [[maybe_unused]] const target_platform target) { + PLSSVM_ASSERT(space != execution_space::automatic, "The automatic execution_space may not be provided to this function!"); + std::vector devices{}; switch (space) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" 
}; case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_IF_CUDA([&]() { for (int device = 0; device < Kokkos::num_devices(); ++device) { diff --git a/src/plssvm/backends/Kokkos/detail/utility.cpp b/src/plssvm/backends/Kokkos/detail/utility.cpp index 45392e509..5dc3f8cda 100644 --- a/src/plssvm/backends/Kokkos/detail/utility.cpp +++ b/src/plssvm/backends/Kokkos/detail/utility.cpp @@ -39,6 +39,9 @@ std::map> available_target_platfor // iterate over all available execution spaces for (const execution_space space : list_available_execution_spaces()) { switch (space) { + case execution_space::automatic: + // nothing to do here + break; case execution_space::cuda: // NVIDIA GPUs only available_map[target_platform::gpu_nvidia].push_back(execution_space::cuda); @@ -121,6 +124,8 @@ std::map> available_target_platfor std::string get_device_name([[maybe_unused]] const device_wrapper &dev) { switch (dev.get_execution_space()) { + case execution_space::automatic: + throw backend_exception{ "Unsupported execution_space::automatic provided!" 
}; case execution_space::cuda: PLSSVM_KOKKOS_BACKEND_INVOKE_RETURN_IF_CUDA([&]() { return std::string{ dev.get().cuda_device_prop().name }; diff --git a/src/plssvm/backends/Kokkos/execution_space.cpp b/src/plssvm/backends/Kokkos/execution_space.cpp index 2e0c08a01..0caae212f 100644 --- a/src/plssvm/backends/Kokkos/execution_space.cpp +++ b/src/plssvm/backends/Kokkos/execution_space.cpp @@ -22,6 +22,8 @@ namespace plssvm::kokkos { std::ostream &operator<<(std::ostream &out, const execution_space space) { switch (space) { + case execution_space::automatic: + return out << "automatic"; case execution_space::cuda: return out << "Cuda"; case execution_space::hip: @@ -49,7 +51,9 @@ std::istream &operator>>(std::istream &in, execution_space &space) { in >> str; ::plssvm::detail::to_lower_case(str); - if (str == "cuda") { + if (str == "automatic" || str == "auto") { + space = execution_space::automatic; + } else if (str == "cuda") { space = execution_space::cuda; } else if (str == "hip") { space = execution_space::hip; @@ -74,8 +78,12 @@ std::istream &operator>>(std::istream &in, execution_space &space) { } std::vector list_available_execution_spaces() { + // always add the automatic execution space + std::vector spaces{ execution_space::automatic }; + // add all other available execution spaces constexpr auto arr = detail::constexpr_available_execution_spaces(); - return std::vector(arr.cbegin(), arr.cend()); + spaces.insert(spaces.cend(), arr.begin(), arr.end()); + return spaces; } } // namespace plssvm::kokkos diff --git a/tests/backends/Kokkos/execution_space.cpp b/tests/backends/Kokkos/execution_space.cpp index 679ccb240..3e54f3be5 100644 --- a/tests/backends/Kokkos/execution_space.cpp +++ b/tests/backends/Kokkos/execution_space.cpp @@ -19,6 +19,7 @@ // check whether the plssvm::kokkos::execution_space -> std::string conversions are correct TEST(KokkosExecutionSpace, to_string) { // check conversions to std::string + 
EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::automatic, "automatic"); EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::cuda, "Cuda"); EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::hip, "HIP"); EXPECT_CONVERSION_TO_STRING(plssvm::kokkos::execution_space::sycl, "SYCL"); @@ -32,12 +33,14 @@ TEST(KokkosExecutionSpace, to_string) { TEST(KokkosExecutionSpace, to_string_unknown) { // check conversions to std::string from unknown execution_space - EXPECT_CONVERSION_TO_STRING(static_cast(9), "unknown"); + EXPECT_CONVERSION_TO_STRING(static_cast(10), "unknown"); } // check whether the std::string -> plssvm::kokkos::execution_space conversions are correct TEST(KokkosExecutionSpace, from_string) { // check conversion from std::string + EXPECT_CONVERSION_FROM_STRING("Automatic", plssvm::kokkos::execution_space::automatic); + EXPECT_CONVERSION_FROM_STRING("AUTO", plssvm::kokkos::execution_space::automatic); EXPECT_CONVERSION_FROM_STRING("Cuda", plssvm::kokkos::execution_space::cuda); EXPECT_CONVERSION_FROM_STRING("CUDA", plssvm::kokkos::execution_space::cuda); EXPECT_CONVERSION_FROM_STRING("Hip", plssvm::kokkos::execution_space::hip); @@ -68,6 +71,11 @@ TEST(KokkosExecutionSpace, from_string_unknown) { } TEST(KokkosExecutionSpace, list_available_execution_spaces) { - // at least one execution space must always be available - EXPECT_FALSE(plssvm::kokkos::list_available_execution_spaces().empty()); + const std::vector execution_spaces = plssvm::kokkos::list_available_execution_spaces(); + + // at least one must be available (automatic)! 
+ EXPECT_GE(execution_spaces.size(), 1); + + // the automatic execution space must always be present + EXPECT_THAT(execution_spaces, ::testing::Contains(plssvm::kokkos::execution_space::automatic)); } From d7f54232467708de53638b63114857fe24a4350c Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 16:22:15 +0100 Subject: [PATCH 066/123] Add the possibility to explicitly specify the Kokkos execution space on the command line and using a named parameter. --- include/plssvm/backends/Kokkos/csvm.hpp | 15 ++++- include/plssvm/detail/cmd/parser_predict.hpp | 4 ++ include/plssvm/detail/cmd/parser_train.hpp | 4 ++ include/plssvm/parameter.hpp | 11 +++- src/main_predict.cpp | 16 +++-- src/main_train.cpp | 14 +++-- src/plssvm/backends/Kokkos/csvm.cpp | 64 +++++++++++++++----- src/plssvm/detail/cmd/parser_predict.cpp | 52 +++++++++++----- src/plssvm/detail/cmd/parser_train.cpp | 62 +++++++++++++------ 9 files changed, 182 insertions(+), 60 deletions(-) diff --git a/include/plssvm/backends/Kokkos/csvm.hpp b/include/plssvm/backends/Kokkos/csvm.hpp index d8dcfaab8..2ff662933 100644 --- a/include/plssvm/backends/Kokkos/csvm.hpp +++ b/include/plssvm/backends/Kokkos/csvm.hpp @@ -21,11 +21,14 @@ #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/csvm.hpp" // plssvm::detail::csvm_backend_exists +#include "plssvm/detail/igor_utility.hpp" // plssvm::detail::get_value_from_named_parameter #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES #include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::detail::parameter #include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "igor/igor.hpp" // igor::parser + #include // std::size_t #include // std::true_type #include // std::forward @@ -77,7 +80,7 @@ class csvm : public ::plssvm::detail::gpu_csvm)> + template )> 
explicit csvm(Args &&...named_args) : csvm{ plssvm::target_platform::automatic, std::forward(named_args)... } { } @@ -89,9 +92,17 @@ class csvm : public ::plssvm::detail::gpu_csvm)> + template )> explicit csvm(const target_platform target, Args &&...named_args) : base_type{ std::forward(named_args)... } { + // check igor parameter + igor::parser parser{ std::forward(named_args)... }; + + // check whether a specific Kokkos execution space has been requested + if constexpr (parser.has(kokkos_execution_space)) { + // compile time check: the value must have the correct type + space_ = ::plssvm::detail::get_value_from_named_parameter(parser, kokkos_execution_space); + } this->init(target); } diff --git a/include/plssvm/detail/cmd/parser_predict.hpp b/include/plssvm/detail/cmd/parser_predict.hpp index 1fc364c7e..4da63c508 100644 --- a/include/plssvm/detail/cmd/parser_predict.hpp +++ b/include/plssvm/detail/cmd/parser_predict.hpp @@ -14,6 +14,7 @@ #pragma once #include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -45,6 +46,9 @@ struct parser_predict { /// The SYCL implementation to use with `--backend sycl`. sycl::implementation_type sycl_implementation_type{ sycl::implementation_type::automatic }; + /// The Kokkos execution space to use with --backend=kokkos. + kokkos::execution_space kokkos_execution_space{ kokkos::execution_space::automatic }; + /// `true` if `std::string` should be used as label type instead of the default type `ìnt`. 
bool strings_as_labels{ false }; diff --git a/include/plssvm/detail/cmd/parser_train.hpp b/include/plssvm/detail/cmd/parser_train.hpp index c448a1300..1fea29e57 100644 --- a/include/plssvm/detail/cmd/parser_train.hpp +++ b/include/plssvm/detail/cmd/parser_train.hpp @@ -14,6 +14,7 @@ #pragma once #include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/classification_types.hpp" // plssvm::classification_type @@ -65,6 +66,9 @@ struct parser_train { /// The SYCL implementation to use with --backend=sycl. sycl::implementation_type sycl_implementation_type{ sycl::implementation_type::automatic }; + /// The Kokkos execution space to use with --backend=kokkos. + kokkos::execution_space kokkos_execution_space{ kokkos::execution_space::automatic }; + /// `true` if `std::string` should be used as label type instead of the default type `ìnt`. bool strings_as_labels{ false }; diff --git a/include/plssvm/parameter.hpp b/include/plssvm/parameter.hpp index 4e51b90d7..1f229e98a 100644 --- a/include/plssvm/parameter.hpp +++ b/include/plssvm/parameter.hpp @@ -56,6 +56,8 @@ IGOR_MAKE_NAMED_ARGUMENT(classification); IGOR_MAKE_NAMED_ARGUMENT(sycl_implementation_type); /// Create a named argument for the SYCL backend specific kernel invocation type. IGOR_MAKE_NAMED_ARGUMENT(sycl_kernel_invocation_type); +/// Create a named argument for the Kokkos backend specific execution space. 
+IGOR_MAKE_NAMED_ARGUMENT(kokkos_execution_space); /// @endcond @@ -73,6 +75,13 @@ constexpr bool has_only_parameter_named_args_v = !igor::has_other_than( template constexpr bool has_only_sycl_parameter_named_args_v = !igor::has_other_than(plssvm::kernel_type, plssvm::gamma, plssvm::degree, plssvm::coef0, plssvm::cost, plssvm::sycl_implementation_type, plssvm::sycl_kernel_invocation_type); +/** + * @brief Trait to check whether @p Args only contains named-parameter that can be used to initialize a `plssvm::parameter` struct including Kokkos specific named-parameters. + */ +template +constexpr bool has_only_kokkos_parameter_named_args_v = !igor::has_other_than(plssvm::kernel_type, plssvm::gamma, plssvm::degree, plssvm::coef0, plssvm::cost, plssvm::kokkos_execution_space); + + } // namespace detail /** @@ -185,7 +194,7 @@ struct parameter { // compile time check: each named parameter must only be passed once static_assert(!parser.has_duplicates(), "Can only use each named parameter once!"); // compile time check: only some named parameters are allowed - static_assert(!parser.has_other_than(plssvm::kernel_type, plssvm::gamma, plssvm::degree, plssvm::coef0, plssvm::cost, plssvm::sycl_implementation_type, plssvm::sycl_kernel_invocation_type), + static_assert(!parser.has_other_than(plssvm::kernel_type, plssvm::gamma, plssvm::degree, plssvm::coef0, plssvm::cost, plssvm::sycl_implementation_type, plssvm::sycl_kernel_invocation_type, plssvm::kokkos_execution_space), "An illegal named parameter has been passed!"); // shorthand function for emitting a warning if a provided parameter is not used by the current kernel function diff --git a/src/main_predict.cpp b/src/main_predict.cpp index bc83ffcfa..58015d928 100644 --- a/src/main_predict.cpp +++ b/src/main_predict.cpp @@ -37,8 +37,8 @@ #include // std::ofstream #include // std::mem_fn #include // std::cerr, std::endl +#include // std::unique_ptr, std::make_unique #include // std::pair -#include // std::unique_ptr, 
std::make_unique #include // std::visit #include // std::vector @@ -82,11 +82,10 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; - -#if defined(PLSSVM_HAS_KOKKOS_BACKEND) // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) const bool use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) // initialize Kokkos if necessary if (use_kokkos_as_backend) { kokkos_guard = std::make_unique(argc, argv); @@ -95,8 +94,15 @@ int main(int argc, char *argv[]) { #endif // create default csvm - const std::unique_ptr svm = use_sycl_as_backend ? 
plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type) - : plssvm::make_csvm(cmd_parser.backend, cmd_parser.target); + const std::unique_ptr svm = [&]() { + if (use_sycl_as_backend) { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type); + } else if (use_kokkos_as_backend) { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); + } else { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target); + } + }(); // create model const plssvm::model model{ cmd_parser.model_filename }; diff --git a/src/main_train.cpp b/src/main_train.cpp index 14cf8941b..93cb2abe8 100644 --- a/src/main_train.cpp +++ b/src/main_train.cpp @@ -78,11 +78,10 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; - -#if defined(PLSSVM_HAS_KOKKOS_BACKEND) // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) const bool use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) // initialize Kokkos if necessary if (use_kokkos_as_backend) { kokkos_guard = std::make_unique(argc, argv); @@ -91,8 +90,15 @@ int main(int argc, char *argv[]) { #endif // create SVM - const std::unique_ptr svm = use_sycl_as_backend ? 
plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type) - : plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params); + const std::unique_ptr svm = [&]() { + if (use_sycl_as_backend) { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type); + } else if (use_kokkos_as_backend) { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); + } else { + return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params); + } + }(); // only specify plssvm::max_iter if it isn't its default value const plssvm::model model = diff --git a/src/plssvm/backends/Kokkos/csvm.cpp b/src/plssvm/backends/Kokkos/csvm.cpp index 58e4e24fe..2bcefeeff 100644 --- a/src/plssvm/backends/Kokkos/csvm.cpp +++ b/src/plssvm/backends/Kokkos/csvm.cpp @@ -93,38 +93,72 @@ void csvm::init(const target_platform target) { break; } + // check whether the requested execution space is available + if (!::plssvm::detail::contains(list_available_execution_spaces(), space_)) { + throw backend_exception{ fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space_, fmt::join(list_available_execution_spaces(), ", ")) }; + } + // get all available target_platform <-> Kokkos::ExecutionSpace combinations const std::map> available_combinations = detail::available_target_platform_to_execution_space_mapping(); - if (target == target_platform::automatic) { - // go through all combinations and choose the first execution space in order: gpu_nvidia -> gpu_amd -> gpu_intel -> cpu - for (const 
target_platform target_order : { target_platform::gpu_nvidia, target_platform::gpu_amd, target_platform::gpu_intel, target_platform::cpu }) { - if (::plssvm::detail::contains(available_combinations, target_order)) { + // check whether the provided execution space is the automatic one + if (space_ == execution_space::automatic) { + // automatically determine the execution space and potentially automatically determine the target platform + if (target == target_platform::automatic) { + // go through all combinations and choose the first execution space in order: gpu_nvidia -> gpu_amd -> gpu_intel -> cpu + for (const target_platform target_order : list_available_target_platforms()) { + if (::plssvm::detail::contains(available_combinations, target_order)) { + // the target platform is supported -> choose the first execution space to use in the Kokkos backend + space_ = available_combinations.at(target_order).front(); + target_ = target_order; + break; + } + } + } else { + // check whether the provided target platform is compatible with the currently available Kokkos::ExecutionSpaces + if (::plssvm::detail::contains(available_combinations, target)) { // the target platform is supported -> choose the first execution space to use in the Kokkos backend - space_ = available_combinations.at(target_order).front(); - target_ = target_order; - break; + space_ = available_combinations.at(target).front(); + target_ = target; + } else { + // the provided target platform is unsupported -> throw an exception + throw backend_exception{ fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform {}!", fmt::join(list_available_execution_spaces(), ", "), target) }; } } + + // output what we use as automatic Kokkos execution space + plssvm::detail::log(verbosity_level::full, + "\nUsing {} as automatic Kokkos::ExecutionSpace.", + space_); } else { - // check whether the provided target platform is compatible with the currently available Kokkos::ExecutionSpaces 
- if (::plssvm::detail::contains(available_combinations, target)) { - // the target platform is supported -> choose the first execution space to use in the Kokkos backend - space_ = available_combinations.at(target).front(); - target_ = target; + // execution space explicitly provided and potentially automatically determine the target platform + if (target == target_platform::automatic) { + // go through all combinations (gpu_nvidia -> gpu_amd -> gpu_intel -> cpu) and check whether the requested execution space supports that target platform + for (const target_platform target_order : list_available_target_platforms()) { + if (::plssvm::detail::contains(available_combinations, target_order) && ::plssvm::detail::contains(available_combinations.at(target_order), space_)) { + // the provided execution space supports the target platform + target_ = target_order; + break; + } + } } else { - // the provided target platform is unsupported -> throw an exception - throw backend_exception{ fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform {}!", fmt::join(list_available_execution_spaces(), ", "), target) }; + if (!::plssvm::detail::contains(available_combinations, target) || !::plssvm::detail::contains(available_combinations.at(target), space_)) { + // the provided execution space and target platform combination is unsupported + throw backend_exception{ fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform {}!", space_, target) }; + } } } + // At this point, space_ may NEVER be execution_space::automatic! + PLSSVM_ASSERT(space_ != execution_space::automatic, "At this point, the Kokkos execution space must be determined and must NOT be automatic!"); + // Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC currently not supported! 
if (space_ == execution_space::openmp_target || space_ == execution_space::openacc) { throw backend_exception{ fmt::format("The Kokkos execution space {} is currently not supported!", space_) }; } plssvm::detail::log(verbosity_level::full, - "\nUsing Kokkos ({}) as backend with the Kokkos::ExecutionSpace \"{}\".\n", + "\nUsing Kokkos ({}) as backend with the Kokkos::ExecutionSpace {}.\n", plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_version", detail::get_kokkos_version() }, plssvm::detail::tracking::tracking_entry{ "dependencies", "kokkos_default_execution_space", space_ }); diff --git a/src/plssvm/detail/cmd/parser_predict.cpp b/src/plssvm/detail/cmd/parser_predict.cpp index 88e91bb2c..298c90da0 100644 --- a/src/plssvm/detail/cmd/parser_predict.cpp +++ b/src/plssvm/detail/cmd/parser_predict.cpp @@ -9,6 +9,7 @@ #include "plssvm/detail/cmd/parser_predict.hpp" #include "plssvm/backend_types.hpp" // plssvm::list_available_backends +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::list_available_execution_spaces #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::list_available_sycl_implementations #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT @@ -17,8 +18,8 @@ #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level #include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info -#include "cxxopts.hpp" // cxxopts::{Options, value, ParseResult} -#include "fmt/color.h" // fmt::fg, fmt::color::orange +#include "cxxopts.hpp" // cxxopts::{Options, value, ParseResult} +#include "fmt/color.h" // fmt::fg, fmt::color::orange #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join @@ -51,6 +52,9 @@ parser_predict::parser_predict(int argc, char **argv) { #if defined(PLSSVM_HAS_SYCL_BACKEND) ("sycl_implementation_type", fmt::format("choose the SYCL implementation to be used in the SYCL 
backend: {}", fmt::join(sycl::list_available_sycl_implementations(), "|")), cxxopts::value()->default_value(fmt::format("{}", sycl_implementation_type))) #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + ("kokkos_execution_space", fmt::format("choose the Kokkos execution space to be used in the Kokkos backend: {}", fmt::join(kokkos::list_available_execution_spaces(), "|")), cxxopts::value()->default_value(fmt::format("{}", kokkos_execution_space))) +#endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) ("performance_tracking", "the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr", cxxopts::value()) #endif @@ -101,18 +105,38 @@ parser_predict::parser_predict(int argc, char **argv) { target = result["target_platform"].as(); #if defined(PLSSVM_HAS_SYCL_BACKEND) - // parse SYCL implementation used in the SYCL backend - sycl_implementation_type = result["sycl_implementation_type"].as(); - - // assembly warning condition - const std::vector target_platforms = { target == target_platform::automatic ? 
determine_default_target_platform() : target }; - const bool sycl_backend_is_used = backend == backend_type::sycl || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::sycl); - - // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend - if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { - detail::log_untracked(verbosity_level::full | verbosity_level::warning, - "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", - sycl_implementation_type); + { + // parse SYCL implementation used in the SYCL backend + sycl_implementation_type = result["sycl_implementation_type"].as(); + + // assembly warning condition + const std::vector target_platforms = { target == target_platform::automatic ? determine_default_target_platform() : target }; + const bool sycl_backend_is_used = backend == backend_type::sycl || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::sycl); + + // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend + if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", + sycl_implementation_type); + } + } +#endif + +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + { + // parse execution space when using Kokkos as backend + kokkos_execution_space = result["kokkos_execution_space"].as(); + + // assemble warning condition + const std::vector target_platforms = { target == target_platform::automatic ? 
determine_default_target_platform() : target }; + const bool kokkos_backend_is_used = backend == backend_type::kokkos || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::kokkos); + + // warn if the kokkos execution space is explicitly set but Kokkos isn't the current (automatic) backend + if (!kokkos_backend_is_used && kokkos_execution_space != kokkos::execution_space::automatic) { + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a Kokkos execution space but the current backend isn't Kokkos; ignoring --kokkos_execution_space={}\n", + kokkos_execution_space); + } } #endif diff --git a/src/plssvm/detail/cmd/parser_train.cpp b/src/plssvm/detail/cmd/parser_train.cpp index d0cc4cb26..31964a897 100644 --- a/src/plssvm/detail/cmd/parser_train.cpp +++ b/src/plssvm/detail/cmd/parser_train.cpp @@ -9,6 +9,7 @@ #include "plssvm/detail/cmd/parser_train.hpp" #include "plssvm/backend_types.hpp" // plssvm::list_available_backends, plssvm::determine_default_backend +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::{list_available_execution_spaces, execution_space} #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::{list_available_sycl_implementations, implementation_type} #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/classification_types.hpp" // plssvm::classification_type, plssvm::classification_type_to_full_string @@ -77,6 +78,9 @@ parser_train::parser_train(int argc, char **argv) { ("sycl_kernel_invocation_type", "choose the kernel invocation type when using SYCL as backend: automatic|nd_range", cxxopts::value()->default_value(fmt::format("{}", sycl_kernel_invocation_type))) ("sycl_implementation_type", fmt::format("choose the SYCL implementation to be used in the SYCL backend: {}", 
fmt::join(sycl::list_available_sycl_implementations(), "|")), cxxopts::value()->default_value(fmt::format("{}", sycl_implementation_type))) #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + ("kokkos_execution_space", fmt::format("choose the Kokkos execution space to be used in the Kokkos backend: {}", fmt::join(kokkos::list_available_execution_spaces(), "|")), cxxopts::value()->default_value(fmt::format("{}", kokkos_execution_space))) +#endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) ("performance_tracking", "the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr", cxxopts::value()) #endif @@ -185,28 +189,48 @@ parser_train::parser_train(int argc, char **argv) { solver = result["solver"].as(); #if defined(PLSSVM_HAS_SYCL_BACKEND) - // parse kernel invocation type when using SYCL as backend - sycl_kernel_invocation_type = result["sycl_kernel_invocation_type"].as(); - - // assembly warning condition - const std::vector target_platforms = { target == target_platform::automatic ? 
determine_default_target_platform() : target }; - const bool sycl_backend_is_used = backend == backend_type::sycl || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::sycl); - - // warn if kernel invocation type is explicitly set but SYCL isn't the current (automatic) backend - if (!sycl_backend_is_used && sycl_kernel_invocation_type != sycl::kernel_invocation_type::automatic) { - detail::log_untracked(verbosity_level::full | verbosity_level::warning, - "WARNING: explicitly set a SYCL kernel invocation type but the current backend isn't SYCL; ignoring --sycl_kernel_invocation_type={}\n", - sycl_kernel_invocation_type); + { + // parse kernel invocation type when using SYCL as backend + sycl_kernel_invocation_type = result["sycl_kernel_invocation_type"].as(); + + // assemble warning condition + const std::vector target_platforms = { target == target_platform::automatic ? determine_default_target_platform() : target }; + const bool sycl_backend_is_used = backend == backend_type::sycl || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::sycl); + + // warn if kernel invocation type is explicitly set but SYCL isn't the current (automatic) backend + if (!sycl_backend_is_used && sycl_kernel_invocation_type != sycl::kernel_invocation_type::automatic) { + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a SYCL kernel invocation type but the current backend isn't SYCL; ignoring --sycl_kernel_invocation_type={}\n", + sycl_kernel_invocation_type); + } + + // parse SYCL implementation used in the SYCL backend + sycl_implementation_type = result["sycl_implementation_type"].as(); + + // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend + if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) 
{ + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", + sycl_implementation_type); + } } +#endif - // parse SYCL implementation used in the SYCL backend - sycl_implementation_type = result["sycl_implementation_type"].as(); +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + { + // parse execution space when using Kokkos as backend + kokkos_execution_space = result["kokkos_execution_space"].as(); - // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend - if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { - detail::log_untracked(verbosity_level::full | verbosity_level::warning, - "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", - sycl_implementation_type); + // assemble warning condition + const std::vector target_platforms = { target == target_platform::automatic ? determine_default_target_platform() : target }; + const bool kokkos_backend_is_used = backend == backend_type::kokkos || (backend == backend_type::automatic && determine_default_backend(list_available_backends(), target_platforms) == backend_type::kokkos); + + // warn if the kokkos execution space is explicitly set but Kokkos isn't the current (automatic) backend + if (!kokkos_backend_is_used && kokkos_execution_space != kokkos::execution_space::automatic) { + detail::log_untracked(verbosity_level::full | verbosity_level::warning, + "WARNING: explicitly set a Kokkos execution space but the current backend isn't Kokkos; ignoring --kokkos_execution_space={}\n", + kokkos_execution_space); + } } #endif From 46da12f1e77afc9b8132be142e695cc26a6912f5 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 16:22:35 +0100 Subject: [PATCH 067/123] Update manpage. 
--- docs/plssvm-train.1.in | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/plssvm-train.1.in b/docs/plssvm-train.1.in index b52853dac..e34541848 100644 --- a/docs/plssvm-train.1.in +++ b/docs/plssvm-train.1.in @@ -17,7 +17,10 @@ plssvm-train is a utility to train an LS-SVM using different backends to target set type of kernel function. 0 -- linear: u'*v 1 -- polynomial: (gamma*u'*v + coef0)^degree - 2 -- radial basis function: exp(-gamma*|u-v|^2) (default: 2) + 2 -- radial basis function: exp(-gamma*|u-v|^2) + 3 -- sigmoid: tanh(gamma*u'*v+coef0) + 4 -- laplacian: exp(-gamma*|u-v|_1) + 5 -- chi_squared: exp(-gamma*sum_i((x[i]-y[i])^2/(x[i]+y[i]))) (default: 2) .TP .B -d, --degree arg @@ -25,7 +28,7 @@ set degree in kernel function (default: 3) .TP .B -g, --gamma arg -set gamma in kernel function (default: 1 / num_features) +set gamma in kernel function (default: automatic) .TP .B -r, --coef0 arg From 85c581dadebcaff1d73e2bf95f8f4af944dc219e Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 16:23:03 +0100 Subject: [PATCH 068/123] Add available Kokkos execution spaces to the Kokkos CMake summary string. 
--- src/plssvm/backends/Kokkos/CMakeLists.txt | 33 ++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt index b66927fa9..90a1f4e74 100644 --- a/src/plssvm/backends/Kokkos/CMakeLists.txt +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -122,8 +122,39 @@ target_link_libraries(${PLSSVM_ALL_LIBRARY_NAME} INTERFACE ${PLSSVM_KOKKOS_BACKE # mark backend library as install target append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) +# assemble Kokkos available execution space string +set(PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "") +if (Kokkos_ENABLE_CUDA) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "Cuda") +endif () +if (Kokkos_ENABLE_HIP) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "HIP") +endif () +if (Kokkos_ENABLE_SYCL) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "SYCL") +endif () +if (Kokkos_ENABLE_HPX) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "HPX") +endif () +if (Kokkos_ENABLE_OPENMP) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "OpenMP") +endif () +if (Kokkos_ENABLE_OPENMPTARGET) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "OpenMPTarget") +endif () +if (Kokkos_ENABLE_OPENACC) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "OpenACC") +endif () +if (Kokkos_ENABLE_THREADS) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "Threads") +endif () +if (Kokkos_ENABLE_SERIAL) + list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "Serial") +endif () +set(PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES}" PARENT_SCOPE) + # generate summary string -set(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_COMPILER " - Kokkos:") +set(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_COMPILER " - Kokkos 
(${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES}):") include(${PROJECT_SOURCE_DIR}/cmake/assemble_summary_string.cmake) assemble_summary_string(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_ARCHS) # do not print any special target architecture information From 5ed9e536b1600bf395042d9bcd0275a7d89d049f Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 16:36:57 +0100 Subject: [PATCH 069/123] Update README to also include the new Kokkos backend. --- README.md | 67 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index e7de709b3..71a96e499 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,7 @@ The main highlights of our SVM implementations are: - [HIP](https://github.com/ROCm-Developer-Tools/HIP) - [OpenCL](https://www.khronos.org/opencl/) - [SYCL](https://www.khronos.org/sycl/) (supported implementations are [DPC++](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) (formerly known as hipSYCL); specifically the versions [sycl-nightly/20231201](https://github.com/intel/llvm/tree/sycl-nightly/20230110) and AdaptiveCpp release [v24.06.0](https://github.com/AdaptiveCpp/AdaptiveCpp/releases/tag/v23.10.0)) + - [Kokkos](https://github.com/kokkos/kokkos) (all execution spaces supported except `OpenMPTarget` and `OpenACC`); specifically the version [d50de97](https://github.com/kokkos/kokkos/commit/d50de979b4d095dc32dba80f72a5e009f3615db1) 3. 
Six different kernel functions to be able to classify a large variety of different problems: - linear: $\vec{u}^T$ $\cdot$ $\vec{v}$ - polynomial: $(\gamma$ $\cdot$ $\vec{u}^T$ $\cdot$ $\vec{v}$ $+$ $coef0)^{d}$ @@ -122,6 +123,10 @@ Additional dependencies for the SYCL backend: - the code must be compiled with a SYCL capable compiler; currently supported are [DPC++](https://github.com/intel/llvm) and [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp) +Additional dependencies for the Kokkos backend: + +- a Kokkos installation with the respective execution spaces enabled; currently all execution spaces are supported except `OpenMPTarget` and `OpenACC` + Additional dependencies for the stdpar backend: - the code must be compiled with a stdpar capable compiler; currently supported are [nvc++](https://developer.nvidia.com/hpc-sdk), [roc-stdpar](https://github.com/ROCm/roc-stdpar), [icpx](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler.html), [AdaptiveCpp](https://github.com/AdaptiveCpp/AdaptiveCpp), and [GNU GCC](https://gcc.gnu.org/)) @@ -262,6 +267,11 @@ The `[optional_options]` can be one or multiple of: - `AUTO`: check for the SYCL backend but **do not** fail if not available - `OFF`: do not check for the SYCL backend +- `PLSSVM_ENABLE_KOKKOS_BACKEND=ON|OFF|AUTO` (default: `AUTO`): + - `ON`: check for the Kokkos backend and fail if not available + - `AUTO`: check for the Kokkos backend but **do not** fail if not available + - `OFF`: do not check for the Kokkos backend + **Attention:** at least one backend must be enabled and available! 
- `PLSSVM_ENABLE_FAST_MATH=ON|OFF` (default depending on `CMAKE_BUILD_TYPE`: `ON` for Release or RelWithDebInfo, `OFF` otherwise): enable `fast-math` compiler flags for all backends @@ -337,6 +347,10 @@ If more than one SYCL implementation is available the environment variables `PLS - `PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION` (`dpcpp`|`adaptivecpp`): specify the preferred SYCL implementation if the `sycl_implementation_type` option is set to `automatic`; additional the specified SYCL implementation is used in the `plssvm::sycl` namespace, the other implementations are available in the `plssvm::dpcpp` and `plssvm::adaptivecpp` namespace respectively +If the Kokkos backend is available the following additional option is available (**note**: this option only takes effect if the Kokkos SYCL execution space is available): + +- `PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT` (default: `ON`): enable Ahead-of-Time (AOT) compilation for the specified target platforms + If the stdpar backend is available, an additional options can be set. - `PLSSVM_STDPAR_BACKEND_IMPLEMENTATION` (default: `AUTO`): explicitly specify the used stdpar implementation; must be one of: `AUTO`, `NVHPC`, `roc-stdpar`, `IntelLLVM`, `ACPP`, `GNU_TBB`. 
@@ -353,24 +367,6 @@ Available configure presets: "openmp" - OpenMP backend "openmp_python" - OpenMP backend + Python bindings "openmp_test" - OpenMP backend tests - "cuda" - CUDA backend - "cuda_python" - CUDA backend + Python bindings - "cuda_test" - CUDA backend tests - "hip" - HIP backend - "hip_python" - HIP backend + Python bindings - "hip_test" - HIP backend tests - "opencl" - OpenCL backend - "opencl_python" - OpenCL backend + Python bindings - "opencl_test" - OpenCL backend tests - "acpp" - AdaptiveCpp SYCL backend - "acpp_python" - AdaptiveCpp SYCL backend + Python bindings - "acpp_test" - AdaptiveCpp SYCL backend tests - "dpcpp" - DPC++/icpx SYCL backend - "dpcpp_python" - DPC++/icpx backend + Python bindings - "dpcpp_test" - DPC++/icpx backend tests - "all" - All available backends - "all_python" - All available backends + Python bindings - "all_test" - All available backends tests "stdpar" - stdpar backend "stdpar_python" - stdpar backend + Python bindings "stdpar_test" - stdpar backend tests @@ -389,6 +385,27 @@ Available configure presets: "stdpar_intelllvm" - stdpar IntelLLVM (icpx) backend "stdpar_intelllvm_python" - stdpar IntelLLVM (icpx) backend + Python bindings "stdpar_intelllvm_test" - stdpar IntelLLVM (icpx) backend tests + "cuda" - CUDA backend + "cuda_python" - CUDA backend + Python bindings + "cuda_test" - CUDA backend tests + "hip" - HIP backend + "hip_python" - HIP backend + Python bindings + "hip_test" - HIP backend tests + "opencl" - OpenCL backend + "opencl_python" - OpenCL backend + Python bindings + "opencl_test" - OpenCL backend tests + "acpp" - AdaptiveCpp SYCL backend + "acpp_python" - AdaptiveCpp SYCL backend + Python bindings + "acpp_test" - AdaptiveCpp SYCL backend tests + "dpcpp" - DPC++/icpx SYCL backend + "dpcpp_python" - DPC++/icpx backend + Python bindings + "dpcpp_test" - DPC++/icpx backend tests + "kokkos" - Kokkos backend + "kokkos_python" - Kokkos backend + Python bindings + "kokkos_test" - Kokkos backend tests + 
"all" - All available backends + "all_python" - All available backends + Python bindings + "all_test" - All available backends tests ``` With these presets, building and testing, e.g., our CUDA backend is as simple as typing (in the PLSSVM root directory): @@ -532,6 +549,8 @@ Usage: choose the kernel invocation type when using SYCL as backend: automatic|nd_range (default: automatic) --sycl_implementation_type arg choose the SYCL implementation to be used in the SYCL backend: automatic|dpcpp|adaptivecpp (default: automatic) + --kokkos_execution_space arg + choose the Kokkos execution space to be used in the Kokkos backend: automatic|Cuda|OpenMP|Serial (default: automatic) --performance_tracking arg the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr --use_strings_as_labels use strings as labels instead of plane numbers @@ -567,10 +586,10 @@ Another example targeting NVIDIA GPUs using the SYCL backend looks like: The `--backend=automatic` option works as follows: -- if the `gpu_nvidia` target is available, check for existing backends in order `cuda` 🠦 `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar` -- otherwise, if the `gpu_amd` target is available, check for existing backends in order `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `stdpar` -- otherwise, if the `gpu_intel` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `stdpar` -- otherwise, if the `cpu` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `openmp` 🠦 `stdpar` +- if the `gpu_nvidia` target is available, check for existing backends in order `cuda` 🠦 `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `kokkos` 🠦 `stdpar` +- otherwise, if the `gpu_amd` target is available, check for existing backends in order `hip` 🠦 `opencl` 🠦 `sycl` 🠦 `kokkos` 🠦 `stdpar` +- otherwise, if the `gpu_intel` target is available, check for existing backends in order `sycl` 🠦 `opencl` 🠦 `kokkos` 🠦 `stdpar` +- otherwise, if the `cpu` target is 
available, check for existing backends in order `sycl` 🠦 `kokkos` 🠦 `opencl` 🠦 `openmp` 🠦 `stdpar` Note that during CMake configuration it is guaranteed that at least one of the above combinations does exist. @@ -581,11 +600,13 @@ The `--target_platform=automatic` option works for the different backends as fol - `HIP`: always selects an AMD GPU (if no AMD GPU is available, throws an exception) - `OpenCL`: tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU - `SYCL`: tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU +- `Kokkos`: checks which execution spaces are available and which target platforms they support and then tries to find available devices in the following order: NVIDIA GPUs 🠦 AMD GPUs 🠦 Intel GPUs 🠦 CPU - `stdpar`: target device must be selected at compile time (using `PLSSVM_TARGET_PLATFORMS`) or using environment variables at runtime The `--sycl_kernel_invocation_type` and `--sycl_implementation_type` flags are only used if the `--backend` is `sycl`, otherwise a warning is emitted on `stderr`. If the `--sycl_kernel_invocation_type` is `automatic`, the `nd_range` invocation type is currently always used. If the `--sycl_implementation_type` is `automatic`, the used SYCL implementation is determined by the `PLSSVM_SYCL_BACKEND_PREFERRED_IMPLEMENTATION` CMake flag. +If the `--kokkos_execution_space` is `automatic`, the best fitting execution space based on the provided and/or available target platforms is used. 
### Predicting using `plssvm-predict` @@ -604,6 +625,8 @@ Usage: -p, --target_platform arg choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic) --sycl_implementation_type arg choose the SYCL implementation to be used in the SYCL backend: automatic|dpcpp|adaptivecpp (default: automatic) + --kokkos_execution_space arg + choose the Kokkos execution space to be used in the Kokkos backend: automatic|Cuda|OpenMP|Serial (default: automatic) --performance_tracking arg the output YAML file where the performance tracking results are written to; if not provided, the results are dumped to stderr --use_strings_as_labels use strings as labels instead of plane numbers From 66dfee6e5ca1ba92249d561bfabd2a9d0df6d37f Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 16:41:09 +0100 Subject: [PATCH 070/123] Add support for the Kokkos backend as PLSSVM install component. --- cmake/plssvm/plssvmConfig.cmake.in | 2 +- cmake/plssvm/plssvmKokkosTargets.cmake | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 cmake/plssvm/plssvmKokkosTargets.cmake diff --git a/cmake/plssvm/plssvmConfig.cmake.in b/cmake/plssvm/plssvmConfig.cmake.in index e6be17d15..0e4f989ec 100644 --- a/cmake/plssvm/plssvmConfig.cmake.in +++ b/cmake/plssvm/plssvmConfig.cmake.in @@ -25,7 +25,7 @@ find_dependency(fmt REQUIRED) include("${CMAKE_CURRENT_LIST_DIR}/plssvmTargets.cmake") # list all available libraries -set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;stdpar") +set(PLSSVM_SUPPORTED_COMPONENTS "OpenMP;CUDA;HIP;OpenCL;DPCPP;AdaptiveCpp;Kokkos;stdpar") set(PLSSVM_DISABLED_COMPONENTS "${PLSSVM_SUPPORTED_COMPONENTS}") # check which libraries are available diff --git a/cmake/plssvm/plssvmKokkosTargets.cmake b/cmake/plssvm/plssvmKokkosTargets.cmake new file mode 100644 index 000000000..7ec32069a --- /dev/null +++ b/cmake/plssvm/plssvmKokkosTargets.cmake @@ -0,0 +1,21 @@ +## Authors: Alexander Van 
Craen, Marcel Breyer +## Copyright (C): 2018-today The PLSSVM project - All Rights Reserved +## License: This file is part of the PLSSVM project which is released under the MIT license. +## See the LICENSE.md file in the project root for full license information. +######################################################################################################################## + +include(CMakeFindDependencyMacro) + +# check if the Kokkos backend is available +if (TARGET plssvm::plssvm-Kokkos) + # enable Kokkos + find_dependency(Kokkos CONFIG) + # set alias targets + add_library(plssvm::Kokkos ALIAS plssvm::plssvm-Kokkos) + add_library(plssvm::kokkos ALIAS plssvm::plssvm-Kokkos) + # set COMPONENT to be found + set(plssvm_Kokkos_FOUND ON) +else () + # set COMPONENT to be NOT found + set(plssvm_Kokkos_FOUND OFF) +endif () \ No newline at end of file From e30677fd894c527a1a30e99218641b40a77d1d7f Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 16:47:08 +0100 Subject: [PATCH 071/123] Update manpages to include the Kokkos backend. --- CMakeLists.txt | 12 +++++++++++- docs/plssvm-predict.1.in | 2 ++ docs/plssvm-train.1.in | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 19309a9eb..d555bd8e4 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -729,8 +729,8 @@ message(STATUS "Generating manpage files.") string(TIMESTAMP PLSSVM_CURRENT_BUILD_TIME "%d. 
%B %Y") string(REPLACE ";" "|" PLSSVM_PLATFORM_NAME_LIST "${PLSSVM_PLATFORM_NAME_LIST}") string(REPLACE ";" "|" PLSSVM_BACKEND_NAME_LIST "${PLSSVM_BACKEND_NAME_LIST}") -string(REPLACE ";" "|" PLSSVM_SYCL_BACKEND_NAME_LIST "${PLSSVM_SYCL_BACKEND_NAME_LIST}") if (TARGET ${PLSSVM_SYCL_BACKEND_LIBRARY_NAME}) + string(REPLACE ";" "|" PLSSVM_SYCL_BACKEND_NAME_LIST "${PLSSVM_SYCL_BACKEND_NAME_LIST}") set(PLSSVM_SYCL_IMPLEMENTATION_TYPE_MANPAGE_ENTRY " .TP .B --sycl_implementation_type @@ -743,6 +743,15 @@ choose the kernel invocation type when using SYCL as backend: automatic|nd_range ") endif () set(PLSSVM_SYCL_MANPAGE_ENTRY "${PLSSVM_SYCL_KERNEL_INVOCATION_TYPE_MANPAGE_ENTRY}${PLSSVM_SYCL_IMPLEMENTATION_TYPE_MANPAGE_ENTRY}") +## assemble the Kokkos manpage entry +if (TARGET ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) + string(REPLACE ";" "|" PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES}") + set(PLSSVM_KOKKOS_MANPAGE_ENTRY " +.TP +.B --kokkos_execution_space +choose the Kokkos execution space to be used in the Kokkos backend: automatic|${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES} (default: automatic) +") +endif () ## assemble the performance tracker manpage entry if (PLSSVM_ENABLE_PERFORMANCE_TRACKING) set(PLSSVM_PERFORMANCE_TRACKER_MANPAGE_ENTRY " @@ -758,6 +767,7 @@ configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/docs/plssvm-train.1 @ONLY ) +# update manpage entry since plssvm-predict can't recognize the SYCL kernel invocation type set(PLSSVM_SYCL_MANPAGE_ENTRY "${PLSSVM_SYCL_IMPLEMENTATION_TYPE_MANPAGE_ENTRY}") configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/docs/plssvm-predict.1.in diff --git a/docs/plssvm-predict.1.in b/docs/plssvm-predict.1.in index bb9e29b6b..17d6081fa 100644 --- a/docs/plssvm-predict.1.in +++ b/docs/plssvm-predict.1.in @@ -22,6 +22,8 @@ choose the target platform: @PLSSVM_PLATFORM_NAME_LIST@ (default: automatic) @PLSSVM_SYCL_MANPAGE_ENTRY@ +@PLSSVM_KOKKOS_MANPAGE_ENTRY@ + 
@PLSSVM_PERFORMANCE_TRACKER_MANPAGE_ENTRY@ .TP diff --git a/docs/plssvm-train.1.in b/docs/plssvm-train.1.in index e34541848..fad2e4fba 100644 --- a/docs/plssvm-train.1.in +++ b/docs/plssvm-train.1.in @@ -64,6 +64,8 @@ choose the target platform: @PLSSVM_PLATFORM_NAME_LIST@ (default: automatic) @PLSSVM_SYCL_MANPAGE_ENTRY@ +@PLSSVM_KOKKOS_MANPAGE_ENTRY@ + @PLSSVM_PERFORMANCE_TRACKER_MANPAGE_ENTRY@ .TP From 7be8170ffdf42c4ba01bffd546459089beda8665 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 16:48:07 +0100 Subject: [PATCH 072/123] Add kokkos to the --help examples. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 71a96e499..047a9fd23 100644 --- a/README.md +++ b/README.md @@ -543,7 +543,7 @@ Usage: -i, --max_iter arg set the maximum number of CG iterations (default: num_features) -l, --solver arg choose the solver: automatic|cg_explicit|cg_implicit (default: automatic) -a, --classification arg the classification strategy to use for multi-class classification: oaa|oao (default: oaa) - -b, --backend arg choose the backend: automatic|openmp|cuda|hip|opencl|sycl|stdpar (default: automatic) + -b, --backend arg choose the backend: automatic|openmp|cuda|hip|opencl|sycl|kokkos|stdpar (default: automatic) -p, --target_platform arg choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic) --sycl_kernel_invocation_type arg choose the kernel invocation type when using SYCL as backend: automatic|nd_range (default: automatic) @@ -621,7 +621,7 @@ LS-SVM with multiple (GPU-)backends Usage: ./plssvm-predict [OPTION...] 
test_file model_file [output_file] - -b, --backend arg choose the backend: automatic|openmp|cuda|hip|opencl|sycl|stdpar (default: automatic) + -b, --backend arg choose the backend: automatic|openmp|cuda|hip|opencl|sycl|kokkos|stdpar (default: automatic) -p, --target_platform arg choose the target platform: automatic|cpu|gpu_nvidia|gpu_amd|gpu_intel (default: automatic) --sycl_implementation_type arg choose the SYCL implementation to be used in the SYCL backend: automatic|dpcpp|adaptivecpp (default: automatic) From f967c47bae9a0009338c497eb67a09839fd283e8 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 16:57:04 +0100 Subject: [PATCH 073/123] Add missing include documentation. --- bindings/Python/backends/adaptivecpp_csvm.cpp | 2 +- bindings/Python/backends/dpcpp_csvm.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/Python/backends/adaptivecpp_csvm.cpp b/bindings/Python/backends/adaptivecpp_csvm.cpp index 767853757..bf81b11ae 100644 --- a/bindings/Python/backends/adaptivecpp_csvm.cpp +++ b/bindings/Python/backends/adaptivecpp_csvm.cpp @@ -11,7 +11,7 @@ #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::sycl_kernel_invocation_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception diff --git a/bindings/Python/backends/dpcpp_csvm.cpp b/bindings/Python/backends/dpcpp_csvm.cpp index 882d6ea37..906cb5979 100644 --- a/bindings/Python/backends/dpcpp_csvm.cpp +++ b/bindings/Python/backends/dpcpp_csvm.cpp @@ -11,7 +11,7 @@ #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type 
#include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::sycl_kernel_invocation_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception From d56397ad570d7da0264d6d2dae937e8bfc7a554e Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 16:58:09 +0100 Subject: [PATCH 074/123] Add kokkos_execution_space named parameter to the Kokkos python bindings. --- bindings/Python/backends/kokkos_csvm.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/bindings/Python/backends/kokkos_csvm.cpp b/bindings/Python/backends/kokkos_csvm.cpp index ea0dc17e6..ea6c4af80 100644 --- a/bindings/Python/backends/kokkos_csvm.cpp +++ b/bindings/Python/backends/kokkos_csvm.cpp @@ -11,7 +11,7 @@ #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/csvm.hpp" // plssvm::csvm #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception -#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/parameter.hpp" // plssvm::parameter, plssvm::kokkos_execution_space #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "bindings/Python/utility.hpp" // check_kwargs_for_correctness, convert_kwargs_to_parameter, register_py_exception @@ -35,20 +35,24 @@ void init_kokkos_csvm(py::module_ &m, const py::exception &ba .def(py::init(), "create an SVM with the provided target platform and parameter object") .def(py::init([](const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" 
}); // if one of the value keyword parameter is provided, set the respective value const plssvm::parameter params = convert_kwargs_to_parameter(args); + // set Kokkos execution space + const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; // create CSVM with the default target platform - return std::make_unique(params); + return std::make_unique(params, plssvm::kokkos_execution_space = space); }), "create an SVM with the default target platform and keyword arguments") .def(py::init([](const plssvm::target_platform target, const py::kwargs &args) { // check for valid keys - check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost" }); + check_kwargs_for_correctness(args, { "kernel_type", "degree", "gamma", "coef0", "cost", "kokkos_execution_space" }); // if one of the value keyword parameter is provided, set the respective value const plssvm::parameter params = convert_kwargs_to_parameter(args); + // set Kokkos execution space + const plssvm::kokkos::execution_space space = args.contains("kokkos_execution_space") ? 
args["kokkos_execution_space"].cast() : plssvm::kokkos::execution_space::automatic; // create CSVM with the provided target platform - return std::make_unique(target, params); + return std::make_unique(target, params, plssvm::kokkos_execution_space = space); }), "create an SVM with the provided target platform and keyword arguments") .def("get_execution_space", &plssvm::kokkos::csvm::get_execution_space, "get the Kokkos execution space used in this Kokkos SVM"); @@ -58,6 +62,7 @@ void init_kokkos_csvm(py::module_ &m, const py::exception &ba // bind the execution space enum classes py::enum_(kokkos_module, "ExecutionSpace") + .value("AUTOMATIC", plssvm::kokkos::execution_space::automatic, "automatically determine the used Kokkos execution space (note: this does not necessarily correspond to Kokkos::DefaultExecutionSpace)") .value("CUDA", plssvm::kokkos::execution_space::cuda, "execution space representing execution on a CUDA device") .value("HIP", plssvm::kokkos::execution_space::hip, "execution space representing execution on a device supported by HIP") .value("SYCL", plssvm::kokkos::execution_space::sycl, "execution space representing execution on a device supported by SYCL") From 2809daacbbcc243c25a6407a8e33e558e1db62c2 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 17:06:14 +0100 Subject: [PATCH 075/123] Allow Kokkos to be built in the all preset. 
--- cmake/presets/all.json | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cmake/presets/all.json b/cmake/presets/all.json index 76528069b..f0794f2ba 100644 --- a/cmake/presets/all.json +++ b/cmake/presets/all.json @@ -13,7 +13,8 @@ "PLSSVM_ENABLE_CUDA_BACKEND": "AUTO", "PLSSVM_ENABLE_HIP_BACKEND": "AUTO", "PLSSVM_ENABLE_OPENCL_BACKEND": "AUTO", - "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO" + "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO", + "PLSSVM_ENABLE_KOKKOS_BACKEND": "AUTO" } }, { @@ -28,6 +29,7 @@ "PLSSVM_ENABLE_HIP_BACKEND": "AUTO", "PLSSVM_ENABLE_OPENCL_BACKEND": "AUTO", "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO", + "PLSSVM_ENABLE_KOKKOS_BACKEND": "AUTO", "PLSSVM_ENABLE_LANGUAGE_BINDINGS": "ON", "PLSSVM_ENABLE_PYTHON_BINDINGS": "ON" } @@ -43,7 +45,8 @@ "PLSSVM_ENABLE_CUDA_BACKEND": "AUTO", "PLSSVM_ENABLE_HIP_BACKEND": "AUTO", "PLSSVM_ENABLE_OPENCL_BACKEND": "AUTO", - "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO" + "PLSSVM_ENABLE_SYCL_BACKEND": "AUTO", + "PLSSVM_ENABLE_KOKKOS_BACKEND": "AUTO" } } ], @@ -84,7 +87,7 @@ "inherits": "common", "filter": { "include": { - "name": "OpenMP.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*" + "name": "OpenMP.*|CUDA.*|HIP.*|OpenCL.*|AdaptiveCpp.*|DPCPP.*|Kokkos.*" } } } From dcbbe102dd6d19a56d18ce91380877ed544a39fd Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 17:12:48 +0100 Subject: [PATCH 076/123] Update TODOs. 
--- tests/backends/Kokkos/detail/device_wrapper.cpp | 2 +- tests/backends/Kokkos/detail/pinned_memory.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/backends/Kokkos/detail/device_wrapper.cpp b/tests/backends/Kokkos/detail/device_wrapper.cpp index 4547281ff..ca644ece7 100644 --- a/tests/backends/Kokkos/detail/device_wrapper.cpp +++ b/tests/backends/Kokkos/detail/device_wrapper.cpp @@ -99,7 +99,7 @@ struct device_list_test { // check the number of returned devices if (space == plssvm::kokkos::execution_space::cuda || space == plssvm::kokkos::execution_space::hip || space == plssvm::kokkos::execution_space::sycl) { - // TODO: OpenMP Target Offloading / OpenACC + // TODO: Change if multi-GPU support for Kokkos::Experimental::OpenMPTarget and/or Kokkos::Experimental::OpenACC is implemented // for the device execution spaces AT LEAST ONE device must be found EXPECT_GE(devices.size(), 1); } else { diff --git a/tests/backends/Kokkos/detail/pinned_memory.cpp b/tests/backends/Kokkos/detail/pinned_memory.cpp index aa91612d7..2569e68e7 100644 --- a/tests/backends/Kokkos/detail/pinned_memory.cpp +++ b/tests/backends/Kokkos/detail/pinned_memory.cpp @@ -22,7 +22,7 @@ template struct kokkos_pinned_memory_test_type { using pinned_memory_type = plssvm::kokkos::detail::pinned_memory; - constexpr static bool can_pin = false; // TODO: try implementing in Kokkos? + constexpr static bool can_pin = false; }; using kokkos_pinned_memory_tuple = std::tuple, kokkos_pinned_memory_test_type>; From 9a53e7fce2404519151f3aa6e134538be27c4861 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 17:32:07 +0100 Subject: [PATCH 077/123] Add missing Kokkos related performance tracking entries. 
--- src/plssvm/detail/tracking/performance_tracker.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/plssvm/detail/tracking/performance_tracker.cpp b/src/plssvm/detail/tracking/performance_tracker.cpp index 26ebda7d3..6d1323e8e 100644 --- a/src/plssvm/detail/tracking/performance_tracker.cpp +++ b/src/plssvm/detail/tracking/performance_tracker.cpp @@ -116,6 +116,7 @@ void performance_tracker::add_tracking_entry(const tracking_entry{ fmt::format("{}", entry.entry_value.target) }); tracking_entries_[entry.entry_category].emplace("sycl_kernel_invocation_type", std::vector{ fmt::format("{}", entry.entry_value.sycl_kernel_invocation_type) }); tracking_entries_[entry.entry_category].emplace("sycl_implementation_type", std::vector{ fmt::format("{}", entry.entry_value.sycl_implementation_type) }); + tracking_entries_[entry.entry_category].emplace("kokkos_execution_space", std::vector{ fmt::format("{}", entry.entry_value.kokkos_execution_space) }); tracking_entries_[entry.entry_category].emplace("strings_as_labels", std::vector{ fmt::format("{}", entry.entry_value.strings_as_labels) }); tracking_entries_[entry.entry_category].emplace("real_type", std::vector{ std::string{ arithmetic_type_name() } }); tracking_entries_[entry.entry_category].emplace("input_filename", std::vector{ fmt::format("\"{}\"", entry.entry_value.input_filename) }); @@ -133,6 +134,7 @@ void performance_tracker::add_tracking_entry(const tracking_entry{ fmt::format("{}", entry.entry_value.backend) }); tracking_entries_[entry.entry_category].emplace("target", std::vector{ fmt::format("{}", entry.entry_value.target) }); tracking_entries_[entry.entry_category].emplace("sycl_implementation_type", std::vector{ fmt::format("{}", entry.entry_value.sycl_implementation_type) }); + tracking_entries_[entry.entry_category].emplace("kokkos_execution_space", std::vector{ fmt::format("{}", entry.entry_value.kokkos_execution_space) }); 
tracking_entries_[entry.entry_category].emplace("strings_as_labels", std::vector{ fmt::format("{}", entry.entry_value.strings_as_labels) }); tracking_entries_[entry.entry_category].emplace("real_type", std::vector{ std::string{ arithmetic_type_name() } }); tracking_entries_[entry.entry_category].emplace("input_filename", std::vector{ fmt::format("\"{}\"", entry.entry_value.input_filename) }); @@ -297,6 +299,14 @@ void performance_tracker::save(std::ostream &out) { " ADAPTIVECPP_with_accelerated_CPU: {}\n", adaptivecpp_sscp, adaptivecpp_accelerated_cpu); +#endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + // check whether Kokkos::SYCL AOT has been enabled + constexpr bool kokkos_sycl_aot = PLSSVM_IS_DEFINED(PLSSVM_KOKKOS_BACKEND_INTEL_LLVM_ENABLE_AOT); + + out << fmt::format( + " KOKKOS_sycl_intel_llvm_with_aot: {}\n", + kokkos_sycl_aot); #endif out << "\n"; From 5636ea14b70142782948a78a4edbb74588f8461f Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 17:47:19 +0100 Subject: [PATCH 078/123] Add missing Kokkos entry to Python bindings backend type enumeration. 
--- bindings/Python/backend_types.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bindings/Python/backend_types.cpp b/bindings/Python/backend_types.cpp index 8a1fa29fb..8c6b97e85 100644 --- a/bindings/Python/backend_types.cpp +++ b/bindings/Python/backend_types.cpp @@ -24,7 +24,8 @@ void init_backend_types(py::module_ &m) { .value("CUDA", plssvm::backend_type::cuda, "CUDA to target NVIDIA GPUs only") .value("HIP", plssvm::backend_type::hip, "HIP to target AMD and NVIDIA GPUs") .value("OPENCL", plssvm::backend_type::opencl, "OpenCL to target CPUs and GPUs from different vendors") - .value("SYCL", plssvm::backend_type::sycl, "SYCL to target CPUs and GPUs from different vendors; currently tested SYCL implementations are DPC++ and AdaptiveCpp"); + .value("SYCL", plssvm::backend_type::sycl, "SYCL to target CPUs and GPUs from different vendors; currently tested SYCL implementations are DPC++ and AdaptiveCpp") + .value("KOKKOS", plssvm::backend_type::kokkos, "Kokkos to target CPUs and GPUs from different vendors; currently all Kokkos execution spaces except Kokkos::Experimental::OpenMPTarget and Kokkos::Experimental::OpenACC are supported"); // bind free functions m.def("list_available_backends", &plssvm::list_available_backends, "list the available backends (as found during CMake configuration)"); From c187ec05353e2029a216187b2e11fed179ccd6c9 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 17:54:15 +0100 Subject: [PATCH 079/123] Add missing operator<< output for the new kokkos_execution_space option. 
--- src/plssvm/detail/cmd/parser_predict.cpp | 4 ++++ src/plssvm/detail/cmd/parser_train.cpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/plssvm/detail/cmd/parser_predict.cpp b/src/plssvm/detail/cmd/parser_predict.cpp index 298c90da0..656d9a76d 100644 --- a/src/plssvm/detail/cmd/parser_predict.cpp +++ b/src/plssvm/detail/cmd/parser_predict.cpp @@ -202,6 +202,10 @@ std::ostream &operator<<(std::ostream &out, const parser_predict ¶ms) { out << fmt::format("SYCL implementation type: {}\n", params.sycl_implementation_type); } + if (params.backend == backend_type::kokkos || params.backend == backend_type::automatic) { + out << fmt::format("Kokkos execution space: {}\n", params.kokkos_execution_space); + } + out << fmt::format( "label_type: {}\n" "real_type: {}\n" diff --git a/src/plssvm/detail/cmd/parser_train.cpp b/src/plssvm/detail/cmd/parser_train.cpp index 31964a897..31d5b8719 100644 --- a/src/plssvm/detail/cmd/parser_train.cpp +++ b/src/plssvm/detail/cmd/parser_train.cpp @@ -326,6 +326,10 @@ std::ostream &operator<<(std::ostream &out, const parser_train ¶ms) { params.sycl_kernel_invocation_type); } + if (params.backend == backend_type::kokkos || params.backend == backend_type::automatic) { + out << fmt::format("Kokkos execution space: {}\n", params.kokkos_execution_space); + } + out << fmt::format( "classification_type: {}\n" "label_type: {}\n" From de69b870fa732c192055423ea5f91309a06d2b91 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 18:07:30 +0100 Subject: [PATCH 080/123] Fix using wrong executable in cmd parser test. 
--- tests/detail/cmd/parser_predict.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/detail/cmd/parser_predict.cpp b/tests/detail/cmd/parser_predict.cpp index 8a04c3b1d..747e1d2bc 100644 --- a/tests/detail/cmd/parser_predict.cpp +++ b/tests/detail/cmd/parser_predict.cpp @@ -270,7 +270,7 @@ class ParserPredictVerbosity : public ParserPredict, TEST_P(ParserPredictVerbosity, parsing) { const auto &[flag, value] = GetParam(); // create artificial command line arguments in test fixture - this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm", "data.libsvm.model" }); + this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); // create parameter object const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; // test for correctness From 6a18cc650510a016fb0e2c46cf64ee46affde9f2 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 7 Nov 2024 18:18:43 +0100 Subject: [PATCH 081/123] Update parameter and cmd parser tests to reflect new kokkos_execution_space option. 
--- tests/detail/cmd/parser_predict.cpp | 71 +++++++++++++++++++++++++---- tests/detail/cmd/parser_train.cpp | 50 +++++++++++++++++++- tests/parameter.cpp | 4 +- 3 files changed, 113 insertions(+), 12 deletions(-) diff --git a/tests/detail/cmd/parser_predict.cpp b/tests/detail/cmd/parser_predict.cpp index 747e1d2bc..fccf2f005 100644 --- a/tests/detail/cmd/parser_predict.cpp +++ b/tests/detail/cmd/parser_predict.cpp @@ -11,6 +11,7 @@ #include "plssvm/detail/cmd/parser_predict.hpp" #include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -67,6 +68,7 @@ TEST_F(ParserPredict, minimal_output) { "backend: automatic\n" "target platform: automatic\n" "SYCL implementation type: automatic\n" + "Kokkos execution space: automatic\n" "label_type: int (default)\n" "real_type: {}\n" "input file (data set): 'data.libsvm'\n" @@ -85,6 +87,10 @@ TEST_F(ParserPredict, all_arguments) { #if defined(PLSSVM_HAS_SYCL_BACKEND) cmd_args.insert(cmd_args.end(), { "--sycl_implementation_type", "dpcpp" }); #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + const plssvm::kokkos::execution_space space = plssvm::kokkos::list_available_execution_spaces()[1]; // [0] would be automatic + cmd_args.insert(cmd_args.end(), { "--kokkos_execution_space", fmt::format("{}", space) }); +#endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) cmd_args.insert(cmd_args.end(), { "--performance_tracking", "tracking.yaml" }); #endif @@ -101,6 +107,11 @@ TEST_F(ParserPredict, all_arguments) { EXPECT_EQ(parser.sycl_implementation_type, plssvm::sycl::implementation_type::dpcpp); #else EXPECT_EQ(parser.sycl_implementation_type, plssvm::sycl::implementation_type::automatic); +#endif +#if 
defined(PLSSVM_HAS_KOKKOS_BACKEND) + EXPECT_EQ(parser.kokkos_execution_space, space); +#else + EXPECT_EQ(parser.kokkos_execution_space, plssvm::kokkos::execution_space::automatic); #endif EXPECT_TRUE(parser.strings_as_labels); EXPECT_EQ(parser.input_filename, "data.libsvm"); @@ -117,10 +128,14 @@ TEST_F(ParserPredict, all_arguments) { TEST_F(ParserPredict, all_arguments_output) { // create artificial command line arguments in test fixture - std::vector cmd_args = { "./plssvm-predict", "--backend", "cuda", "--target_platform", "gpu_nvidia", "--use_strings_as_labels", "--verbosity", "libsvm" }; + std::vector cmd_args = { "./plssvm-predict", "--backend", "automatic", "--target_platform", "gpu_nvidia", "--use_strings_as_labels", "--verbosity", "libsvm" }; #if defined(PLSSVM_HAS_SYCL_BACKEND) cmd_args.insert(cmd_args.end(), { "--sycl_implementation_type", "dpcpp" }); #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + const plssvm::kokkos::execution_space space = plssvm::kokkos::list_available_execution_spaces()[1]; // [0] would be automatic + cmd_args.insert(cmd_args.end(), { "--kokkos_execution_space", fmt::format("{}", space) }); +#endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) cmd_args.insert(cmd_args.end(), { "--performance_tracking", "tracking.yaml" }); #endif @@ -131,15 +146,27 @@ TEST_F(ParserPredict, all_arguments_output) { const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; // test output string - std::string correct = fmt::format( - "backend: cuda\n" + std::string correct{ + "backend: automatic\n" "target platform: gpu_nvidia\n" - "label_type: std::string\n" - "real_type: {}\n" - "input file (data set): 'data1.libsvm'\n" - "input file (model): 'data2.libsvm.model'\n" - "output file (prediction): 'data3.libsvm.predict'\n", - std::is_same_v ? 
"float" : "double (default)"); + }; +#if defined(PLSSVM_HAS_SYCL_BACKEND) + correct += "SYCL implementation type: dpcpp\n"; +#else + correct += "SYCL implementation type: automatic\n"; +#endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + correct += fmt::format("Kokkos execution space: {}\n", space); +#else + correct += "Kokkos execution space: automatic\n"; +#endif + correct += fmt::format("label_type: std::string\n" + "real_type: {}\n" + "input file (data set): 'data1.libsvm'\n" + "input file (model): 'data2.libsvm.model'\n" + "output file (prediction): 'data3.libsvm.predict'\n", + std::is_same_v ? "float" : "double (default)"); + #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) correct += "performance tracking file: 'tracking.yaml'\n"; #endif @@ -220,6 +247,32 @@ INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictSYCLImplementation, ::testi #endif // PLSSVM_HAS_SYCL_BACKEND +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + +class ParserPredictKokkosExecutionSpace : public ParserPredict, + public ::testing::WithParamInterface> { }; + +TEST_P(ParserPredictKokkosExecutionSpace, parsing) { + const auto &[flag, value] = GetParam(); + // convert string to kokkos::execution_space + const auto kokkos_execution_space = util::convert_from_string(value); + // create artificial command line arguments in test fixture + this->CreateCMDArgs({ "./plssvm-predict", flag, value, "data.libsvm", "data.libsvm.model" }); + // create parameter object + const plssvm::detail::cmd::parser_predict parser{ this->get_argc(), this->get_argv() }; + // test for correctness + EXPECT_EQ(parser.kokkos_execution_space, kokkos_execution_space); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserPredictKokkosExecutionSpace, ::testing::Combine( + ::testing::Values("--kokkos_execution_space"), + ::testing::Values("automatic", "Cuda", "HIP", "SYCL", "HPX", "OpenMP", "OpenMPTarget", "OpenACC", "Threads", "Serial")), + naming::pretty_print_parameter_flag_and_value); +// clang-format on + +#endif 
// PLSSVM_HAS_KOKKOS_BACKEND + #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) class ParserPredictPerformanceTrackingFilename : public ParserPredict, diff --git a/tests/detail/cmd/parser_train.cpp b/tests/detail/cmd/parser_train.cpp index ba1392d75..78d43a25f 100644 --- a/tests/detail/cmd/parser_train.cpp +++ b/tests/detail/cmd/parser_train.cpp @@ -11,6 +11,7 @@ #include "plssvm/detail/cmd/parser_train.hpp" #include "plssvm/backend_types.hpp" // plssvm::backend_type +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/classification_types.hpp" // plssvm::classification_type @@ -88,6 +89,7 @@ TEST_F(ParserTrain, minimal_output) { "solver: automatic\n" "SYCL implementation type: automatic\n" "SYCL kernel invocation type: automatic\n" + "Kokkos execution space: automatic\n" "classification_type: one vs. 
all\n" "label_type: int\n" "real_type: {}\n" @@ -105,6 +107,10 @@ TEST_F(ParserTrain, all_arguments) { #if defined(PLSSVM_HAS_SYCL_BACKEND) cmd_args.insert(cmd_args.end(), { "--sycl_kernel_invocation_type", "nd_range", "--sycl_implementation_type", "dpcpp" }); #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + const plssvm::kokkos::execution_space space = plssvm::kokkos::list_available_execution_spaces()[1]; // [0] would be automatic + cmd_args.insert(cmd_args.end(), { "--kokkos_execution_space", fmt::format("{}", space) }); +#endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) cmd_args.insert(cmd_args.end(), { "--performance_tracking", "tracking.yaml" }); #endif @@ -134,6 +140,11 @@ TEST_F(ParserTrain, all_arguments) { #else EXPECT_EQ(parser.sycl_kernel_invocation_type, plssvm::sycl::kernel_invocation_type::automatic); EXPECT_EQ(parser.sycl_implementation_type, plssvm::sycl::implementation_type::automatic); +#endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + EXPECT_EQ(parser.kokkos_execution_space, space); +#else + EXPECT_EQ(parser.kokkos_execution_space, plssvm::kokkos::execution_space::automatic); #endif EXPECT_TRUE(parser.strings_as_labels); EXPECT_EQ(parser.input_filename, "data.libsvm"); @@ -147,10 +158,14 @@ TEST_F(ParserTrain, all_arguments) { TEST_F(ParserTrain, all_arguments_output) { // create artificial command line arguments in test fixture - std::vector cmd_args = { "./plssvm-train", "--kernel_type", "1", "--degree", "2", "--gamma", "1.5", "--coef0", "-1.5", "--cost", "2", "--epsilon", "1e-10", "--max_iter", "100", "--classification", "oao", "--solver", "cg_implicit", "--backend", "sycl", "--target_platform", "gpu_nvidia", "--use_strings_as_labels", "--verbosity", "libsvm" }; + std::vector cmd_args = { "./plssvm-train", "--kernel_type", "1", "--degree", "2", "--gamma", "1.5", "--coef0", "-1.5", "--cost", "2", "--epsilon", "1e-10", "--max_iter", "100", "--classification", "oao", "--solver", "cg_implicit", "--backend", "automatic", "--target_platform", 
"gpu_nvidia", "--use_strings_as_labels", "--verbosity", "libsvm" }; #if defined(PLSSVM_HAS_SYCL_BACKEND) cmd_args.insert(cmd_args.end(), { "--sycl_kernel_invocation_type", "nd_range", "--sycl_implementation_type", "dpcpp" }); #endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + const std::string space = fmt::format("{}", plssvm::kokkos::list_available_execution_spaces()[1]); // [0] would be automatic + cmd_args.insert(cmd_args.end(), { "--kokkos_execution_space", space }); +#endif #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) cmd_args.insert(cmd_args.end(), { "--performance_tracking", "tracking.yaml" }); #endif @@ -169,7 +184,7 @@ TEST_F(ParserTrain, all_arguments_output) { "cost: 2\n" "epsilon: 1e-10\n" "max_iter: 100\n" - "backend: sycl\n" + "backend: automatic\n" "target platform: gpu_nvidia\n" "solver: cg_implicit\n"; #if defined(PLSSVM_HAS_SYCL_BACKEND) @@ -178,6 +193,11 @@ TEST_F(ParserTrain, all_arguments_output) { #else correct += "SYCL implementation type: automatic\n" "SYCL kernel invocation type: automatic\n"; +#endif +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + correct += fmt::format("Kokkos execution space: {}\n", space); +#else + correct += "Kokkos execution space: automatic\n"; #endif correct += fmt::format( "classification_type: one vs. 
one\n" @@ -517,6 +537,32 @@ INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainSYCLImplementation, ::testing:: #endif // PLSSVM_HAS_SYCL_BACKEND +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + +class ParserTrainKokkosExecutionSpace : public ParserTrain, + public ::testing::WithParamInterface> { }; + +TEST_P(ParserTrainKokkosExecutionSpace, parsing) { + const auto &[flag, value] = GetParam(); + // convert string to kokkos::execution_space + const auto kokkos_execution_space = util::convert_from_string(value); + // create artificial command line arguments in test fixture + this->CreateCMDArgs({ "./plssvm-train", flag, value, "data.libsvm" }); + // create parameter object + const plssvm::detail::cmd::parser_train parser{ this->get_argc(), this->get_argv() }; + // test for correctness + EXPECT_EQ(parser.kokkos_execution_space, kokkos_execution_space); +} + +// clang-format off +INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainKokkosExecutionSpace, ::testing::Combine( + ::testing::Values("--kokkos_execution_space"), + ::testing::Values("automatic", "Cuda", "HIP", "SYCL", "HPX", "OpenMP", "OpenMPTarget", "OpenACC", "Threads", "Serial")), + naming::pretty_print_parameter_flag_and_value); +// clang-format on + +#endif // PLSSVM_HAS_KOKKOS_BACKEND + #if defined(PLSSVM_PERFORMANCE_TRACKER_ENABLED) class ParserTrainPerformanceTrackingFilename : public ParserTrain, diff --git a/tests/parameter.cpp b/tests/parameter.cpp index 588fc703c..7db96c6bf 100644 --- a/tests/parameter.cpp +++ b/tests/parameter.cpp @@ -10,6 +10,7 @@ #include "plssvm/parameter.hpp" +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/constants.hpp" // plssvm::real_type @@ -98,7 +99,8 @@ TEST(Parameter, construct_parameter_and_named_args) { const plssvm::parameter param{ 
param_base, plssvm::kernel_type = plssvm::kernel_function_type::rbf, plssvm::sycl_implementation_type = plssvm::sycl::implementation_type::adaptivecpp, - plssvm::sycl_kernel_invocation_type = plssvm::sycl::kernel_invocation_type::nd_range }; + plssvm::sycl_kernel_invocation_type = plssvm::sycl::kernel_invocation_type::nd_range, + plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda }; // test default values EXPECT_EQ(param.kernel_type, plssvm::kernel_function_type::rbf); From 0ac498b0d4a50c436e286fd4a54b5ce9c80d960b Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 8 Nov 2024 12:09:43 +0100 Subject: [PATCH 082/123] Add missing command line parameters to tests. --- tests/backends/Kokkos/kokkos_csvm.cpp | 775 +++++++++++++++++++++----- tests/detail/cmd/parser_predict.cpp | 2 +- tests/detail/cmd/parser_train.cpp | 2 +- 3 files changed, 648 insertions(+), 131 deletions(-) diff --git a/tests/backends/Kokkos/kokkos_csvm.cpp b/tests/backends/Kokkos/kokkos_csvm.cpp index e7af88d5b..5fe50d46e 100644 --- a/tests/backends/Kokkos/kokkos_csvm.cpp +++ b/tests/backends/Kokkos/kokkos_csvm.cpp @@ -8,155 +8,672 @@ * @brief Tests for the functionality related to the Kokkos backend. 
*/ -#include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm -#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception -#include "plssvm/detail/type_list.hpp" // plssvm::detail::label_type_list -#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type -#include "plssvm/parameter.hpp" // plssvm::parameter -#include "plssvm/target_platforms.hpp" // plssvm::target_platform +#include "plssvm/backends/Kokkos/csvm.hpp" // plssvm::kokkos::csvm +#include "plssvm/backends/Kokkos/detail/utility.hpp" // plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping +#include "plssvm/backends/Kokkos/exceptions.hpp" // plssvm::kokkos::backend_exception +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/detail/type_list.hpp" // plssvm::detail::label_type_list +#include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/parameter.hpp" // plssvm::parameter +#include "plssvm/target_platforms.hpp" // plssvm::target_platform, plssvm::list_available_target_platforms -#include "tests/backends/Kokkos/mock_kokkos_csvm.hpp" #include "tests/backends/generic_csvm_tests.hpp" // generic CSVM tests to instantiate #include "tests/backends/generic_gpu_csvm_tests.hpp" // generic GPU CSVM tests to instantiate -#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT -#include "tests/naming.hpp" // naming::test_parameter_to_name -#include "tests/types_to_test.hpp" // util::{cartesian_type_product_t, combine_test_parameters_gtest_t} -#include "tests/utility.hpp" // util::redirect_output +#include "tests/backends/Kokkos/mock_kokkos_csvm.hpp" +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT +#include "tests/naming.hpp" // naming::test_parameter_to_name +#include "tests/types_to_test.hpp" // util::{cartesian_type_product_t, combine_test_parameters_gtest_t} +#include "tests/utility.hpp" // util::redirect_output #include 
"gtest/gtest.h" // TEST_F, EXPECT_NO_THROW, INSTANTIATE_TYPED_TEST_SUITE_P, ::testing::Test -#include // std::make_tuple, std::tuple +#include // std::map +#include // std::make_tuple, std::tuple +#include // std::vector class KokkosCSVM : public ::testing::Test, - private util::redirect_output<> { }; - -//// check whether the constructor correctly fails when using an incompatible target platform -//TEST_F(CUDACSVM, construct_parameter) { -//#if defined(PLSSVM_HAS_NVIDIA_TARGET) -// // the automatic target platform must always be available -// EXPECT_NO_THROW(plssvm::cuda::csvm{ plssvm::parameter{} }); -//#else -// EXPECT_THROW_WHAT(plssvm::cuda::csvm{ plssvm::parameter{} }, -// plssvm::cuda::backend_exception, -// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); -//#endif -//} -// -//TEST_F(CUDACSVM, construct_target_and_parameter) { -// // create parameter struct -// const plssvm::parameter params{}; -// -//#if defined(PLSSVM_HAS_NVIDIA_TARGET) -// // only automatic or gpu_nvidia are allowed as target platform for the CUDA backend -// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::target_platform::automatic, params })); -// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::target_platform::gpu_nvidia, params })); -//#else -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::automatic, params }), -// plssvm::cuda::backend_exception, -// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_nvidia, params }), -// plssvm::cuda::backend_exception, -// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); -//#endif -// -// // all other target platforms must throw -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::cpu, params }), -// plssvm::cuda::backend_exception, -// "Invalid target platform 'cpu' for the CUDA backend!"); -// 
EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_amd, params }), -// plssvm::cuda::backend_exception, -// "Invalid target platform 'gpu_amd' for the CUDA backend!"); -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_intel, params }), -// plssvm::cuda::backend_exception, -// "Invalid target platform 'gpu_intel' for the CUDA backend!"); -//} -// -//TEST_F(CUDACSVM, construct_named_args) { -//#if defined(PLSSVM_HAS_NVIDIA_TARGET) -// // only automatic or gpu_nvidia are allowed as target platform for the CUDA backend -// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); -// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::cost = 2.0 })); -//#else -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), -// plssvm::cuda::backend_exception, -// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); -//#endif -//} -// -//TEST_F(CUDACSVM, construct_target_and_named_args) { -//#if defined(PLSSVM_HAS_NVIDIA_TARGET) -// // only automatic or gpu_nvidia are allowed as target platform for the CUDA backend -// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::target_platform::automatic, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); -// EXPECT_NO_THROW((plssvm::cuda::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::cost = 2.0 })); -//#else -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::automatic, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), -// plssvm::cuda::backend_exception, -// "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::cost = 2.0 }), -// plssvm::cuda::backend_exception, -// "Requested target platform 'gpu_nvidia' that hasn't 
been enabled using PLSSVM_TARGET_PLATFORMS!"); -//#endif -// -// // all other target platforms must throw -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::cpu, plssvm::cost = 2.0 }), -// plssvm::cuda::backend_exception, -// "Invalid target platform 'cpu' for the CUDA backend!"); -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_amd, plssvm::cost = 2.0 }), -// plssvm::cuda::backend_exception, -// "Invalid target platform 'gpu_amd' for the CUDA backend!"); -// EXPECT_THROW_WHAT((plssvm::cuda::csvm{ plssvm::target_platform::gpu_intel, plssvm::cost = 2.0 }), -// plssvm::cuda::backend_exception, -// "Invalid target platform 'gpu_intel' for the CUDA backend!"); -//} - -template -struct kokkos_csvm_test_type { - using mock_csvm_type = mock_kokkos_csvm; - using csvm_type = plssvm::kokkos::csvm; - using device_ptr_type = typename csvm_type::device_ptr_type; - inline constexpr static auto additional_arguments = std::make_tuple(); -}; - -using kokkos_csvm_test_tuple = std::tuple>; -using kokkos_csvm_test_label_type_list = util::cartesian_type_product_t; -using kokkos_csvm_test_type_list = util::cartesian_type_product_t; + private util::redirect_output<> { }; + +TEST_F(KokkosCSVM, construct_parameter) { // execution_space automatic, target_platform automatic + // check whether the execution space would be automatically determined as either OpenMPTarget or OpenACC + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + plssvm::kokkos::execution_space space{}; + for (const plssvm::target_platform target : plssvm::list_available_target_platforms()) { + if (plssvm::detail::contains(available_combinations, target)) { + space = available_combinations.at(target).front(); + break; + } + } + + // must throw an exception if the execution space would be OpenMPTarget or OpenACC + if (space == plssvm::kokkos::execution_space::openmp_target || space == 
plssvm::kokkos::execution_space::openacc) { + EXPECT_THROW_WHAT(plssvm::kokkos::csvm{ plssvm::parameter{} }, + plssvm::kokkos::backend_exception, + fmt::format("The Kokkos execution space {} is currently not supported !", space)); + } else { + EXPECT_NO_THROW(plssvm::kokkos::csvm{ plssvm::parameter{} }); + } +} + +TEST_F(KokkosCSVM, construct_target_and_parameter) { // execution_space automatic, target_platform explicit + // create parameter struct + const plssvm::parameter params{}; + + // automatic should always work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::automatic, params })); + + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + const auto target_supported = [&](const plssvm::target_platform target) { + return plssvm::detail::contains(available_combinations, target); + }; + +#if defined(PLSSVM_HAS_CPU_TARGET) + if (target_supported(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform cpu!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params }), + plssvm::kokkos::backend_exception, + "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_NVIDIA_TARGET) + if (target_supported(plssvm::target_platform::gpu_nvidia)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that 
requested target platform gpu_nvidia!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_AMD_TARGET) + if (target_supported(plssvm::target_platform::gpu_amd)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_amd!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_amd' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_INTEL_TARGET) + if (target_supported(plssvm::target_platform::gpu_intel)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_intel!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_intel' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif +} + +TEST_F(KokkosCSVM, construct_execution_space_and_parameter) { // execution_space explicit, target_platform automatic + // create parameter struct + 
const plssvm::parameter params{}; + + // automatic should always work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::automatic })); + +#if defined(KOKKOS_ENABLE_CUDA) + // explicitly providing the Cuda execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Cuda is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_HIP) + // explicitly providing the HIP execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace HIP is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_SYCL) + // explicitly providing the SYCL execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace SYCL is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_HPX) + // explicitly providing the HPX execution space should work + 
EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace HPX is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) + // explicitly providing the OpenMP execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenMP is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + // explicitly providing the OpenMPTarget execution space currently unsupported + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp_target }), + plssvm::kokkos::backend_exception, + "The Kokkos execution space OpenMPTarget is currently not supported !"); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp_target }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenMPTarget is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) + // explicitly providing the OpenACC execution space currently unsupported + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = 
plssvm::kokkos::execution_space::openacc }), + plssvm::kokkos::backend_exception, + "The Kokkos execution space OpenACC is currently not supported !"); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openacc }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenACC is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_THREADS) + // explicitly providing the Threads execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::threads })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::threads }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Threads is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_SERIAL) + // explicitly providing the Serial execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ params, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Serial is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif +} + +TEST_F(KokkosCSVM, construct_target_and_execution_space_and_parameter) { // execution_space explicit, target_platform explicit + // create parameter struct + const plssvm::parameter params{}; + + // list all possible execution spaces + std::vector all_execution_spaces{ + plssvm::kokkos::execution_space::cuda, + 
plssvm::kokkos::execution_space::hip, + plssvm::kokkos::execution_space::sycl, + plssvm::kokkos::execution_space::hpx, + plssvm::kokkos::execution_space::openmp, + plssvm::kokkos::execution_space::openmp_target, + plssvm::kokkos::execution_space::openacc, + plssvm::kokkos::execution_space::threads, + plssvm::kokkos::execution_space::serial + }; + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + const auto combination_exists = [&](const plssvm::target_platform target, const plssvm::kokkos::execution_space space) { + return plssvm::detail::contains(available_combinations, target) && plssvm::detail::contains(available_combinations.at(target), space); + }; + const auto execution_space_available = [&](const plssvm::kokkos::execution_space space) { + return plssvm::detail::contains(plssvm::kokkos::list_available_execution_spaces(), space); + }; + +#if defined(PLSSVM_HAS_CPU_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::cpu, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform cpu!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + 
EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif + +#if defined(PLSSVM_HAS_NVIDIA_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::gpu_nvidia, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_nvidia!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif + +#if defined(PLSSVM_HAS_AMD_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided 
Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::gpu_amd, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_amd!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_amd' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif + +#if defined(PLSSVM_HAS_INTEL_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::gpu_intel, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform 
gpu_intel!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, params, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_intel' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif +} + +TEST_F(KokkosCSVM, construct_named_args) { // execution_space automatic, target_platform automatic + // check whether the execution space would be automatically determined as either OpenMPTarget or OpenACC + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + plssvm::kokkos::execution_space space{}; + for (const plssvm::target_platform target : plssvm::list_available_target_platforms()) { + if (plssvm::detail::contains(available_combinations, target)) { + space = available_combinations.at(target).front(); + break; + } + } + + // must throw an exception if the execution space would be OpenMPTarget or OpenACC + if (space == plssvm::kokkos::execution_space::openmp_target || space == plssvm::kokkos::execution_space::openacc) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + fmt::format("The Kokkos execution space {} is currently not supported !", space)); + } else { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + EXPECT_NO_THROW(plssvm::kokkos::csvm{ plssvm::cost = 2.0 }); + } +} + +TEST_F(KokkosCSVM, construct_target_and_named_args) { // execution_space automatic, target_platform explicit + // automatic should always work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::automatic, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + + const std::map> 
available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + const auto target_supported = [&](const plssvm::target_platform target) { + return plssvm::detail::contains(available_combinations, target); + }; + +#if defined(PLSSVM_HAS_CPU_TARGET) + if (target_supported(plssvm::target_platform::cpu)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform cpu!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_NVIDIA_TARGET) + if (target_supported(plssvm::target_platform::gpu_nvidia)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_nvidia!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 
2.0 }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_AMD_TARGET) + if (target_supported(plssvm::target_platform::gpu_amd)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_amd!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_amd' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif + +#if defined(PLSSVM_HAS_INTEL_TARGET) + if (target_supported(plssvm::target_platform::gpu_intel)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + fmt::format("No Kokkos::ExecutionSpace available ({}) for that requested target platform gpu_intel!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0 }), + plssvm::kokkos::backend_exception, + "Requested target platform 
'gpu_intel' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); +#endif +} + +TEST_F(KokkosCSVM, construct_execution_space_and_named_args) { // execution_space explicit, target_platform automatic + // automatic should always work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::automatic })); + +#if defined(KOKKOS_ENABLE_CUDA) + // explicitly providing the Cuda execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::cuda }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Cuda is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_HIP) + // explicitly providing the HIP execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hip }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace HIP is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_SYCL) + // explicitly providing the SYCL execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ 
plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::sycl }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace SYCL is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_HPX) + // explicitly providing the HPX execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::hpx }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace HPX is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) + // explicitly providing the OpenMP execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenMP is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + 
+#if defined(KOKKOS_ENABLE_OPENMPTARGET) + // explicitly providing the OpenMPTarget execution space currently unsupported + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp_target }), + plssvm::kokkos::backend_exception, + "The Kokkos execution space OpenMPTarget is currently not supported !"); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openmp_target }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenMPTarget is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_OPENACC) + // explicitly providing the OpenACC execution space currently unsupported + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openacc }), + plssvm::kokkos::backend_exception, + "The Kokkos execution space OpenACC is currently not supported !"); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::openacc }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace OpenACC is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_THREADS) + // explicitly providing the Threads execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = 
plssvm::kokkos::execution_space::threads })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::threads }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Threads is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif + +#if defined(KOKKOS_ENABLE_SERIAL) + // explicitly providing the Serial execution space should work + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial })); +#else + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = plssvm::kokkos::execution_space::serial }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace Serial is not available, available are: {}!", fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); +#endif +} + +TEST_F(KokkosCSVM, construct_target_and_execution_space_and_named_args) { // execution_space explicit, target_platform explicit + // list all possible execution spaces + std::vector all_execution_spaces{ + plssvm::kokkos::execution_space::cuda, + plssvm::kokkos::execution_space::hip, + plssvm::kokkos::execution_space::sycl, + plssvm::kokkos::execution_space::hpx, + plssvm::kokkos::execution_space::openmp, + plssvm::kokkos::execution_space::openmp_target, + plssvm::kokkos::execution_space::openacc, + plssvm::kokkos::execution_space::threads, + plssvm::kokkos::execution_space::serial + }; + const std::map> available_combinations = plssvm::kokkos::detail::available_target_platform_to_execution_space_mapping(); + const auto combination_exists = [&](const plssvm::target_platform target, const 
plssvm::kokkos::execution_space space) { + return plssvm::detail::contains(available_combinations, target) && plssvm::detail::contains(available_combinations.at(target), space); + }; + const auto execution_space_available = [&](const plssvm::kokkos::execution_space space) { + return plssvm::detail::contains(plssvm::kokkos::list_available_execution_spaces(), space); + }; + +#if defined(PLSSVM_HAS_CPU_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::cpu, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform cpu!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::cpu, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'cpu' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif + +#if 
defined(PLSSVM_HAS_NVIDIA_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::gpu_nvidia, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_nvidia!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_nvidia, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_nvidia' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif + +#if defined(PLSSVM_HAS_AMD_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, 
plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::gpu_amd, space)) { + EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_amd!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_amd, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_amd' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif + +#if defined(PLSSVM_HAS_INTEL_TARGET) + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + if (!execution_space_available(space)) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} is not available, available are: {}!", space, fmt::join(plssvm::kokkos::list_available_execution_spaces(), ", "))); + } else if (combination_exists(plssvm::target_platform::gpu_intel, space)) { + 
EXPECT_NO_THROW((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space })); + } else { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + fmt::format("The provided Kokkos::ExecutionSpace {} does not support the requested target platform gpu_intel!", space)); + } + } +#else + for (const plssvm::kokkos::execution_space space : all_execution_spaces) { + EXPECT_THROW_WHAT((plssvm::kokkos::csvm{ plssvm::target_platform::gpu_intel, plssvm::kernel_type = plssvm::kernel_function_type::linear, plssvm::cost = 2.0, plssvm::kokkos_execution_space = space }), + plssvm::kokkos::backend_exception, + "Requested target platform 'gpu_intel' that hasn't been enabled using PLSSVM_TARGET_PLATFORMS!"); + } +#endif +} + +TEST_F(KokkosCSVM, get_execution_space) { + // construct default CSVM + const plssvm::kokkos::csvm svm{ plssvm::parameter{} }; + + // after construction: get_execution_space must refer to a plssvm::kokkos::execution_space that is not automatic + EXPECT_NE(svm.get_execution_space(), plssvm::kokkos::execution_space::automatic); +} + + template + struct kokkos_csvm_test_type { + using mock_csvm_type = mock_kokkos_csvm; + using csvm_type = plssvm::kokkos::csvm; + using device_ptr_type = typename csvm_type::device_ptr_type; + inline constexpr static auto additional_arguments = std::make_tuple(); + }; + + using kokkos_csvm_test_tuple = std::tuple>; + using kokkos_csvm_test_label_type_list = util::cartesian_type_product_t; + using kokkos_csvm_test_type_list = util::cartesian_type_product_t; // the tests used in the instantiated GTest test suites -using kokkos_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; -using kokkos_solver_type_gtest = 
util::combine_test_parameters_gtest_t; -using kokkos_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; -using kokkos_solver_and_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; -using kokkos_label_type_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; -using kokkos_label_type_solver_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; + using kokkos_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; + using kokkos_solver_type_gtest = util::combine_test_parameters_gtest_t; + using kokkos_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; + using kokkos_solver_and_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; + using kokkos_label_type_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; + using kokkos_label_type_solver_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; // instantiate type-parameterized tests // generic CSVM tests -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolver, kokkos_solver_type_gtest, naming::test_parameter_to_name); -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunction, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunctionClassification, kokkos_label_type_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); -//INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunctionClassification, kokkos_label_type_solver_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); + 
INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolver, kokkos_solver_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunction, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunctionClassification, kokkos_label_type_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); +// INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunctionClassification, kokkos_label_type_solver_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); // generic CSVM DeathTests -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverDeathTest, kokkos_solver_type_gtest, naming::test_parameter_to_name); -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMKernelFunctionDeathTest, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverKernelFunctionDeathTest, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverDeathTest, kokkos_solver_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMKernelFunctionDeathTest, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); + 
INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverKernelFunctionDeathTest, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); // generic GPU CSVM tests - correct grid sizes -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); // generic GPU CSVM DeathTests - correct grid sizes -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericGPUCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericGPUCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); -using kokkos_mock_csvm_test_tuple = std::tuple>; -using kokkos_mock_csvm_test_type_list = util::cartesian_type_product_t; + using kokkos_mock_csvm_test_tuple = std::tuple>; + using kokkos_mock_csvm_test_type_list = util::cartesian_type_product_t; -using kokkos_mock_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; -using kokkos_mock_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; + using kokkos_mock_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; + using kokkos_mock_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; // generic GPU CSVM tests - mocked grid sizes -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVM, kokkos_mock_csvm_test_type_gtest, naming::test_parameter_to_name); -INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVMKernelFunction, kokkos_mock_kernel_function_type_gtest, 
naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVM, kokkos_mock_csvm_test_type_gtest, naming::test_parameter_to_name); + INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVMKernelFunction, kokkos_mock_kernel_function_type_gtest, naming::test_parameter_to_name); diff --git a/tests/detail/cmd/parser_predict.cpp b/tests/detail/cmd/parser_predict.cpp index fccf2f005..72311f89b 100644 --- a/tests/detail/cmd/parser_predict.cpp +++ b/tests/detail/cmd/parser_predict.cpp @@ -195,7 +195,7 @@ TEST_P(ParserPredictBackend, parsing) { // clang-format off INSTANTIATE_TEST_SUITE_P(ParserPredict, ParserPredictBackend, ::testing::Combine( ::testing::Values("-b", "--backend"), - ::testing::Values("automatic", "OpenMP", "CUDA", "HIP", "OpenCL", "SYCL")), + ::testing::Values("automatic", "OpenMP", "stdpar", "CUDA", "HIP", "OpenCL", "SYCL", "Kokkos")), naming::pretty_print_parameter_flag_and_value); // clang-format on diff --git a/tests/detail/cmd/parser_train.cpp b/tests/detail/cmd/parser_train.cpp index 78d43a25f..071e867a0 100644 --- a/tests/detail/cmd/parser_train.cpp +++ b/tests/detail/cmd/parser_train.cpp @@ -463,7 +463,7 @@ TEST_P(ParserTrainBackend, parsing) { // clang-format off INSTANTIATE_TEST_SUITE_P(ParserTrain, ParserTrainBackend, ::testing::Combine( ::testing::Values("-b", "--backend"), - ::testing::Values("automatic", "OpenMP", "CUDA", "HIP", "OpenCL", "SYCL")), + ::testing::Values("automatic", "OpenMP", "stdpar", "CUDA", "HIP", "OpenCL", "SYCL", "Kokkos")), naming::pretty_print_parameter_flag_and_value); // clang-format on From c27943507be3abe65534ee13439c4b3ea7564f17 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 8 Nov 2024 12:10:06 +0100 Subject: [PATCH 083/123] Fix formatting. 
--- src/main_scale.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main_scale.cpp b/src/main_scale.cpp index 1253df454..365f2eb2c 100644 --- a/src/main_scale.cpp +++ b/src/main_scale.cpp @@ -17,7 +17,7 @@ #include "plssvm/detail/utility.hpp" // PLSSVM_IS_DEFINED #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) - #include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler + #include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler #endif #include // std::for_each From 9288a33c45615a749812f3422ab56efe2fe00f7f Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 8 Nov 2024 12:23:07 +0100 Subject: [PATCH 084/123] Fix failing tests. --- tests/detail/tracking/performance_tracker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/detail/tracking/performance_tracker.cpp b/tests/detail/tracking/performance_tracker.cpp index b81542ee7..b74016dea 100644 --- a/tests/detail/tracking/performance_tracker.cpp +++ b/tests/detail/tracking/performance_tracker.cpp @@ -340,7 +340,7 @@ TEST_F(PerformanceTracker, add_parser_train_tracking_entry) { // check entries for correctness EXPECT_EQ(entries.size(), 1); - ASSERT_EQ(entries.at("parameter").size(), 17); + ASSERT_EQ(entries.at("parameter").size(), 18); } TEST_F(PerformanceTracker, add_parser_predict_tracking_entry) { @@ -363,7 +363,7 @@ TEST_F(PerformanceTracker, add_parser_predict_tracking_entry) { // check entries for correctness EXPECT_EQ(entries.size(), 1); - ASSERT_EQ(entries.at("parameter").size(), 9); + ASSERT_EQ(entries.at("parameter").size(), 10); } TEST_F(PerformanceTracker, add_parser_scale_tracking_entry) { From 1e36ab1386d2551548b2e5ec4e4405727531797c Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 8 Nov 2024 23:55:27 +0100 Subject: [PATCH 085/123] The execution space implementation must always be available -> moved the file to the base sources and conditionally disabled Kokkos includes to be able to compile the file even if 
Kokkos is disabled. --- CMakeLists.txt | 1 + src/plssvm/backends/Kokkos/CMakeLists.txt | 1 - src/plssvm/backends/Kokkos/execution_space.cpp | 8 ++++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d555bd8e4..790ec4268 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,6 +72,7 @@ endif () ######################################################################################################################## ## set base sources set(PLSSVM_BASE_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/Kokkos/execution_space.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/SYCL/implementation_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/SYCL/kernel_invocation_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/plssvm/backends/stdpar/implementation_types.cpp diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt index 90a1f4e74..960df04b1 100644 --- a/src/plssvm/backends/Kokkos/CMakeLists.txt +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -28,7 +28,6 @@ set(PLSSVM_KOKKOS_SOURCES ${CMAKE_CURRENT_LIST_DIR}/detail/utility.cpp ${CMAKE_CURRENT_LIST_DIR}/csvm.cpp ${CMAKE_CURRENT_LIST_DIR}/exceptions.cpp - ${CMAKE_CURRENT_LIST_DIR}/execution_space.cpp ) # set target properties diff --git a/src/plssvm/backends/Kokkos/execution_space.cpp b/src/plssvm/backends/Kokkos/execution_space.cpp index 0caae212f..e6401367c 100644 --- a/src/plssvm/backends/Kokkos/execution_space.cpp +++ b/src/plssvm/backends/Kokkos/execution_space.cpp @@ -8,8 +8,10 @@ #include "plssvm/backends/Kokkos/execution_space.hpp" -#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_execution_spaces -#include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) + #include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // 
plssvm::kokkos::detail::constexpr_available_execution_spaces +#endif +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case #include // std::array #include // std::ios::failbit @@ -80,9 +82,11 @@ std::istream &operator>>(std::istream &in, execution_space &space) { std::vector list_available_execution_spaces() { // always add the automatic execution space std::vector spaces{ execution_space::automatic }; +#if defined(PLSSVM_HAS_KOKKOS_BACKEND) // add all other available execution spaces constexpr auto arr = detail::constexpr_available_execution_spaces(); spaces.insert(spaces.cend(), arr.begin(), arr.end()); +#endif return spaces; } From 1a8e092c5975009576f71a2a84372dad8c0f9f55 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 9 Nov 2024 00:24:13 +0100 Subject: [PATCH 086/123] Now the execution space file can also be compiled if Kokkos is not available. --- .../constexpr_available_execution_spaces.hpp | 28 +++++++++++-------- src/plssvm/backends/Kokkos/CMakeLists.txt | 16 +++++++++++ .../backends/Kokkos/execution_space.cpp | 8 ++---- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp b/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp index ea5dafb02..92f908fa7 100644 --- a/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp +++ b/include/plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp @@ -13,9 +13,13 @@ #ifndef PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_EXECUTION_SPACES_HPP_ #define PLSSVM_BACKENDS_KOKKOS_DETAIL_CONSTEXPR_AVAILABLE_EXECUTION_SPACES_HPP_ -#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +// if the variable isn't set, no Kokkos execution space is available +// -> explicitly set it to 0! 
+#if !defined(PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES) + #define PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES 0 +#endif -#include "Kokkos_Core.hpp" // Kokkos macros, Kokkos ExecutionSpace types +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include // std::array @@ -30,32 +34,32 @@ namespace plssvm::kokkos::detail { // Note: The execution_space::automatic value may NEVER be added here! // Note: the trailing comma is explicitly allowed by the standard // Note: the order is intentionally chosen this way -> the order of the entries determines the priority when using a backend to run our code - return std::array{ -#if defined(KOKKOS_ENABLE_CUDA) + return std::array{ +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_CUDA) execution_space::cuda, #endif -#if defined(KOKKOS_ENABLE_HIP) +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_HIP) execution_space::hip, #endif -#if defined(KOKKOS_ENABLE_SYCL) +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_SYCL) execution_space::sycl, #endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_OPENMPTARGET) execution_space::openmp_target, #endif -#if defined(KOKKOS_ENABLE_OPENACC) +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_OPENACC) execution_space::openacc, #endif -#if defined(KOKKOS_ENABLE_OPENMP) +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_OPENMP) execution_space::openmp, #endif -#if defined(KOKKOS_ENABLE_THREADS) +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_THREADS) execution_space::threads, #endif -#if defined(KOKKOS_ENABLE_HPX) +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_HPX) execution_space::hpx, #endif -#if defined(KOKKOS_ENABLE_SERIAL) +#if defined(PLSSVM_KOKKOS_BACKEND_ENABLE_SERIAL) execution_space::serial, #endif }; diff --git a/src/plssvm/backends/Kokkos/CMakeLists.txt b/src/plssvm/backends/Kokkos/CMakeLists.txt index 960df04b1..bf37122f2 100644 --- a/src/plssvm/backends/Kokkos/CMakeLists.txt +++ b/src/plssvm/backends/Kokkos/CMakeLists.txt @@ -122,36 
+122,52 @@ target_link_libraries(${PLSSVM_ALL_LIBRARY_NAME} INTERFACE ${PLSSVM_KOKKOS_BACKE append_local_and_parent(PLSSVM_TARGETS_TO_INSTALL ${PLSSVM_KOKKOS_BACKEND_LIBRARY_NAME}) # assemble Kokkos available execution space string +# also set compile definitions -> can't use KOKKOS_ENABLE_* directly inside the "constexpr_available_execution_space.hpp" +# header since we can't include "Kokkos_Core.hpp" there (transitively used in the base library that doesn't know anything about Kokkos set(PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "") if (Kokkos_ENABLE_CUDA) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_CUDA) list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "Cuda") endif () if (Kokkos_ENABLE_HIP) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_HIP) list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "HIP") endif () if (Kokkos_ENABLE_SYCL) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_SYCL) list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "SYCL") endif () if (Kokkos_ENABLE_HPX) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_HPX) list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "HPX") endif () if (Kokkos_ENABLE_OPENMP) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_OPENMP) list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "OpenMP") endif () if (Kokkos_ENABLE_OPENMPTARGET) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_OPENMPTARGET) list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "OpenMPTarget") endif () if (Kokkos_ENABLE_OPENACC) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_OPENACC) list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "OpenACC") endif () if 
(Kokkos_ENABLE_THREADS) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_THREADS) list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "Threads") endif () if (Kokkos_ENABLE_SERIAL) + target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_ENABLE_SERIAL) list(APPEND PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "Serial") endif () set(PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES "${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES}" PARENT_SCOPE) +# also set the number of available Kokkos execution spaces to explicitly set the type of the used std::array +# -> necessary if NO Kokkos execution space is available and, therefore, the size of the std::array would be 0 (can't automatically be deduced) +list(LENGTH PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES) +target_compile_definitions(${PLSSVM_BASE_LIBRARY_NAME} PUBLIC PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES=${PLSSVM_KOKKOS_BACKEND_NUM_AVAILABLE_EXECUTION_SPACES}) + # generate summary string set(PLSSVM_KOKKOS_BACKEND_SUMMARY_STRING_COMPILER " - Kokkos (${PLSSVM_KOKKOS_BACKEND_AVAILABLE_EXECUTION_SPACES}):") include(${PROJECT_SOURCE_DIR}/cmake/assemble_summary_string.cmake) diff --git a/src/plssvm/backends/Kokkos/execution_space.cpp b/src/plssvm/backends/Kokkos/execution_space.cpp index e6401367c..0caae212f 100644 --- a/src/plssvm/backends/Kokkos/execution_space.cpp +++ b/src/plssvm/backends/Kokkos/execution_space.cpp @@ -8,10 +8,8 @@ #include "plssvm/backends/Kokkos/execution_space.hpp" -#if defined(PLSSVM_HAS_KOKKOS_BACKEND) - #include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // plssvm::kokkos::detail::constexpr_available_execution_spaces -#endif -#include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case +#include "plssvm/backends/Kokkos/detail/constexpr_available_execution_spaces.hpp" // 
plssvm::kokkos::detail::constexpr_available_execution_spaces +#include "plssvm/detail/string_utility.hpp" // plssvm::detail::to_lower_case #include // std::array #include // std::ios::failbit @@ -82,11 +80,9 @@ std::istream &operator>>(std::istream &in, execution_space &space) { std::vector list_available_execution_spaces() { // always add the automatic execution space std::vector spaces{ execution_space::automatic }; -#if defined(PLSSVM_HAS_KOKKOS_BACKEND) // add all other available execution spaces constexpr auto arr = detail::constexpr_available_execution_spaces(); spaces.insert(spaces.cend(), arr.begin(), arr.end()); -#endif return spaces; } From 7f7520f862a3879541df8c471a99b527e38242a8 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 9 Nov 2024 00:29:05 +0100 Subject: [PATCH 087/123] Fix compilation error involving too many template instantiations. --- tests/backends/Kokkos/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index 34ce3881f..79cb3331e 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -27,6 +27,11 @@ find_package(Kokkos REQUIRED) # add test executable add_executable(${PLSSVM_KOKKOS_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../main.cpp ${PLSSVM_KOKKOS_TEST_SOURCES}) +if (Kokkos_ENABLE_CUDA) + # fix template limit when using Kokkos::Cuda + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xcudafe --pending_instantiations=0") +endif () + # link against test library target_link_libraries(${PLSSVM_KOKKOS_TEST_NAME} PRIVATE ${PLSSVM_BASE_TEST_LIBRARY_NAME}) From 6904c88fee11a5d791d8c5f68ccfc051bb3f3b4c Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 9 Nov 2024 01:05:46 +0100 Subject: [PATCH 088/123] Use target_compile_options instead of directly changing CMAKE_CXX_FLAGS. 
--- tests/backends/Kokkos/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index 79cb3331e..c6abf4dbe 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -29,7 +29,7 @@ add_executable(${PLSSVM_KOKKOS_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../main.c if (Kokkos_ENABLE_CUDA) # fix template limit when using Kokkos::Cuda - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xcudafe --pending_instantiations=0") + target_compile_options(${PLSSVM_KOKKOS_TEST_NAME} PRIVATE -Xcudafe --pending_instantiations=0) endif () # link against test library From 0566ba0af13f33f4876ed161e32aad0400dbd78b Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sat, 9 Nov 2024 01:18:54 +0100 Subject: [PATCH 089/123] Additionally, output all additional_arguments (if any). --- tests/naming.hpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/naming.hpp b/tests/naming.hpp index 811266fe1..dd58c097f 100644 --- a/tests/naming.hpp +++ b/tests/naming.hpp @@ -153,7 +153,15 @@ template } else if constexpr (std::is_base_of_v) { return std::string{ util::exception_type_name() }; } else if constexpr (has_csvm_type_member_typedef_v) { - return fmt::format("{}", plssvm::csvm_to_backend_type_v); + // clang-format off + return fmt::format("{}{}", plssvm::csvm_to_backend_type_v, std::apply([](const auto &...args) { + if constexpr (sizeof...(args) == 0) { + return std::string{}; + } else { + return (fmt::format("_{}", args.second) + ...); + } + }, T::additional_arguments)); + // clang-format on } else if constexpr (has_device_ptr_type_member_typedef_v) { using device_ptr_type = typename T::device_ptr_type; return fmt::format("{}", plssvm::detail::arithmetic_type_name()); From 1d6b814fa3fa0c06e44489aacdda54351a630e12 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sun, 10 Nov 2024 20:12:09 +0100 Subject: [PATCH 090/123] Fix wrong size of 
test array. --- tests/types_to_test.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/types_to_test.hpp b/tests/types_to_test.hpp index 44db342b3..b5eb2c1db 100644 --- a/tests/types_to_test.hpp +++ b/tests/types_to_test.hpp @@ -474,7 +474,7 @@ constexpr std::array classification_types_to_tes plssvm::classification_type::oaa, plssvm::classification_type::oao }; /// A list of all available solver types. -constexpr std::array solver_types_to_test = { +constexpr std::array solver_types_to_test{ plssvm::solver_type::automatic, plssvm::solver_type::cg_explicit, plssvm::solver_type::cg_implicit }; From c4dc4ec569e0f8b3760564589426a6104e5daad1 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sun, 10 Nov 2024 20:33:35 +0100 Subject: [PATCH 091/123] Test all available Kokkos execution spaces. --- tests/backends/Kokkos/kokkos_csvm.cpp | 149 +++++++++++++++++++------- 1 file changed, 112 insertions(+), 37 deletions(-) diff --git a/tests/backends/Kokkos/kokkos_csvm.cpp b/tests/backends/Kokkos/kokkos_csvm.cpp index 5fe50d46e..51a4fb9c6 100644 --- a/tests/backends/Kokkos/kokkos_csvm.cpp +++ b/tests/backends/Kokkos/kokkos_csvm.cpp @@ -27,9 +27,11 @@ #include "gtest/gtest.h" // TEST_F, EXPECT_NO_THROW, INSTANTIATE_TYPED_TEST_SUITE_P, ::testing::Test -#include // std::map -#include // std::make_tuple, std::tuple -#include // std::vector +#include // std::array +#include // std::size_t +#include // std::map +#include // std::make_tuple, std::tuple +#include // std::vector class KokkosCSVM : public ::testing::Test, private util::redirect_output<> { }; @@ -626,54 +628,127 @@ TEST_F(KokkosCSVM, get_execution_space) { EXPECT_NE(svm.get_execution_space(), plssvm::kokkos::execution_space::automatic); } - template - struct kokkos_csvm_test_type { - using mock_csvm_type = mock_kokkos_csvm; - using csvm_type = plssvm::kokkos::csvm; - using device_ptr_type = typename csvm_type::device_ptr_type; - inline constexpr static auto additional_arguments = std::make_tuple(); 
- }; +template +struct kokkos_csvm_test_type { + using mock_csvm_type = mock_kokkos_csvm; + using csvm_type = plssvm::kokkos::csvm; + using device_ptr_type = typename csvm_type::device_ptr_type; + inline static auto additional_arguments = std::make_tuple(std::make_pair(plssvm::kokkos_execution_space, space)); +}; - using kokkos_csvm_test_tuple = std::tuple>; - using kokkos_csvm_test_label_type_list = util::cartesian_type_product_t; - using kokkos_csvm_test_type_list = util::cartesian_type_product_t; +namespace impl { + +/** + * @brief Determine which execution spaces can be tested based on the available Kokkos::ExecutionSpaces and PLSSVM target platforms. + * @return the available execution spaces for testing (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr auto constexpr_available_execution_spaces_to_test() { + return std::array{ +#if defined(KOKKOS_ENABLE_CUDA) && defined(PLSSVM_HAS_NVIDIA_TARGET) // for Kokkos::Cuda, an NVIDIA target must be available + plssvm::kokkos::execution_space::cuda, +#endif +#if defined(KOKKOS_ENABLE_HIP) && (defined(PLSSVM_HAS_NVIDIA_TARGET) || defined(PLSSVM_HAS_AMD_TARGET)) // for Kokkos::HIP, an NVIDIA or AMD target must be available + plssvm::kokkos::execution_space::hip, +#endif +#if defined(KOKKOS_ENABLE_SYCL) // for Kokkos::SYCL, any target is ok + plssvm::kokkos::execution_space::sycl, +#endif +#if defined(KOKKOS_ENABLE_HPX) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::Experimental::HPX, a CPU target must be available + plssvm::kokkos::execution_space::hpx, +#endif +#if defined(KOKKOS_ENABLE_OPENMP) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::OpenMP, a CPU target must be available + plssvm::kokkos::execution_space::openmp, +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) // for Kokkos::Experimental::OpenMPTarget,any target is ok // TODO: implement correctly based on allowed target platforms + plssvm::kokkos::execution_space::openmp_target, +#endif +#if defined(KOKKOS_ENABLE_OPENACC) // for 
Kokkos::Experimental::OpenACC,any target is ok // TODO: implement correctly based on allowed target platforms + plssvm::kokkos::execution_space::openacc, +#endif +#if defined(KOKKOS_ENABLE_THREADS) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::Threads, a CPU target must be available + plssvm::kokkos::execution_space::threads, +#endif +#if defined(KOKKOS_ENABLE_SERIAL) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::Serial, a CPU target must be available + plssvm::kokkos::execution_space::serial, +#endif + }; +} + +/** + * @brief Uninstantiated base type to create a `std::tuple` containing all available `kokkos_csvm_test_type` types. + */ +template +struct create_device_tuple_type_helper; + +/** + * @brief Helper struct to create a `std::tuple` containing all available `kokkos_csvm_test_type` types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + * @tparam mock_grid_size whether the maximum grid size should be mocked (i.e. in fact reduced) or not + * @tparam Is the indices to index the `std::array` + */ +template +struct create_device_tuple_type_helper> { + /// The array containing all available execution spaces. + constexpr static auto array = constexpr_available_execution_spaces_to_test(); + /// The resulting variant type. + using type = std::tuple...>; +}; + +/** + * @brief Create a `std::tuple` containing all available `kokkos_csvm_test_type` types by iterating over the `std::array` of + * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. + * @tparam mock_grid_size whether the maximum grid size should be mocked (i.e. in fact reduced) or not + */ +template +struct create_device_tuple_type { + /// The number of types in the final variant. + constexpr static std::size_t N = constexpr_available_execution_spaces_to_test().size(); + /// The final variant type. 
+ using type = typename create_device_tuple_type_helper>::type; +}; + +} // namespace impl + +using kokkos_csvm_test_tuple = typename impl::create_device_tuple_type::type; +using kokkos_csvm_test_label_type_list = util::cartesian_type_product_t; +using kokkos_csvm_test_type_list = util::cartesian_type_product_t; // the tests used in the instantiated GTest test suites - using kokkos_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; - using kokkos_solver_type_gtest = util::combine_test_parameters_gtest_t; - using kokkos_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; - using kokkos_solver_and_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; - using kokkos_label_type_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; - using kokkos_label_type_solver_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_solver_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_solver_and_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_label_type_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_label_type_solver_kernel_function_and_classification_type_gtest = util::combine_test_parameters_gtest_t; // instantiate type-parameterized tests // generic CSVM tests - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolver, kokkos_solver_type_gtest, naming::test_parameter_to_name); - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, 
GenericCSVMSolverKernelFunction, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunctionClassification, kokkos_label_type_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolver, kokkos_solver_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunction, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunctionClassification, kokkos_label_type_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); // INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunctionClassification, kokkos_label_type_solver_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); // generic CSVM DeathTests - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverDeathTest, kokkos_solver_type_gtest, naming::test_parameter_to_name); - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMKernelFunctionDeathTest, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverKernelFunctionDeathTest, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); 
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverDeathTest, kokkos_solver_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMKernelFunctionDeathTest, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMSolverKernelFunctionDeathTest, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); // generic GPU CSVM tests - correct grid sizes - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVM, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVMKernelFunction, kokkos_kernel_function_type_gtest, naming::test_parameter_to_name); // generic GPU CSVM DeathTests - correct grid sizes - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericGPUCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericGPUCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); - using kokkos_mock_csvm_test_tuple = std::tuple>; - using kokkos_mock_csvm_test_type_list = util::cartesian_type_product_t; +using kokkos_mock_csvm_test_tuple = typename impl::create_device_tuple_type::type; +using kokkos_mock_csvm_test_type_list = util::cartesian_type_product_t; - using kokkos_mock_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; - using kokkos_mock_kernel_function_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_mock_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; +using kokkos_mock_kernel_function_type_gtest = 
util::combine_test_parameters_gtest_t; // generic GPU CSVM tests - mocked grid sizes - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVM, kokkos_mock_csvm_test_type_gtest, naming::test_parameter_to_name); - INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVMKernelFunction, kokkos_mock_kernel_function_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVM, kokkos_mock_csvm_test_type_gtest, naming::test_parameter_to_name); +INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMFakedGridSize, GenericGPUCSVMKernelFunction, kokkos_mock_kernel_function_type_gtest, naming::test_parameter_to_name); From 70dec59104f513a90854c2519078ad7d265d71b8 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sun, 10 Nov 2024 20:45:07 +0100 Subject: [PATCH 092/123] Reformulate some template meta-programming parts from a recursive implementation to an iterative implementation improving compilation times. --- tests/types_to_test.hpp | 105 +++++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 40 deletions(-) diff --git a/tests/types_to_test.hpp b/tests/types_to_test.hpp index b5eb2c1db..4147d9b70 100644 --- a/tests/types_to_test.hpp +++ b/tests/types_to_test.hpp @@ -115,6 +115,33 @@ constexpr auto test_parameter_value_at_v = std::get(ValueList::values); namespace detail { +/** + * @brief Copy all types in the @p Tuple using the indices @p Is. + * @tparam Tuple the tuple types to clone + * @tparam Is the index sequence used to clone the tuple types + * @param[in] tuple the tuple to clone + */ +template +[[nodiscard]] constexpr auto expand_tuple(std::index_sequence, const Tuple &tuple) { + return std::tuple_cat((static_cast(Is), tuple)...); +} + +/** + * @brief Copy all types in the @p Tuple @p N times. 
+ * @tparam N the time how often the types should be cloned + * @tparam Tuple the tuple types to clone + */ +template +struct clone_tuple_types { + using type = decltype(expand_tuple(std::make_index_sequence(), std::declval())); +}; + +/** + * @brief Shorthand for the `typename clone_tuple_types::type` type. + */ +template +using clone_tuple_types_t = typename clone_tuple_types::type; + // convert the types in a tuple to GoogleTests ::testing::Type template struct tuple_to_gtest_types; @@ -196,41 +223,38 @@ struct wrap_in_value_list> { template using wrap_in_value_list_t = typename wrap_in_value_list::type; -template -struct combine_values; +template +struct combine_values_impl { }; /** - * @brief Recursion termination: add the last value in the @p Array to the `value_list`s in the std::tuple. - * @tparam T the type in the array - * @tparam SIZE the size of the array - * @tparam Array the array - * @tparam Types the already existing `value_list`s + * @brief Iteratively add the values in @p Array at position @p IS to the `value_list`s in the std::tuple @p Tuple. + * @tparam Tuple the tuple types + * @tparam Array the array values to add + * @tparam Is the indices in @p Array */ -template &Array, typename... Types> -struct combine_values> { - using type = std::tuple(Array)>...>; +template +struct combine_values_impl> { + constexpr static std::size_t N = Array.size(); + using type = std::tuple, std::get(Array)>...>; }; /** - * @brief Recursively add the value @p I of the @p Array to the `value_list`s in the std::tuple. - * @tparam T the type in the array - * @tparam SIZE the size of the array - * @tparam I the currently investigated array element - * @tparam Array the array - * @tparam Types the already existing `value_list`s + * @brief Add the values in @p Array to the `value_list`s in the std::tuple @p Tuple. + * @tparam Tuple the tuple types + * @tparam Array the array values to add */ -template &Array, typename... 
Types> -struct combine_values> { - using type = concat_tuple_types_t< - std::tuple(Array)>...>, - typename combine_values>::type>; +template +struct combine_values { + // clone the types in the Tuple N-times where N is the number of values in the Array + using cloned_tuple = clone_tuple_types_t; + using type = typename combine_values_impl>>::type; }; /** * @brief Shorthand for `typename combine_values<...>::type`. */ template -using combine_values_t = typename combine_values::value_type, Array.size(), Array.size() - 1, Array, Tuple>::type; +using combine_values_t = typename combine_values::type; /** * @brief Calculate the cartesian product of the values in @p FirstArray and @p RemainingArrays recursively. @@ -292,37 +316,38 @@ struct wrap_in_type_list> { template using wrap_in_type_list_t = typename wrap_in_type_list::type; -template -struct combine_types; +template +struct combine_types_impl { }; /** - * @brief Recursion termination: add the last type in the @p Tuple to the `type_list`s in the std::tuple. - * @tparam Tuple the std::tuple containing the types to add - * @tparam ResultTupleTypes the already existing `type_list`s + * @brief Iteratively add the types in @p CurrentTuple at position @p IS to the `type_list`s in the std::tuple @p Tuple. + * @tparam Tuple the tuple types + * @tparam CurrentTuple the types in the current tuple + * @tparam Is the indices in @p Array */ -template -struct combine_types<0, Tuple, std::tuple> { - using type = std::tuple>...>; +template +struct combine_types_impl> { + constexpr static std::size_t N = std::tuple_size_v; + using type = std::tuple, std::tuple_element_t>...>; }; /** - * @brief Recursively add the type @p I of the @p Tuple to the `type_list`s in the std::tuple. - * @tparam I the currently investigated tuple element - * @tparam Tuple the tuple - * @tparam ResultTupleTypes the already existing `type_list`s + * @brief Add the types in @p CurrentTuple to the `type_list`s in the std::tuple @p Tuple. 
+ * @tparam Tuple the tuple types + * @tparam CurrentTuple the types in the current tuple */ -template -struct combine_types> { - using type = concat_tuple_types_t< - std::tuple>...>, - typename combine_types>::type>; +template +struct combine_types { + // clone the types in the Tuple N-times where N is the number of types in the CurrentTuple + using cloned_tuple = clone_tuple_types_t, Tuple>; + using type = typename combine_types_impl>>::type; }; /** * @brief Shorthand for `typename combine_types<...>::type`. */ template -using combine_types_t = typename combine_types - 1, Tuple, ResultTuple>::type; +using combine_types_t = typename combine_types::type; /** * @brief Calculate the cartesian product of the types in @p FirstTuple and @p RemainingTuples recursively. From d57e619771b9e30aa7eafb1942d831a3cbaaf81a Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Sun, 10 Nov 2024 23:42:27 +0100 Subject: [PATCH 093/123] Disable some tests if the Kokkos::Cuda execution space is enabled due to template instantiation limits with nvcc. 
--- tests/backends/Kokkos/CMakeLists.txt | 5 +++++ tests/backends/Kokkos/kokkos_csvm.cpp | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/backends/Kokkos/CMakeLists.txt b/tests/backends/Kokkos/CMakeLists.txt index c6abf4dbe..4a7d23501 100644 --- a/tests/backends/Kokkos/CMakeLists.txt +++ b/tests/backends/Kokkos/CMakeLists.txt @@ -30,6 +30,11 @@ add_executable(${PLSSVM_KOKKOS_TEST_NAME} ${CMAKE_CURRENT_LIST_DIR}/../../main.c if (Kokkos_ENABLE_CUDA) # fix template limit when using Kokkos::Cuda target_compile_options(${PLSSVM_KOKKOS_TEST_NAME} PRIVATE -Xcudafe --pending_instantiations=0) + + # tests won't compile with nvcc + if (NOT PLSSVM_TEST_WITH_REDUCED_LABEL_TYPES) + message(FATAL_ERROR "Due to template instantiation limits within nvcc, only reduced label type tests are currently supported!") + endif () endif () # link against test library diff --git a/tests/backends/Kokkos/kokkos_csvm.cpp b/tests/backends/Kokkos/kokkos_csvm.cpp index 51a4fb9c6..b6e892a6c 100644 --- a/tests/backends/Kokkos/kokkos_csvm.cpp +++ b/tests/backends/Kokkos/kokkos_csvm.cpp @@ -728,7 +728,10 @@ INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunction, kokkos_ker INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolver, kokkos_solver_type_gtest, naming::test_parameter_to_name); INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunction, kokkos_solver_and_kernel_function_type_gtest, naming::test_parameter_to_name); INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMKernelFunctionClassification, kokkos_label_type_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); -// INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunctionClassification, kokkos_label_type_solver_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); +#if !defined(KOKKOS_ENABLE_CUDA) +// testcase doesn't compile with Kokkos::Cuda's nvcc due to template instantiation limits 
+INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericCSVMSolverKernelFunctionClassification, kokkos_label_type_solver_kernel_function_and_classification_type_gtest, naming::test_parameter_to_name); +#endif // generic CSVM DeathTests INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); From 6f210ff6f922f58cb219c526677a67e0972fc52d Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 11 Nov 2024 10:59:54 +0100 Subject: [PATCH 094/123] Also test all available execution spaces for the device_ptr class in Kokkos. --- tests/backends/Kokkos/detail/device_ptr.cpp | 23 +++-- tests/backends/Kokkos/kokkos_csvm.cpp | 90 +++---------------- tests/backends/Kokkos/utility.hpp | 96 +++++++++++++++++++++ tests/naming.hpp | 23 ++++- 4 files changed, 145 insertions(+), 87 deletions(-) create mode 100644 tests/backends/Kokkos/utility.hpp diff --git a/tests/backends/Kokkos/detail/device_ptr.cpp b/tests/backends/Kokkos/detail/device_ptr.cpp index c96a1ed87..ec525dad5 100644 --- a/tests/backends/Kokkos/detail/device_ptr.cpp +++ b/tests/backends/Kokkos/detail/device_ptr.cpp @@ -10,30 +10,39 @@ #include "plssvm/backends/Kokkos/detail/device_ptr.hpp" // plssvm::kokkos::detail::device_ptr -#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper - -#include "Kokkos_Core.hpp" // Kokkos::DefaultExecutionSpace +#include "plssvm/backends/Kokkos/detail/device_wrapper.hpp" // plssvm::kokkos::detail::device_wrapper +#include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space +#include "plssvm/backends/Kokkos/execution_space_type_traits.hpp" // plssvm::kokkos::execution_space_to_kokkos_type_t #include "tests/backends/generic_device_ptr_tests.hpp" // generic device pointer tests to instantiate +#include "tests/backends/Kokkos/utility.hpp" // util::create_kokkos_test_tuple_impl #include "tests/naming.hpp" // naming::test_parameter_to_name 
-#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list} +#include "tests/types_to_test.hpp" // util::{combine_test_parameters_gtest_t, cartesian_type_product_t, layout_type_list}, + // util::detail::concat_tuple_types_t #include "gtest/gtest.h" // INSTANTIATE_TYPED_TEST_SUITE_P #include // std::tuple -template +template struct kokkos_device_ptr_test_type { using device_ptr_type = plssvm::kokkos::detail::device_ptr; using queue_type = plssvm::kokkos::detail::device_wrapper; + constexpr static plssvm::kokkos::execution_space space = exec_space; static const queue_type &default_queue() { - static const queue_type queue{ Kokkos::DefaultExecutionSpace{} }; + static const queue_type queue{ plssvm::kokkos::execution_space_to_kokkos_type_t{} }; return queue; } }; -using kokkos_device_ptr_tuple = std::tuple, kokkos_device_ptr_test_type>; +template +using kokkos_device_ptr_test_type_float = kokkos_device_ptr_test_type; +template +using kokkos_device_ptr_test_type_double = kokkos_device_ptr_test_type; + +using kokkos_device_ptr_tuple = util::detail::concat_tuple_types_t, + util::create_kokkos_test_tuple_t>; // the tests used in the instantiated GTest test suites using kokkos_device_ptr_type_gtest = util::combine_test_parameters_gtest_t>; diff --git a/tests/backends/Kokkos/kokkos_csvm.cpp b/tests/backends/Kokkos/kokkos_csvm.cpp index b6e892a6c..c1ae0cdb7 100644 --- a/tests/backends/Kokkos/kokkos_csvm.cpp +++ b/tests/backends/Kokkos/kokkos_csvm.cpp @@ -20,10 +20,11 @@ #include "tests/backends/generic_csvm_tests.hpp" // generic CSVM tests to instantiate #include "tests/backends/generic_gpu_csvm_tests.hpp" // generic GPU CSVM tests to instantiate #include "tests/backends/Kokkos/mock_kokkos_csvm.hpp" -#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT -#include "tests/naming.hpp" // naming::test_parameter_to_name -#include "tests/types_to_test.hpp" // util::{cartesian_type_product_t, 
combine_test_parameters_gtest_t} -#include "tests/utility.hpp" // util::redirect_output +#include "tests/backends/Kokkos/utility.hpp" // util::create_kokkos_test_tuple_impl +#include "tests/custom_test_macros.hpp" // EXPECT_THROW_WHAT +#include "tests/naming.hpp" // naming::test_parameter_to_name +#include "tests/types_to_test.hpp" // util::{cartesian_type_product_t, combine_test_parameters_gtest_t} +#include "tests/utility.hpp" // util::redirect_output #include "gtest/gtest.h" // TEST_F, EXPECT_NO_THROW, INSTANTIATE_TYPED_TEST_SUITE_P, ::testing::Test @@ -636,80 +637,10 @@ struct kokkos_csvm_test_type { inline static auto additional_arguments = std::make_tuple(std::make_pair(plssvm::kokkos_execution_space, space)); }; -namespace impl { +template +using kokkos_csvm_test_type_without_mock = kokkos_csvm_test_type; -/** - * @brief Determine which execution spaces can be tested based on the available Kokkos::ExecutionSpaces and PLSSVM target platforms. - * @return the available execution spaces for testing (`[[nodiscard]]`) - */ -[[nodiscard]] constexpr auto constexpr_available_execution_spaces_to_test() { - return std::array{ -#if defined(KOKKOS_ENABLE_CUDA) && defined(PLSSVM_HAS_NVIDIA_TARGET) // for Kokkos::Cuda, an NVIDIA target must be available - plssvm::kokkos::execution_space::cuda, -#endif -#if defined(KOKKOS_ENABLE_HIP) && (defined(PLSSVM_HAS_NVIDIA_TARGET) || defined(PLSSVM_HAS_AMD_TARGET)) // for Kokkos::HIP, an NVIDIA or AMD target must be available - plssvm::kokkos::execution_space::hip, -#endif -#if defined(KOKKOS_ENABLE_SYCL) // for Kokkos::SYCL, any target is ok - plssvm::kokkos::execution_space::sycl, -#endif -#if defined(KOKKOS_ENABLE_HPX) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::Experimental::HPX, a CPU target must be available - plssvm::kokkos::execution_space::hpx, -#endif -#if defined(KOKKOS_ENABLE_OPENMP) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::OpenMP, a CPU target must be available - plssvm::kokkos::execution_space::openmp, 
-#endif -#if defined(KOKKOS_ENABLE_OPENMPTARGET) // for Kokkos::Experimental::OpenMPTarget,any target is ok // TODO: implement correctly based on allowed target platforms - plssvm::kokkos::execution_space::openmp_target, -#endif -#if defined(KOKKOS_ENABLE_OPENACC) // for Kokkos::Experimental::OpenACC,any target is ok // TODO: implement correctly based on allowed target platforms - plssvm::kokkos::execution_space::openacc, -#endif -#if defined(KOKKOS_ENABLE_THREADS) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::Threads, a CPU target must be available - plssvm::kokkos::execution_space::threads, -#endif -#if defined(KOKKOS_ENABLE_SERIAL) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::Serial, a CPU target must be available - plssvm::kokkos::execution_space::serial, -#endif - }; -} - -/** - * @brief Uninstantiated base type to create a `std::tuple` containing all available `kokkos_csvm_test_type` types. - */ -template -struct create_device_tuple_type_helper; - -/** - * @brief Helper struct to create a `std::tuple` containing all available `kokkos_csvm_test_type` types by iterating over the `std::array` of - * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. - * @tparam mock_grid_size whether the maximum grid size should be mocked (i.e. in fact reduced) or not - * @tparam Is the indices to index the `std::array` - */ -template -struct create_device_tuple_type_helper> { - /// The array containing all available execution spaces. - constexpr static auto array = constexpr_available_execution_spaces_to_test(); - /// The resulting variant type. - using type = std::tuple...>; -}; - -/** - * @brief Create a `std::tuple` containing all available `kokkos_csvm_test_type` types by iterating over the `std::array` of - * `plssvm::kokkos::execution_space` values as returned by `plssvm::kokkos::detail::constexpr_available_execution_spaces()`. 
- * @tparam mock_grid_size whether the maximum grid size should be mocked (i.e. in fact reduced) or not - */ -template -struct create_device_tuple_type { - /// The number of types in the final variant. - constexpr static std::size_t N = constexpr_available_execution_spaces_to_test().size(); - /// The final variant type. - using type = typename create_device_tuple_type_helper>::type; -}; - -} // namespace impl - -using kokkos_csvm_test_tuple = typename impl::create_device_tuple_type::type; +using kokkos_csvm_test_tuple = util::create_kokkos_test_tuple_t; using kokkos_csvm_test_label_type_list = util::cartesian_type_product_t; using kokkos_csvm_test_type_list = util::cartesian_type_product_t; @@ -746,7 +677,10 @@ INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVM, GenericGPUCSVMKernelFunction, kokkos_ // generic GPU CSVM DeathTests - correct grid sizes INSTANTIATE_TYPED_TEST_SUITE_P(KokkosCSVMDeathTest, GenericGPUCSVMDeathTest, kokkos_csvm_test_type_gtest, naming::test_parameter_to_name); -using kokkos_mock_csvm_test_tuple = typename impl::create_device_tuple_type::type; +template +using kokkos_csvm_test_type_with_mock = kokkos_csvm_test_type; + +using kokkos_mock_csvm_test_tuple = util::create_kokkos_test_tuple_t; using kokkos_mock_csvm_test_type_list = util::cartesian_type_product_t; using kokkos_mock_csvm_test_type_gtest = util::combine_test_parameters_gtest_t; diff --git a/tests/backends/Kokkos/utility.hpp b/tests/backends/Kokkos/utility.hpp new file mode 100644 index 000000000..872d4c624 --- /dev/null +++ b/tests/backends/Kokkos/utility.hpp @@ -0,0 +1,96 @@ +/** +* @file +* @author Alexander Van Craen +* @author Marcel Breyer +* @copyright 2018-today The PLSSVM project - All Rights Reserved +* @license This file is part of the PLSSVM project which is released under the MIT license. +* See the LICENSE.md file in the project root for full license information. +* +* @brief Determine the execution spaces available for tests with the Kokkos backend. 
+*/ + +#ifndef PLSSVM_TESTS_BACKENDS_KOKKOS_UTILITY_HPP_ +#define PLSSVM_TESTS_BACKENDS_KOKKOS_UTILITY_HPP_ +#pragma once + +namespace util { + +/** + * @brief Determine which execution spaces can be tested based on the available Kokkos::ExecutionSpaces and PLSSVM target platforms. + * @return the available execution spaces for testing (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr auto constexpr_available_execution_spaces_to_test() { + return std::array{ +#if defined(KOKKOS_ENABLE_CUDA) && defined(PLSSVM_HAS_NVIDIA_TARGET) // for Kokkos::Cuda, an NVIDIA target must be available + plssvm::kokkos::execution_space::cuda, +#endif +#if defined(KOKKOS_ENABLE_HIP) && (defined(PLSSVM_HAS_NVIDIA_TARGET) || defined(PLSSVM_HAS_AMD_TARGET)) // for Kokkos::HIP, an NVIDIA or AMD target must be available + plssvm::kokkos::execution_space::hip, +#endif +#if defined(KOKKOS_ENABLE_SYCL) // for Kokkos::SYCL, any target is ok + plssvm::kokkos::execution_space::sycl, +#endif +#if defined(KOKKOS_ENABLE_HPX) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::Experimental::HPX, a CPU target must be available + plssvm::kokkos::execution_space::hpx, +#endif +#if defined(KOKKOS_ENABLE_OPENMP) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::OpenMP, a CPU target must be available + plssvm::kokkos::execution_space::openmp, +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) // for Kokkos::Experimental::OpenMPTarget,any target is ok // TODO: implement correctly based on allowed target platforms + plssvm::kokkos::execution_space::openmp_target, +#endif +#if defined(KOKKOS_ENABLE_OPENACC) // for Kokkos::Experimental::OpenACC,any target is ok // TODO: implement correctly based on allowed target platforms + plssvm::kokkos::execution_space::openacc, +#endif +#if defined(KOKKOS_ENABLE_THREADS) && defined(PLSSVM_HAS_CPU_TARGET) // for Kokkos::Threads, a CPU target must be available + plssvm::kokkos::execution_space::threads, +#endif +#if defined(KOKKOS_ENABLE_SERIAL) && defined(PLSSVM_HAS_CPU_TARGET) // 
for Kokkos::Serial, a CPU target must be available + plssvm::kokkos::execution_space::serial, +#endif + }; +} + +/** + * @brief Uninstantiated base type to create a `std::tuple` containing all available `kokkos_csvm_test_type` types. + */ +template