diff --git a/resolve/hykkt/CMakeLists.txt b/resolve/hykkt/CMakeLists.txt index 6a67a927a..7bd3c2b92 100644 --- a/resolve/hykkt/CMakeLists.txt +++ b/resolve/hykkt/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(permutation) add_subdirectory(ruiz) add_subdirectory(cholesky) +add_subdirectory(spgemm) diff --git a/resolve/hykkt/README.md b/resolve/hykkt/README.md new file mode 100644 index 000000000..00bfd5cfb --- /dev/null +++ b/resolve/hykkt/README.md @@ -0,0 +1,46 @@ +# HyKKT + +## Description +HyKKT (pronounced as _hiked_) is a package for solving systems of equations and unknowns resulting from an iterative solution of an optimization +problem, for example optimal power flow, which uses hardware accelerators (GPUs) efficiently. + +The HyKKT package contains a linear solver tailored for Karush Kuhn Tucker (KKT) linear systems and +deployment on hardware accelerator hardware such as GPUs. The solver requires +all blocks of the $4\times 4$ block system: + +```math +\begin{bmatrix} + H + D_x & 0 & J_c^T & J_d^T \\ + 0 & D_s & 0 & -I \\ + J_c & 0 & 0 & 0 \\ + J_d & -I & 0 & 0 +\end{bmatrix} +\begin{bmatrix} + \Delta x \\ \Delta s \\ \Delta y_c \\ \Delta y_d +\end{bmatrix} = +\begin{bmatrix} + \tilde{r}_x \\ r_s \\ r_c \\ r_d +\end{bmatrix} +``` + +separately and solves the system to a desired +numerical precision exactly via block reduction and conjugate gradient on the +schur complement. Please see the [HyKKT paper](https://www.tandfonline.com/doi/abs/10.1080/10556788.2022.2124990) for mathematical details. + +## Integration within ReSolve + +HyKKT code within ReSolve is not yet fully integrated. The code is currently experimental, but is actively in development. +The main issue is the requirement that the user pass matrix and vector blocks, rather than the constructed KKT system. +This is currently not available within ReSolve as it is specific to HyKKT and KKT matrices. +TODO: +- Work with AMD to fix bug in Cholesky factorization. 
+- Allow the representation of a matrix or vector by blocks within ReSolve. +- Schur Complement Conjugate Gradient within HyKKT. +- Complete HyKKT integration within ReSolve. + + +## Dependencies + +Most HyKKT dependencies are identical to ReSolve. Exceptions are: +- HyKKT does not require KLU +- For AMD builds, HyKKT requires HIP/ROCm >= 6.4 due to a known bug in SpGEMM. diff --git a/resolve/hykkt/spgemm/CMakeLists.txt b/resolve/hykkt/spgemm/CMakeLists.txt new file mode 100644 index 000000000..37cbe654d --- /dev/null +++ b/resolve/hykkt/spgemm/CMakeLists.txt @@ -0,0 +1,56 @@ +#[[ + +@brief Build SpGEMM module for HyKKT solver + +@author Adham Ibrahim + +]] + +# Add source files for the SpGEMM module +set(SPGEMM_SRC + SpGEMM.cpp + SpGEMMCpu.cpp +) +set(SPGEMM_HEADER_INSTALL + SpGEMM.hpp + SpGEMMImpl.hpp + SpGEMMCpu.hpp + SpGEMMCuda.hpp + SpGEMMHip.hpp +) + +set(SPGEMM_CUDA_SRC + SpGEMMCuda.cpp +) + +set(SPGEMM_ROCM_SRC + SpGEMMHip.cpp +) + +# Build shared library ReSolve::hykkt +add_library(resolve_hykkt_spgemm SHARED ${SPGEMM_SRC}) + +target_link_libraries(resolve_hykkt_spgemm PRIVATE resolve_logger ${suitesparse_cholmod}) +target_include_directories(resolve_hykkt_spgemm PUBLIC ${SUITESPARSE_INCLUDE_DIR}) + +if (RESOLVE_USE_CUDA) + target_sources(resolve_hykkt_spgemm PRIVATE ${SPGEMM_CUDA_SRC}) + target_link_libraries(resolve_hykkt_spgemm PUBLIC resolve_backend_cuda) +endif() + +if (RESOLVE_USE_HIP) + target_sources(resolve_hykkt_spgemm PRIVATE ${SPGEMM_ROCM_SRC}) + target_link_libraries(resolve_hykkt_spgemm PUBLIC resolve_backend_hip) +endif() + +# Link to dummy device backend if GPU support is not enabled +if (NOT RESOLVE_USE_GPU) + target_link_libraries(resolve_hykkt_spgemm PUBLIC resolve_backend_cpu) +endif() + +target_include_directories(resolve_hykkt_spgemm INTERFACE + $ + $ +) + +install(FILES ${SPGEMM_HEADER_INSTALL} DESTINATION include/resolve/hykkt) diff --git a/resolve/hykkt/spgemm/SpGEMM.cpp b/resolve/hykkt/spgemm/SpGEMM.cpp new file mode 100644 index 
000000000..1170f919f --- /dev/null +++ b/resolve/hykkt/spgemm/SpGEMM.cpp @@ -0,0 +1,93 @@ +/** + * @file SpGEMM.cpp + * @author Adham Ibrahim (ibrahimas@ornl.gov) + */ + +#include "SpGEMM.hpp" + +#include "SpGEMMCpu.hpp" +#ifdef RESOLVE_USE_CUDA +#include "SpGEMMCuda.hpp" +#elif defined(RESOLVE_USE_HIP) +#include "SpGEMMHip.hpp" +#endif + +namespace ReSolve +{ + using real_type = ReSolve::real_type; + using out = ReSolve::io::Logger; + + namespace hykkt + { + /** + * Constructor for SpGEMM + * @param memspace[in] - Memory space for computation. + * @param alpha[in] - Scalar multiplier for the product. + * @param beta[in] - Scalar multiplier for the sum. + */ + SpGEMM::SpGEMM(memory::MemorySpace memspace, real_type alpha, real_type beta) + : memspace_(memspace) + { + if (memspace_ == memory::HOST) + { + impl_ = new SpGEMMCpu(alpha, beta); + } + else + { +#ifdef RESOLVE_USE_CUDA + impl_ = new SpGEMMCuda(alpha, beta); +#elif defined(RESOLVE_USE_HIP) + impl_ = new SpGEMMHip(alpha, beta); +#else + out::error() << "No GPU support enabled, and memory space set to DEVICE.\n"; + exit(1); +#endif + } + } + + /** + * Destructor for SpGEMM + */ + SpGEMM::~SpGEMM() + { + delete impl_; + } + + /** + * Loads the two matrices for the product + * @param A[in] - Pointer to CSR matrix + * @param B[in] - Pointer to CSR matrix + */ + void SpGEMM::loadProductMatrices(matrix::Csr* A, matrix::Csr* B) + { + impl_->loadProductMatrices(A, B); + } + + /** + * Loads the sum matrix for the operation + * @param D[in] - Pointer to CSR matrix + */ + void SpGEMM::loadSumMatrix(matrix::Csr* D) + { + impl_->loadSumMatrix(D); + } + + /** + * Loads the result matrix + * @param E[in] - Pointer to pointer to CSR matrix + */ + void SpGEMM::loadResultMatrix(matrix::Csr** E_ptr) + { + impl_->loadResultMatrix(E_ptr); + } + + /** + * Computes the result of the SpGEMM operation + * E = alpha * A * B + beta * D + */ + void SpGEMM::compute() + { + impl_->compute(); + } + } // namespace hykkt +} // namespace 
ReSolve diff --git a/resolve/hykkt/spgemm/SpGEMM.hpp b/resolve/hykkt/spgemm/SpGEMM.hpp new file mode 100644 index 000000000..edc3a267a --- /dev/null +++ b/resolve/hykkt/spgemm/SpGEMM.hpp @@ -0,0 +1,37 @@ +/** + * @file SpGEMM.hpp + * @author Adham Ibrahim (ibrahimas@ornl.gov) + */ + +#pragma once +#include "SpGEMMImpl.hpp" +#include +#include +#include + +namespace ReSolve +{ + using real_type = ReSolve::real_type; + + namespace hykkt + { + class SpGEMM + { + public: + // computes E = alpha * A * B + beta * D + SpGEMM(memory::MemorySpace memspace, real_type alpha, real_type beta); + ~SpGEMM(); + + void loadProductMatrices(matrix::Csr* A, matrix::Csr* B); + void loadSumMatrix(matrix::Csr* D); + void loadResultMatrix(matrix::Csr** E_ptr); + + void compute(); + + private: + memory::MemorySpace memspace_; + + SpGEMMImpl* impl_; + }; + } // namespace hykkt +} // namespace ReSolve diff --git a/resolve/hykkt/spgemm/SpGEMMCpu.cpp b/resolve/hykkt/spgemm/SpGEMMCpu.cpp new file mode 100644 index 000000000..8310b80a4 --- /dev/null +++ b/resolve/hykkt/spgemm/SpGEMMCpu.cpp @@ -0,0 +1,139 @@ +/** + * @file SpGEMMCpu.cpp + * @author Adham Ibrahim (ibrahimas@ornl.gov) + * @brief Implementation of SpGEMM using CHOLMOD for CPU + */ + +#include "SpGEMMCpu.hpp" + +namespace ReSolve +{ + using real_type = ReSolve::real_type; + + namespace hykkt + { + /** + * Constructor for SpGEMMCpu + * @param alpha[in] - Scalar multiplier for the product. + * @param beta[in] - Scalar multiplier for the sum. 
+ */ + SpGEMMCpu::SpGEMMCpu(real_type alpha, real_type beta) + : alpha_(alpha), beta_(beta) + { + cholmod_start(&Common_); + + A_ = nullptr; + B_ = nullptr; + D_ = nullptr; + } + + /** + * Destructor for SpGEMMCpu + */ + SpGEMMCpu::~SpGEMMCpu() + { + if (A_) + { + cholmod_free_sparse(&A_, &Common_); + } + if (B_) + { + cholmod_free_sparse(&B_, &Common_); + } + if (D_) + { + cholmod_free_sparse(&D_, &Common_); + } + cholmod_finish(&Common_); + } + + void SpGEMMCpu::loadProductMatrices(matrix::Csr* A, matrix::Csr* B) + { + if (!A_) + { + A_ = allocateCholmodType(A); + } + if (!B_) + { + B_ = allocateCholmodType(B); + } + copyDataToCholmodType(A, A_); + copyDataToCholmodType(B, B_); + } + + void SpGEMMCpu::loadSumMatrix(matrix::Csr* D) + { + if (!D_) + { + D_ = allocateCholmodType(D); + } + copyDataToCholmodType(D, D_); + } + + void SpGEMMCpu::loadResultMatrix(matrix::Csr** E_ptr) + { + E_ptr_ = E_ptr; + } + + /** + * @brief Computes the result of the SpGEMM operation + * + * Does not store partial results for reuse, as is this is not supported + * by the CHOLMOD package. 
+ */ + void SpGEMMCpu::compute() + { + cholmod_sparse* C_chol = cholmod_ssmult(B_, A_, 0, 1, 0, &Common_); + cholmod_sparse* E_chol = cholmod_add(C_chol, D_, &alpha_, &beta_, 1, 0, &Common_); + + if (!(*E_ptr_)) + { + *E_ptr_ = new matrix::Csr((index_type) E_chol->nrow, (index_type) E_chol->ncol, (index_type) E_chol->nzmax); + } + else + { + (*E_ptr_)->destroyMatrixData(memory::HOST); + } + + // Previous data must be de-allocated and new data copied + // Cholmod does not allow for reuse of arrays + (*E_ptr_)->copyDataFrom(static_cast(E_chol->p), + static_cast(E_chol->i), + static_cast(E_chol->x), + memory::HOST, + memory::HOST); + } + + /** + * @brief Allocates a CHOLMOD sparse matrix of the same size as the input CSR matrix + * @param A[in] - Pointer to CSR matrix + * @return Pointer to the allocated CHOLMOD sparse matrix + */ + cholmod_sparse* SpGEMMCpu::allocateCholmodType(matrix::Csr* A) + { + return cholmod_allocate_sparse((size_t) A->getNumRows(), + (size_t) A->getNumColumns(), + (size_t) A->getNnz(), + 1, + 1, + 0, + CHOLMOD_REAL, + &Common_); + } + + /** + * Copies the data from a CSR matrix to a CHOLMOD sparse matrix + * @param A[in] - Pointer to CSR matrix + * @param A_chol[in] - Pointer to CHOLMOD sparse matrix + */ + void SpGEMMCpu::copyDataToCholmodType(matrix::Csr* A, cholmod_sparse* A_chol) + { + mem_.copyArrayHostToHost( + static_cast(A_chol->p), A->getRowData(memory::HOST), A->getNumRows() + 1); + mem_.copyArrayHostToHost( + static_cast(A_chol->i), A->getColData(memory::HOST), A->getNnz()); + mem_.copyArrayHostToHost( + static_cast(A_chol->x), A->getValues(memory::HOST), A->getNnz()); + } + } // namespace hykkt +} // namespace ReSolve diff --git a/resolve/hykkt/spgemm/SpGEMMCpu.hpp b/resolve/hykkt/spgemm/SpGEMMCpu.hpp new file mode 100644 index 000000000..3db705546 --- /dev/null +++ b/resolve/hykkt/spgemm/SpGEMMCpu.hpp @@ -0,0 +1,43 @@ +#pragma once + +#include + +#include "SpGEMMImpl.hpp" + +namespace ReSolve +{ + using real_type = 
ReSolve::real_type; + + namespace hykkt + { + class SpGEMMCpu : public SpGEMMImpl + { + public: + SpGEMMCpu(real_type alpha, real_type beta); + ~SpGEMMCpu(); + + void loadProductMatrices(matrix::Csr* A, matrix::Csr* B); + void loadSumMatrix(matrix::Csr* D); + void loadResultMatrix(matrix::Csr** E_ptr); + + void compute(); + + private: + MemoryHandler mem_; + + real_type alpha_; + real_type beta_; + + cholmod_common Common_; + + cholmod_sparse* A_; + cholmod_sparse* B_; + cholmod_sparse* D_; + + matrix::Csr** E_ptr_; + + cholmod_sparse* allocateCholmodType(matrix::Csr* X); + void copyDataToCholmodType(matrix::Csr* X, cholmod_sparse* X_chol); + }; + } // namespace hykkt +} // namespace ReSolve diff --git a/resolve/hykkt/spgemm/SpGEMMCuda.cpp b/resolve/hykkt/spgemm/SpGEMMCuda.cpp new file mode 100644 index 000000000..e1283a20a --- /dev/null +++ b/resolve/hykkt/spgemm/SpGEMMCuda.cpp @@ -0,0 +1,301 @@ +/** + * @file SpGEMMCuda.cpp + * @author Adham Ibrahim (ibrahimas@ornl.gov) + * @brief Implementation of SpGEMM using cuSPARSE for GPU + */ + +#include "SpGEMMCuda.hpp" + +namespace ReSolve +{ + using real_type = ReSolve::real_type; + using out = ReSolve::io::Logger; + + namespace hykkt + { + SpGEMMCuda::SpGEMMCuda(real_type alpha, real_type beta) + : alpha_(alpha), beta_(beta) + { + cusparseCreate(&handle_); + cusparseSpGEMM_createDescr(&spgemm_desc_); + cusparseCreateMatDescr(&D_descr_); + cusparseSetMatType(D_descr_, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(D_descr_, CUSPARSE_INDEX_BASE_ZERO); + } + + SpGEMMCuda::~SpGEMMCuda() + { + cusparseSpGEMM_destroyDescr(spgemm_desc_); + cusparseDestroy(handle_); + } + + void SpGEMMCuda::loadProductMatrices(matrix::Csr* A, matrix::Csr* B) + { + A_descr_ = convertToCusparseType(A); + B_descr_ = convertToCusparseType(B); + + n_ = A->getNumRows(); + + if (!C_descr_) + { + mem_.allocateArrayOnDevice(&C_row_ptr_, (index_type) n_ + 1); + + cusparseCreateCsr(&C_descr_, + n_, + B->getNumColumns(), + 0, // nnz will be 
determined later + C_row_ptr_, + nullptr, + nullptr, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + } + } + + void SpGEMMCuda::loadSumMatrix(matrix::Csr* D) + { + D_ = D; + } + + void SpGEMMCuda::loadResultMatrix(matrix::Csr** E_ptr) + { + E_ptr_ = E_ptr; + } + + /** + * @brief Computes the result of the SpGEMM operation + * This uses both `cusparseSpGEMMreuse` and `cusparseXcsrgeam` to perform + * the product and sum in separate stages. This is required because + * cusparseSpGEMM does not handle the case where D has a different sparsity + * pattern than the product A * B + */ + void SpGEMMCuda::compute() + { + double beta_product = 0.0; + double alpha_sum = 1.0; + + if (!(*E_ptr_)) + { + size_t temp_buffer_size_1 = 0; + size_t temp_buffer_size_2 = 0; + size_t temp_buffer_size_3 = 0; + + void* temp_buffer_1 = nullptr; + void* temp_buffer_2 = nullptr; + void* temp_buffer_3 = nullptr; + + cusparseSpGEMMreuse_workEstimation(handle_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_descr_, + B_descr_, + C_descr_, + CUSPARSE_SPGEMM_DEFAULT, + spgemm_desc_, + &temp_buffer_size_1, + nullptr); + + mem_.allocateBufferOnDevice(&temp_buffer_1, temp_buffer_size_1); + + cusparseSpGEMMreuse_workEstimation(handle_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_descr_, + B_descr_, + C_descr_, + CUSPARSE_SPGEMM_DEFAULT, + spgemm_desc_, + &temp_buffer_size_1, + temp_buffer_1); + + cusparseSpGEMMreuse_nnz(handle_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_descr_, + B_descr_, + C_descr_, + CUSPARSE_SPGEMM_DEFAULT, + spgemm_desc_, + &temp_buffer_size_2, + nullptr, + &temp_buffer_size_3, + nullptr, + &buffer_size_4_, + nullptr); + + mem_.allocateBufferOnDevice(&temp_buffer_2, temp_buffer_size_2); + mem_.allocateBufferOnDevice(&temp_buffer_3, temp_buffer_size_3); + mem_.allocateBufferOnDevice(&buffer_4_, buffer_size_4_); + + cusparseSpGEMMreuse_nnz(handle_, 
+ CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_descr_, + B_descr_, + C_descr_, + CUSPARSE_SPGEMM_DEFAULT, + spgemm_desc_, + &temp_buffer_size_2, + temp_buffer_2, + &temp_buffer_size_3, + temp_buffer_3, + &buffer_size_4_, + buffer_4_); + + mem_.deleteOnDevice(temp_buffer_1); + mem_.deleteOnDevice(temp_buffer_2); + + int64_t C_num_cols = 0; + int64_t C_nnz_ = 0; + cusparseSpMatGetSize(C_descr_, &n_, &C_num_cols, &C_nnz_); + + mem_.allocateArrayOnDevice(&C_col_ind_, (index_type) C_nnz_); + mem_.allocateArrayOnDevice(&C_val_, (index_type) C_nnz_); + + cusparseCsrSetPointers(C_descr_, + C_row_ptr_, + C_col_ind_, + C_val_); + + cusparseSpGEMMreuse_copy(handle_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_descr_, + B_descr_, + C_descr_, + CUSPARSE_SPGEMM_DEFAULT, + spgemm_desc_, + &buffer_size_5_, + nullptr); + + mem_.allocateBufferOnDevice(&buffer_5_, buffer_size_5_); + + cusparseSpGEMMreuse_copy(handle_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_descr_, + B_descr_, + C_descr_, + CUSPARSE_SPGEMM_DEFAULT, + spgemm_desc_, + &buffer_size_5_, + buffer_5_); + + mem_.deleteOnDevice(temp_buffer_3); + } + + cusparseSpGEMMreuse_compute(handle_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha_, + A_descr_, + B_descr_, + &beta_product, + C_descr_, + CUDA_R_64F, + CUSPARSE_SPGEMM_DEFAULT, + spgemm_desc_); + + if (!(*E_ptr_)) + { + // Begin set up for addition + mem_.allocateArrayOnDevice(&E_row_ptr_, (index_type) n_ + 1); + cusparseSetPointerMode(handle_, CUSPARSE_POINTER_MODE_HOST); + + // pass in D_descr_ as a dummy variable + cusparseDcsrgeam2_bufferSizeExt(handle_, + D_->getNumRows(), + D_->getNumColumns(), + &alpha_sum, + D_descr_, + (int) C_nnz_, + C_val_, + C_row_ptr_, + C_col_ind_, + &beta_, + D_descr_, + D_->getNnz(), + D_->getValues(memory::DEVICE), + D_->getRowData(memory::DEVICE), + D_->getColData(memory::DEVICE), + D_descr_, + E_val_, + 
E_row_ptr_, + E_col_ind_, + &buffer_add_size_); + + mem_.allocateBufferOnDevice(&buffer_add_, buffer_add_size_); + + cusparseXcsrgeam2Nnz(handle_, + D_->getNumRows(), + D_->getNumColumns(), + D_descr_, + (int) C_nnz_, + C_row_ptr_, + C_col_ind_, + D_descr_, + D_->getNnz(), + D_->getRowData(memory::DEVICE), + D_->getColData(memory::DEVICE), + D_descr_, + E_row_ptr_, + &E_nnz_, + buffer_add_); + + mem_.allocateArrayOnDevice(&E_col_ind_, (index_type) E_nnz_); + mem_.allocateArrayOnDevice(&E_val_, (index_type) E_nnz_); + + (*E_ptr_) = new matrix::Csr((index_type) n_, D_->getNumColumns(), (index_type) E_nnz_); + (*E_ptr_)->setDataPointers(E_row_ptr_, E_col_ind_, E_val_, memory::DEVICE); + } + + cusparseDcsrgeam2(handle_, + D_->getNumRows(), + D_->getNumColumns(), + &alpha_sum, + D_descr_, + (int) C_nnz_, + C_val_, + C_row_ptr_, + C_col_ind_, + &beta_, + D_descr_, + D_->getNnz(), + D_->getValues(memory::DEVICE), + D_->getRowData(memory::DEVICE), + D_->getColData(memory::DEVICE), + D_descr_, + E_val_, + E_row_ptr_, + E_col_ind_, + buffer_add_); + + (*E_ptr_)->setUpdated(memory::DEVICE); + } + + /** + * @brief Converts a CSR matrix to a cuSPARSE sparse matrix descriptor + * @param A[in] - Pointer to CSR matrix + * @return cuSPARSE sparse matrix descriptor + */ + cusparseSpMatDescr_t SpGEMMCuda::convertToCusparseType(matrix::Csr* A) + { + cusparseSpMatDescr_t descr; + // TODO: this hardcodes the types but should be based on ReSolve types + cusparseCreateCsr(&descr, + A->getNumRows(), + A->getNumColumns(), + A->getNnz(), + A->getRowData(memory::DEVICE), + A->getColData(memory::DEVICE), + A->getValues(memory::DEVICE), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + return descr; + } + } // namespace hykkt +} // namespace ReSolve diff --git a/resolve/hykkt/spgemm/SpGEMMCuda.hpp b/resolve/hykkt/spgemm/SpGEMMCuda.hpp new file mode 100644 index 000000000..8544a39c9 --- /dev/null +++ b/resolve/hykkt/spgemm/SpGEMMCuda.hpp @@ -0,0 +1,73 @@ 
+/** + * @file SpGEMMCuda.hpp + * @author Adham Ibrahim (ibrahimas@ornl.gov) + * @brief Header file for SpGEMM using cuSPARSE for GPU + */ + +#pragma once + +#include + +#include "SpGEMMImpl.hpp" + +namespace ReSolve +{ + using real_type = ReSolve::real_type; + + namespace hykkt + { + class SpGEMMCuda : public SpGEMMImpl + { + public: + SpGEMMCuda(real_type alpha, real_type beta); + ~SpGEMMCuda(); + + void loadProductMatrices(matrix::Csr* A, matrix::Csr* B); + void loadSumMatrix(matrix::Csr* D); + void loadResultMatrix(matrix::Csr** E_ptr); + + void compute(); + + private: + MemoryHandler mem_; + + real_type alpha_; + real_type beta_; + + matrix::Csr* D_ = nullptr; + matrix::Csr** E_ptr_ = nullptr; + + cusparseHandle_t handle_ = nullptr; + cusparseSpGEMMDescr_t spgemm_desc_ = nullptr; + + size_t buffer_size_4_ = 0; + size_t buffer_size_5_ = 0; + void* buffer_4_ = nullptr; + void* buffer_5_ = nullptr; + + size_t buffer_add_size_ = 0; + void* buffer_add_ = nullptr; + + cusparseSpMatDescr_t A_descr_ = nullptr; + cusparseSpMatDescr_t B_descr_ = nullptr; + cusparseSpMatDescr_t C_descr_ = nullptr; + + cusparseMatDescr_t D_descr_ = nullptr; + + int64_t n_ = 0; + int64_t C_nnz_ = 0; + + index_type* C_row_ptr_ = nullptr; + index_type* C_col_ind_ = nullptr; + real_type* C_val_ = nullptr; + + int E_nnz_ = 0; + + index_type* E_row_ptr_ = nullptr; + index_type* E_col_ind_ = nullptr; + real_type* E_val_ = nullptr; + + cusparseSpMatDescr_t convertToCusparseType(matrix::Csr* A); + }; + } // namespace hykkt +} // namespace ReSolve diff --git a/resolve/hykkt/spgemm/SpGEMMHip.cpp b/resolve/hykkt/spgemm/SpGEMMHip.cpp new file mode 100644 index 000000000..8350d43b2 --- /dev/null +++ b/resolve/hykkt/spgemm/SpGEMMHip.cpp @@ -0,0 +1,208 @@ +/** + * @file SpGEMMHip.cpp + * @author Adham Ibrahim (ibrahimas@ornl.gov) + * @brief Implementation of SpGEMM using rocSPARSE for GPU + */ + +#include "SpGEMMHip.hpp" + +namespace ReSolve +{ + using real_type = ReSolve::real_type; + using out = 
ReSolve::io::Logger; + + namespace hykkt + { + SpGEMMHip::SpGEMMHip(real_type alpha, real_type beta) + : alpha_(alpha), beta_(beta) + { + rocsparse_create_handle(&handle_); + } + + SpGEMMHip::~SpGEMMHip() + { + rocsparse_destroy_spmat_descr(A_descr_); + rocsparse_destroy_spmat_descr(B_descr_); + rocsparse_destroy_spmat_descr(D_descr_); + rocsparse_destroy_spmat_descr(E_descr_); + rocsparse_destroy_handle(handle_); + + mem_.deleteOnDevice(buffer_); + } + + void SpGEMMHip::loadProductMatrices(matrix::Csr* A, matrix::Csr* B) + { + if (A_descr_) + { + rocsparse_destroy_spmat_descr(A_descr_); + } + A_descr_ = convertToRocsparseType(A); + + if (B_descr_) + { + rocsparse_destroy_spmat_descr(B_descr_); + } + B_descr_ = convertToRocsparseType(B); + + E_num_rows_ = A->getNumRows(); + E_num_cols_ = B->getNumColumns(); + } + + void SpGEMMHip::loadSumMatrix(matrix::Csr* D) + { + if (D_descr_) + { + rocsparse_destroy_spmat_descr(D_descr_); + } + D_descr_ = convertToRocsparseType(D); + } + + void SpGEMMHip::loadResultMatrix(matrix::Csr** E_ptr) + { + if (!E_ptr_) + { + mem_.allocateArrayOnDevice(&E_row_ptr_, (index_type) E_num_rows_ + 1); + rocsparse_create_csr_descr(&E_descr_, + E_num_rows_, + E_num_cols_, + E_nnz_, + E_row_ptr_, + nullptr, + nullptr, + rocsparse_indextype_i32, + rocsparse_indextype_i32, + rocsparse_index_base_zero, + rocsparse_datatype_f64_r); + } + E_ptr_ = E_ptr; + } + + /** + * @brief Computes the result of the SpGEMM operation + * This uses `rocsparse_spgemm` with the symbolic and numeric stages + * separated to allow for efficient reuse. 
+ */ + void SpGEMMHip::compute() + { + rocsparse_status status; + if (!buffer_) // first computation + { + // Determine buffer size and allocate + status = rocsparse_spgemm(handle_, + rocsparse_operation_none, + rocsparse_operation_none, + &alpha_, + A_descr_, + B_descr_, + &beta_, + D_descr_, + E_descr_, + rocsparse_datatype_f64_r, + rocsparse_spgemm_alg_default, + rocsparse_spgemm_stage_buffer_size, + &buffer_size_, + nullptr); + if (status != rocsparse_status_success) + { + out::error() << "Failed to determine buffer size. Status: " << status << "\n"; + } + mem_.allocateBufferOnDevice(&buffer_, buffer_size_); + + // Determine number of nonzeros in result + status = rocsparse_spgemm(handle_, + rocsparse_operation_none, + rocsparse_operation_none, + &alpha_, + A_descr_, + B_descr_, + &beta_, + D_descr_, + E_descr_, + rocsparse_datatype_f64_r, + rocsparse_spgemm_alg_default, + rocsparse_spgemm_stage_nnz, + &buffer_size_, + buffer_); + if (status != rocsparse_status_success) + { + out::error() << "Failed to determine number of nonzeros. 
Status: " << status << "\n"; + } + + rocsparse_spmat_get_size(E_descr_, &E_num_rows_, &E_num_cols_, &E_nnz_); + + mem_.allocateArrayOnDevice(&E_col_ind_, (index_type) E_nnz_); + mem_.allocateArrayOnDevice(&E_val_, (index_type) E_nnz_); + + rocsparse_csr_set_pointers(E_descr_, E_row_ptr_, E_col_ind_, E_val_); + + *E_ptr_ = new matrix::Csr((index_type) E_num_rows_, (index_type) E_num_cols_, (index_type) E_nnz_); + (*E_ptr_)->setDataPointers(E_row_ptr_, E_col_ind_, E_val_, memory::DEVICE); + + // Fill the column indices of the result, the values will be computed next + status = rocsparse_spgemm(handle_, + rocsparse_operation_none, + rocsparse_operation_none, + &alpha_, + A_descr_, + B_descr_, + &beta_, + D_descr_, + E_descr_, + rocsparse_datatype_f64_r, + rocsparse_spgemm_alg_default, + rocsparse_spgemm_stage_symbolic, + &buffer_size_, + buffer_); + if (status != rocsparse_status_success) + { + out::error() << "Failed to perform symbolic stage. Status: " << status << "\n"; + } + } + + // SpGEMM numeric computation + status = rocsparse_spgemm(handle_, + rocsparse_operation_none, + rocsparse_operation_none, + &alpha_, + A_descr_, + B_descr_, + &beta_, + D_descr_, + E_descr_, + rocsparse_datatype_f64_r, + rocsparse_spgemm_alg_default, + rocsparse_spgemm_stage_numeric, + &buffer_size_, + buffer_); + if (status != rocsparse_status_success) + { + out::error() << "Failed to perform numeric stage. 
Status: " << status << "\n"; + } + + (*E_ptr_)->setUpdated(memory::DEVICE); + } + + /** + * @brief Converts a CSR matrix to a rocSPARSE sparse matrix descriptor + * @param A[in] - Pointer to CSR matrix + * @return rocSPARSE sparse matrix descriptor + */ + rocsparse_spmat_descr SpGEMMHip::convertToRocsparseType(matrix::Csr* A) + { + rocsparse_spmat_descr descr; + // TODO: this hardcodes the types but should be based on ReSolve types + rocsparse_create_csr_descr(&descr, + A->getNumRows(), + A->getNumColumns(), + A->getNnz(), + A->getRowData(memory::DEVICE), + A->getColData(memory::DEVICE), + A->getValues(memory::DEVICE), + rocsparse_indextype_i32, + rocsparse_indextype_i32, + rocsparse_index_base_zero, + rocsparse_datatype_f64_r); + return descr; + } + } // namespace hykkt +} // namespace ReSolve diff --git a/resolve/hykkt/spgemm/SpGEMMHip.hpp b/resolve/hykkt/spgemm/SpGEMMHip.hpp new file mode 100644 index 000000000..00627cd32 --- /dev/null +++ b/resolve/hykkt/spgemm/SpGEMMHip.hpp @@ -0,0 +1,60 @@ +/** + * @file SpGEMMHip.hpp + * @author Adham Ibrahim (ibrahimas@ornl.gov) + * @brief Header file for SpGEMM using rocSPARSE for GPU + */ + +#pragma once + +#include + +#include "SpGEMMImpl.hpp" + +namespace ReSolve +{ + using real_type = ReSolve::real_type; + + namespace hykkt + { + class SpGEMMHip : public SpGEMMImpl + { + public: + SpGEMMHip(real_type alpha, real_type beta); + ~SpGEMMHip(); + + void loadProductMatrices(matrix::Csr* A, matrix::Csr* B); + void loadSumMatrix(matrix::Csr* D); + void loadResultMatrix(matrix::Csr** E_ptr); + + void compute(); + + private: + MemoryHandler mem_; + + real_type alpha_; + real_type beta_; + + rocsparse_handle handle_ = nullptr; + rocsparse_spmat_descr A_descr_ = nullptr; + rocsparse_spmat_descr B_descr_ = nullptr; + rocsparse_spmat_descr D_descr_ = nullptr; + + long E_num_rows_ = 0; + long E_num_cols_ = 0; + long E_nnz_ = 0; + + index_type* E_row_ptr_ = nullptr; + index_type* E_col_ind_ = nullptr; + real_type* E_val_ = nullptr; + 
+ rocsparse_spmat_descr E_descr_ = nullptr; + + matrix::Csr** E_ptr_ = nullptr; + + size_t buffer_size_ = 0; + void* buffer_ = nullptr; + + rocsparse_spmat_descr convertToRocsparseType(matrix::Csr* A); + }; + } // namespace hykkt +} // namespace ReSolve diff --git a/resolve/hykkt/spgemm/SpGEMMImpl.hpp b/resolve/hykkt/spgemm/SpGEMMImpl.hpp new file mode 100644 index 000000000..ee8575dbf --- /dev/null +++ b/resolve/hykkt/spgemm/SpGEMMImpl.hpp @@ -0,0 +1,33 @@ +/** + * @file SpGEMMImpl.hpp + * @author Adham Ibrahim (ibrahimas@ornl.gov) + * @brief Interface for SpGEMM implementations + */ + +#pragma once + +#include +#include +#include +#include + +namespace ReSolve +{ + using real_type = ReSolve::real_type; + + namespace hykkt + { + class SpGEMMImpl + { + public: + SpGEMMImpl() = default; + virtual ~SpGEMMImpl() = default; + + virtual void loadProductMatrices(matrix::Csr* A, matrix::Csr* B) = 0; + virtual void loadSumMatrix(matrix::Csr* D) = 0; + virtual void loadResultMatrix(matrix::Csr** E_ptr) = 0; + + virtual void compute() = 0; + }; + } // namespace hykkt +} // namespace ReSolve diff --git a/tests/unit/hykkt/CMakeLists.txt b/tests/unit/hykkt/CMakeLists.txt index 34296b2b3..161c8f6fa 100644 --- a/tests/unit/hykkt/CMakeLists.txt +++ b/tests/unit/hykkt/CMakeLists.txt @@ -9,11 +9,11 @@ # Build HyKKT tests add_executable(runHykktPermutationTests.exe runHykktPermutationTests.cpp) - add_executable(runHykktRuizScalingTests.exe runHykktRuizScalingTests.cpp) +add_executable(runHykktSpGEMMTests.exe runHykktSpGEMMTests.cpp) target_link_libraries(runHykktPermutationTests.exe PRIVATE resolve_hykkt resolve_matrix) target_link_libraries(runHykktRuizScalingTests.exe PRIVATE resolve_hykkt resolve_hykkt_ruiz resolve_matrix) - +target_link_libraries(runHykktSpGEMMTests.exe PRIVATE resolve_hykkt resolve_hykkt_spgemm resolve_matrix) add_executable(runHykktCholeskyTests.exe runHykktCholeskyTests.cpp) target_link_libraries(runHykktCholeskyTests.exe PRIVATE resolve_hykkt_chol 
resolve_matrix resolve_vector) @@ -21,7 +21,8 @@ target_link_libraries(runHykktCholeskyTests.exe PRIVATE resolve_hykkt_chol resol # Install tests set(installable_tests runHykktPermutationTests.exe runHykktRuizScalingTests.exe - runHykktCholeskyTests.exe) + runHykktCholeskyTests.exe + runHykktSpGEMMTests.exe) install(TARGETS ${installable_tests} RUNTIME DESTINATION bin/resolve/tests/unit) @@ -29,3 +30,4 @@ install(TARGETS ${installable_tests} add_test(NAME hykkt_perm_test COMMAND $) add_test(NAME hykkt_ruiz_test COMMAND $) add_test(NAME hykkt_chol_test COMMAND $) +add_test(NAME hykkt_spgemm_test COMMAND $) diff --git a/tests/unit/hykkt/HykktSpGEMMTests.hpp b/tests/unit/hykkt/HykktSpGEMMTests.hpp new file mode 100644 index 000000000..d10b0c368 --- /dev/null +++ b/tests/unit/hykkt/HykktSpGEMMTests.hpp @@ -0,0 +1,414 @@ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace ReSolve +{ + namespace tests + { + /** + * @brief Tests for class hykkt::SpGEMM + * + */ + class HykktSpGEMMTests : public TestBase + { + public: + HykktSpGEMMTests(memory::MemorySpace memspace) + : memspace_(memspace) + { + } + + virtual ~HykktSpGEMMTests() + { + } + + /** + * @brief Test the solver on a minimal example + * + * @return TestOutcome the outcome of the test + */ + TestOutcome minimalCorrectness() + { + TestStatus status; + std::string testname(__func__); + + hykkt::SpGEMM spgemm(memspace_, 1.0, 1.0); + + matrix::Csr* A = nullptr; + matrix::Csr* B = nullptr; + matrix::Csr* D = nullptr; + + generateExampleData(&A, &B, &D); + + matrix::Csr* E = nullptr; + + spgemm.loadProductMatrices(A, B); + spgemm.loadSumMatrix(D); + spgemm.loadResultMatrix(&E); + spgemm.compute(); + + if (memspace_ == memory::DEVICE) + { + E->syncData(memory::HOST); + } + + status *= verifyResult(E, 1.0); + + delete A; + delete B; + delete D; + delete E; + + return status.report(testname.c_str()); + } + + /** + * @brief 
Generate an n by n example with known solution + * + * Generates lower bidiagonal A, upper bidiagonal B, and D with + * ones on upper diagonal. + * + * @param[in] n - The size of the test + */ + TestOutcome symbolic(index_type n) + { + TestStatus status; + std::string testname(__func__); + testname += ", n = " + std::to_string(n); + + hykkt::SpGEMM spgemm(memspace_, 1.0, 1.0); + + index_type nnz = 2 * n - 1; + + index_type* A_row_ptr = new index_type[n + 1]; + index_type* A_col_ind = new index_type[nnz]; + real_type* A_values = new real_type[nnz]; + + index_type* B_row_ptr = new index_type[n + 1]; + index_type* B_col_ind = new index_type[nnz]; + real_type* B_values = new real_type[nnz]; + + index_type* D_row_ptr = new index_type[n + 1]; + index_type* D_col_ind = new index_type[n - 2]; + real_type* D_values = new real_type[n - 2]; + + A_row_ptr[0] = 0; + A_row_ptr[1] = 1; + A_col_ind[0] = 0; + A_values[0] = 1.0; + + // A = [1 ] + // [2 1 ] + // [ 3 1 ] + // [ ... ] + for (index_type i = 1; i < n; i++) + { + A_col_ind[2 * i - 1] = i - 1; + A_values[2 * i - 1] = i + 1; + + A_col_ind[2 * i] = i; + A_values[2 * i] = 1.0; + + A_row_ptr[i + 1] = 2 + A_row_ptr[i]; + } + + // B = [1 2 ] + // [ 1 3 ] + // [ 1 ] + // [ ... ] + B_row_ptr[0] = 0; + for (index_type i = 0; i < n; i++) + { + B_col_ind[2 * i] = i; + B_values[2 * i] = 1.0; + + if (i < n - 1) + { + B_col_ind[2 * i + 1] = i + 1; + B_values[2 * i + 1] = i + 2; + B_row_ptr[i + 1] = 2 + B_row_ptr[i]; + } + else + { + B_row_ptr[i + 1] = 1 + B_row_ptr[i]; + } + } + + // D = [0 0 1 ] + // [ 0 0 1 ] + // [ 0 0 1] + // [ ... 
] + D_row_ptr[0] = 0; + for (index_type i = 0; i < n - 2; i++) + { + D_col_ind[i] = i + 2; + D_values[i] = 1.0; + D_row_ptr[i + 1] = 1 + D_row_ptr[i]; + } + D_row_ptr[n - 1] = D_row_ptr[n - 2]; + D_row_ptr[n] = D_row_ptr[n - 1]; + + matrix::Csr* A = new matrix::Csr(n, n, nnz); + matrix::Csr* B = new matrix::Csr(n, n, nnz); + matrix::Csr* D = new matrix::Csr(n, n, n - 2); + + A->copyDataFrom(A_row_ptr, A_col_ind, A_values, memory::HOST, memspace_); + B->copyDataFrom(B_row_ptr, B_col_ind, B_values, memory::HOST, memspace_); + D->copyDataFrom(D_row_ptr, D_col_ind, D_values, memory::HOST, memspace_); + + matrix::Csr* E = nullptr; + + spgemm.loadProductMatrices(A, B); + spgemm.loadSumMatrix(D); + spgemm.loadResultMatrix(&E); + spgemm.compute(); + + if (memspace_ == memory::DEVICE) + { + E->syncData(memory::HOST); + } + + double tol = 1e-6; + if (fabs(E->getValues(memory::HOST)[0] - 1.0) > tol) + { + std::cerr << "Test failed: E[0][0] = " << E->getValues(memory::HOST)[0] << ", expected: 1.0\n"; + status *= false; + } + + if (fabs(E->getValues(memory::HOST)[1] - 2.0) > tol) + { + std::cerr << "Test failed: E[0][1] = " << E->getValues(memory::HOST)[1] << ", expected: 2.0\n"; + status *= false; + } + + if (fabs(E->getValues(memory::HOST)[2] - 1.0) > tol) + { + std::cerr << "Test failed: E[0][2] = " << E->getValues(memory::HOST)[2] << ", expected: 1.0\n"; + status *= false; + } + + index_type j = 3; + for (index_type i = 1; i < n; i++) + { + if (fabs(E->getValues(memory::HOST)[j] - (i + 1)) > tol) + { + std::cerr << "Test failed: E[" << i << "][" << i - 1 << "] = " << E->getValues(memory::HOST)[j] << ", expected: " << (i + 1) << "\n"; + status *= false; + } + j++; + + if (fabs(E->getValues(memory::HOST)[j] - (1 + (i + 1) * (i + 1))) > tol) + { + std::cerr << "Test failed: E[" << i << "][" << i << "] = " << E->getValues(memory::HOST)[j] << ", expected: " << (1 + (i + 1) * (i + 1)) << "\n"; + status *= false; + } + j++; + + if (i < n - 1 && fabs(E->getValues(memory::HOST)[j] - 
(i + 2)) > tol) + { + std::cerr << "Test failed: E[" << i << "][" << i + 1 << "] = " << E->getValues(memory::HOST)[j] << ", expected: " << (i + 2) << "\n"; + status *= false; + } + if (i < n - 1) + j++; + + if (i < n - 2 && fabs(E->getValues(memory::HOST)[j] - (1.0)) > tol) + { + std::cerr << "Test failed: E[" << i << "][" << i + 2 << "] = " << E->getValues(memory::HOST)[j] << ", expected: " << (1.0) << "\n"; + status *= false; + } + if (i < n - 2) + j++; + } + + delete[] A_row_ptr; + delete[] A_col_ind; + delete[] A_values; + + delete[] B_row_ptr; + delete[] B_col_ind; + delete[] B_values; + + delete[] D_row_ptr; + delete[] D_col_ind; + delete[] D_values; + + delete A; + delete B; + delete D; + delete E; + + return status.report(testname.c_str()); + } + + TestOutcome reuse() + { + TestStatus status; + std::string testname(__func__); + + hykkt::SpGEMM spgemm(memspace_, 2.0, 2.0); + + matrix::Csr* A = nullptr; + matrix::Csr* B = nullptr; + matrix::Csr* D = nullptr; + + generateExampleData(&A, &B, &D); + + matrix::Csr* E = nullptr; + + spgemm.loadProductMatrices(A, B); + spgemm.loadSumMatrix(D); + spgemm.loadResultMatrix(&E); + spgemm.compute(); + + if (memspace_ == memory::DEVICE) + { + E->syncData(memory::HOST); + + A->syncData(memory::HOST); + D->syncData(memory::HOST); + } + + status *= verifyResult(E, 2.0); + + for (index_type j = 0; j < A->getNnz(); j++) + { + A->getValues(memory::HOST)[j] *= 2.0; + } + for (index_type j = 0; j < D->getNnz(); j++) + { + D->getValues(memory::HOST)[j] *= 2.0; + } + A->setUpdated(memory::HOST); + D->setUpdated(memory::HOST); + + if (memspace_ == memory::DEVICE) + { + A->syncData(memory::DEVICE); + D->syncData(memory::DEVICE); + } + + spgemm.loadProductMatrices(A, B); + spgemm.loadSumMatrix(D); + spgemm.compute(); + + if (memspace_ == memory::DEVICE) + { + E->syncData(memory::HOST); + } + + status *= verifyResult(E, 4.0); + + delete A; + delete B; + delete D; + delete E; + + return status.report(testname.c_str()); + } + + private: 
+ memory::MemorySpace memspace_; + + void generateExampleData(matrix::Csr** A, matrix::Csr** B, matrix::Csr** D) + { + index_type* A_row_ptr = new index_type[4]{0, 1, 3, 5}; + index_type* A_col_ind = new index_type[5]{0, 1, 2, 0, 2}; + real_type* A_values = new real_type[5]{1.0, 2.0, -4.0, 5.0, 3.0}; + + index_type* B_row_ptr = new index_type[4]{0, 1, 3, 5}; + index_type* B_col_ind = new index_type[5]{0, 0, 1, 1, 2}; + real_type* B_values = new real_type[5]{-1.0, -2.0, 3.0, 2.0, 4.0}; + + index_type* D_row_ptr = new index_type[4]{0, 2, 4, 6}; + index_type* D_col_ind = new index_type[6]{0, 2, 0, 1, 1, 2}; + real_type* D_values = new real_type[6]{3.0, -4.0, -2.0, 1.0, 5.0, 7.0}; + + *A = new matrix::Csr(3, 3, 5); + *B = new matrix::Csr(3, 3, 5); + *D = new matrix::Csr(3, 3, 6); + + (*A)->copyDataFrom(A_row_ptr, A_col_ind, A_values, memory::HOST, memspace_); + (*B)->copyDataFrom(B_row_ptr, B_col_ind, B_values, memory::HOST, memspace_); + (*D)->copyDataFrom(D_row_ptr, D_col_ind, D_values, memory::HOST, memspace_); + + delete[] A_row_ptr; + delete[] A_col_ind; + delete[] A_values; + + delete[] B_row_ptr; + delete[] B_col_ind; + delete[] B_values; + + delete[] D_row_ptr; + delete[] D_col_ind; + delete[] D_values; + } + + bool verifyResult(matrix::Csr* E, real_type multiplier) + { + bool is_correct = true; + + index_type* E_row_ptr_expected = new index_type[4]{0, 2, 5, 8}; + index_type* E_col_ind_expected = new index_type[8]{0, 2, 0, 1, 2, 0, 1, 2}; + real_type* E_values_expected = new real_type[8]{2.0, -4.0, -6.0, -1.0, -16, -5.0, 11, 19}; + + for (index_type i = 0; i < 8; i++) + { + E_values_expected[i] *= multiplier; + } + + real_type tol = 1e-8; + for (index_type j = 0; j < E->getNumRows() + 1; j++) + { + if (fabs(E->getRowData(memory::HOST)[j] - E_row_ptr_expected[j]) > tol) + { + std::cout << "Row pointer mismatch at index " << j << ": got " + << E->getRowData(memory::HOST)[j] << ", expected " << E_row_ptr_expected[j] << "\n"; + is_correct = false; + } + } + + for 
(index_type j = 0; j < E->getNnz(); j++) + { + if (fabs(E->getColData(memory::HOST)[j] - E_col_ind_expected[j]) > tol) + { + std::cout << "Column index mismatch at index " << j << ": got " + << E->getColData(memory::HOST)[j] << ", expected " << E_col_ind_expected[j] << "\n"; + is_correct = false; + } + } + + for (index_type j = 0; j < E->getNnz(); j++) + { + if (fabs(E->getValues(memory::HOST)[j] - E_values_expected[j]) > tol) + { + std::cout << "Value mismatch at index " << j << ": got " + << E->getValues(memory::HOST)[j] << ", expected " << E_values_expected[j] << "\n"; + is_correct = false; + } + } + + delete[] E_row_ptr_expected; + delete[] E_col_ind_expected; + delete[] E_values_expected; + + return is_correct; + } + + }; // class HykktSpGEMMTests + } // namespace tests +} // namespace ReSolve diff --git a/tests/unit/hykkt/runHykktSpGEMMTests.cpp b/tests/unit/hykkt/runHykktSpGEMMTests.cpp new file mode 100644 index 000000000..d257110f1 --- /dev/null +++ b/tests/unit/hykkt/runHykktSpGEMMTests.cpp @@ -0,0 +1,60 @@ +/** + * @file runHykktSpGEMMTests.cpp + * @author Adham Ibrahim (ibrahimas@ornl.gov) + * @brief Tests for class hykkt::SpGEMM + * + */ +#include +#include +#include + +#include +#include +#ifdef RESOLVE_USE_CUDA +#include +#endif +#ifdef RESOLVE_USE_HIP +#include +#endif + +#include "HykktSpGEMMTests.hpp" + +/** + * @brief Run tests with a given backend + * + * @param backend - string name of the hardware backend + * @param result - test results + */ +template +void runTests(const std::string& backend, ReSolve::memory::MemorySpace memspace, ReSolve::tests::TestingResults& result) +{ + std::cout << "Running tests on " << backend << " device:\n"; + + WorkspaceType workspace; + workspace.initializeHandles(); + + ReSolve::tests::HykktSpGEMMTests test(memspace); + + result += test.minimalCorrectness(); + result += test.symbolic(10); + result += test.symbolic(1000); + result += test.reuse(); + + std::cout << "\n"; +} + +int main(int, char**) +{ + 
ReSolve::tests::TestingResults result; + runTests("CPU", ReSolve::memory::HOST, result); + +#ifdef RESOLVE_USE_CUDA + runTests("CUDA", ReSolve::memory::DEVICE, result); +#endif + +#ifdef RESOLVE_USE_HIP + runTests("HIP", ReSolve::memory::DEVICE, result); +#endif + + return result.summary(); +}