Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions resolve/hykkt/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
add_subdirectory(permutation)
add_subdirectory(ruiz)
add_subdirectory(cholesky)
add_subdirectory(spgemm)
46 changes: 46 additions & 0 deletions resolve/hykkt/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# HyKKT

## Description
HyKKT (pronounced as _hiked_) is a package for solving systems of equations and unknowns resulting from an iterative solution of an optimization
problem, for example optimal power flow, which uses hardware accelerators (GPUs) efficiently.

The HyKKT package contains a linear solver tailored for Karush Kuhn Tucker (KKT) linear systems and
deployment on hardware accelerator hardware such as GPUs. The solver requires
all blocks of the $4\times 4$ block system:

```math
\begin{bmatrix}
H + D_x & 0 & J_c^T & J_d^T \\
0 & D_s & 0 & -I \\
J_c & 0 & 0 & 0 \\
J_d & -I & 0 & 0
\end{bmatrix}
\begin{bmatrix}
\Delta x \\ \Delta s \\ \Delta y_c \\ \Delta y_d
\end{bmatrix} =
\begin{bmatrix}
\tilde{r}_x \\ r_s \\ r_c \\ r_d
\end{bmatrix}
```

separately and solves the system to a desired
numerical precision exactly via block reduction and conjugate gradient on the
schur complement. Please see the [HyKKT paper](https://www.tandfonline.com/doi/abs/10.1080/10556788.2022.2124990) for mathematical details.

## Integration within ReSolve

HyKKT code within ReSolve is not yet fully integrated. The code is currently experimental, but is actively in development.
The main issue is the requirement that the user pass matrix and vector blocks, rather than the constructed KKT system.
This is currently not available within ReSolve as it is specific to HyKKT and KKT matrices.
TODO:
- Work with AMD to fix bug in Cholesky factorization.
- Allow the representation of a matrix or vector by blocks within ReSolve.
- Schur Compliment Conjugate Gradient within HyKKT.
- Complete HyKKT integration within ReSolve.


## Dependencies

Most HyKKT dependencies are identical to ReSolve. Exceptions are:
- HyKKT does not require KLU
- For AMD builds, HyKKT requires HIP/ROCm >= 6.4 due to a known bug in SpGEMM.
56 changes: 56 additions & 0 deletions resolve/hykkt/spgemm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#[[

@brief Build SpGEMM module for HyKKT solver

@author Adham Ibrahim <ibrahimas@ornl.gov>

]]

# Add source files for the SpGEMM module
set(SPGEMM_SRC
SpGEMM.cpp
SpGEMMCpu.cpp
)
set(SPGEMM_HEADER_INSTALL
SpGEMM.hpp
SpGEMMImpl.hpp
SpGEMMCpu.hpp
SpGEMMCuda.hpp
SpGEMMHip.hpp
)

set(SPGEMM_CUDA_SRC
SpGEMMCuda.cpp
)

set(SPGEMM_ROCM_SRC
SpGEMMHip.cpp
)

# Build shared library ReSolve::hykkt
add_library(resolve_hykkt_spgemm SHARED ${SPGEMM_SRC})

target_link_libraries(resolve_hykkt_spgemm PRIVATE resolve_logger ${suitesparse_cholmod})
target_include_directories(resolve_hykkt_spgemm PUBLIC ${SUITESPARSE_INCLUDE_DIR})

if (RESOLVE_USE_CUDA)
target_sources(resolve_hykkt_spgemm PRIVATE ${SPGEMM_CUDA_SRC})
target_link_libraries(resolve_hykkt_spgemm PUBLIC resolve_backend_cuda)
endif()

if (RESOLVE_USE_HIP)
target_sources(resolve_hykkt_spgemm PRIVATE ${SPGEMM_ROCM_SRC})
target_link_libraries(resolve_hykkt_spgemm PUBLIC resolve_backend_hip)
endif()

# Link to dummy device backend if GPU support is not enabled
if (NOT RESOLVE_USE_GPU)
target_link_libraries(resolve_hykkt_spgemm PUBLIC resolve_backend_cpu)
endif()

target_include_directories(resolve_hykkt_spgemm INTERFACE
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
$<INSTALL_INTERFACE:include>
)

install(FILES ${SPGEMM_HEADER_INSTALL} DESTINATION include/resolve/hykkt)
93 changes: 93 additions & 0 deletions resolve/hykkt/spgemm/SpGEMM.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/**
* @file SpGEMM.cpp
* @author Adham Ibrahim (ibrahimas@ornl.gov)
*/

#include "SpGEMM.hpp"

#include "SpGEMMCpu.hpp"
#ifdef RESOLVE_USE_CUDA
#include "SpGEMMCuda.hpp"
#elif defined(RESOLVE_USE_HIP)
#include "SpGEMMHip.hpp"
#endif

namespace ReSolve
{
using real_type = ReSolve::real_type;
using out = ReSolve::io::Logger;

namespace hykkt
{
/**
* Constructor for SpGEMM
* @param memspace[in] - Memory space for computation.
* @param alpha[in] - Scalar multiplier for the product.
* @param beta[in] - Scalar multiplier for the sum.
*/
SpGEMM::SpGEMM(memory::MemorySpace memspace, real_type alpha, real_type beta)
: memspace_(memspace)
{
if (memspace_ == memory::HOST)
{
impl_ = new SpGEMMCpu(alpha, beta);
}
else
{
#ifdef RESOLVE_USE_CUDA
impl_ = new SpGEMMCuda(alpha, beta);
#elif defined(RESOLVE_USE_HIP)
impl_ = new SpGEMMHip(alpha, beta);
#else
out::error() << "No GPU support enabled, and memory space set to DEVICE.\n";
exit(1);
#endif
}
}

/**
* Destructor for SpGEMM
*/
SpGEMM::~SpGEMM()
{
delete impl_;
}

/**
* Loads the two matrices for the product
* @param A[in] - Pointer to CSR matrix
* @param B[in] - Pointer to CSR matrix
*/
void SpGEMM::loadProductMatrices(matrix::Csr* A, matrix::Csr* B)
{
impl_->loadProductMatrices(A, B);
}

/**
* Loads the sum matrix for the operation
* @param D[in] - Pointer to CSR matrix
*/
void SpGEMM::loadSumMatrix(matrix::Csr* D)
{
impl_->loadSumMatrix(D);
}

/**
* Loads the result matrix
* @param E[in] - Pointer to pointer to CSR matrix
*/
void SpGEMM::loadResultMatrix(matrix::Csr** E_ptr)
{
impl_->loadResultMatrix(E_ptr);
}

/**
* Computes the result of the SpGEMM operation
* E = alpha * A * B + beta * D
*/
void SpGEMM::compute()
{
impl_->compute();
}
} // namespace hykkt
} // namespace ReSolve
37 changes: 37 additions & 0 deletions resolve/hykkt/spgemm/SpGEMM.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
* @file SpGEMM.hpp
* @author Adham Ibrahim (ibrahimas@ornl.gov)
*/

#pragma once
#include "SpGEMMImpl.hpp"
#include <resolve/Common.hpp>
#include <resolve/MemoryUtils.hpp>
#include <resolve/matrix/Csr.hpp>

namespace ReSolve
{
using real_type = ReSolve::real_type;

namespace hykkt
{
class SpGEMM
{
public:
// computes E = alpha * A * B + beta * D
SpGEMM(memory::MemorySpace memspace, real_type alpha, real_type beta);
~SpGEMM();

void loadProductMatrices(matrix::Csr* A, matrix::Csr* B);
void loadSumMatrix(matrix::Csr* D);
void loadResultMatrix(matrix::Csr** E_ptr);

void compute();

private:
memory::MemorySpace memspace_;

SpGEMMImpl* impl_;
};
} // namespace hykkt
} // namespace ReSolve
139 changes: 139 additions & 0 deletions resolve/hykkt/spgemm/SpGEMMCpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/**
* @file SpGEMMCpu.cpp
* @author Adham Ibrahim (ibrahimas@ornl.gov)
* @brief Implementation of SpGEMM using CHOLMOD for CPU
*/

#include "SpGEMMCpu.hpp"

namespace ReSolve
{
using real_type = ReSolve::real_type;

namespace hykkt
{
/**
* Constructor for SpGEMMCpu
* @param alpha[in] - Scalar multiplier for the product.
* @param beta[in] - Scalar multiplier for the sum.
*/
SpGEMMCpu::SpGEMMCpu(real_type alpha, real_type beta)
: alpha_(alpha), beta_(beta)
{
cholmod_start(&Common_);

A_ = nullptr;
B_ = nullptr;
D_ = nullptr;
}

/**
* Destructor for SpGEMMCpu
*/
SpGEMMCpu::~SpGEMMCpu()
{
if (A_)
{
cholmod_free_sparse(&A_, &Common_);
}
if (B_)
{
cholmod_free_sparse(&B_, &Common_);
}
if (D_)
{
cholmod_free_sparse(&D_, &Common_);
}
cholmod_finish(&Common_);
}

void SpGEMMCpu::loadProductMatrices(matrix::Csr* A, matrix::Csr* B)
{
if (!A_)
{
A_ = allocateCholmodType(A);
}
if (!B_)
{
B_ = allocateCholmodType(B);
}
copyDataToCholmodType(A, A_);
copyDataToCholmodType(B, B_);
}

void SpGEMMCpu::loadSumMatrix(matrix::Csr* D)
{
if (!D_)
{
D_ = allocateCholmodType(D);
}
copyDataToCholmodType(D, D_);
}

void SpGEMMCpu::loadResultMatrix(matrix::Csr** E_ptr)
{
E_ptr_ = E_ptr;
}

/**
* @brief Computes the result of the SpGEMM operation
*
* Does not store partial results for reuse, as is this is not supported
* by the CHOLMOD package.
*/
void SpGEMMCpu::compute()
{
cholmod_sparse* C_chol = cholmod_ssmult(B_, A_, 0, 1, 0, &Common_);
cholmod_sparse* E_chol = cholmod_add(C_chol, D_, &alpha_, &beta_, 1, 0, &Common_);

if (!(*E_ptr_))
{
*E_ptr_ = new matrix::Csr((index_type) E_chol->nrow, (index_type) E_chol->ncol, (index_type) E_chol->nzmax);
}
else
{
(*E_ptr_)->destroyMatrixData(memory::HOST);
}

// Previous data must be de-allocated and new data copied
// Cholmod does not allow for reuse of arrays
(*E_ptr_)->copyDataFrom(static_cast<index_type*>(E_chol->p),
static_cast<index_type*>(E_chol->i),
static_cast<real_type*>(E_chol->x),
memory::HOST,
memory::HOST);
}

/**
* @brief Allocates a CHOLMOD sparse matrix of the same size as the input CSR matrix
* @param A[in] - Pointer to CSR matrix
* @return Pointer to the allocated CHOLMOD sparse matrix
*/
cholmod_sparse* SpGEMMCpu::allocateCholmodType(matrix::Csr* A)
{
return cholmod_allocate_sparse((size_t) A->getNumRows(),
(size_t) A->getNumColumns(),
(size_t) A->getNnz(),
1,
1,
0,
CHOLMOD_REAL,
&Common_);
}

/**
* Copies the data from a CSR matrix to a CHOLMOD sparse matrix
* @param A[in] - Pointer to CSR matrix
* @param A_chol[in] - Pointer to CHOLMOD sparse matrix
*/
void SpGEMMCpu::copyDataToCholmodType(matrix::Csr* A, cholmod_sparse* A_chol)
{
mem_.copyArrayHostToHost(
static_cast<int*>(A_chol->p), A->getRowData(memory::HOST), A->getNumRows() + 1);
mem_.copyArrayHostToHost(
static_cast<int*>(A_chol->i), A->getColData(memory::HOST), A->getNnz());
mem_.copyArrayHostToHost(
static_cast<double*>(A_chol->x), A->getValues(memory::HOST), A->getNnz());
}
} // namespace hykkt
} // namespace ReSolve
Loading
Loading