40 changes: 20 additions & 20 deletions cpp/src/dual_simplex/barrier.cu
@@ -1197,12 +1197,13 @@ class iteration_data_t {
// v = alpha * A * Dinv * A^T * y + beta * v
void gpu_adat_multiply(f_t alpha,
const rmm::device_uvector<f_t>& y,
cusparseDnVecDescr_t cusparse_y,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& cusparse_y,

f_t beta,
rmm::device_uvector<f_t>& v,
cusparseDnVecDescr_t cusparse_v,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& cusparse_v,
rmm::device_uvector<f_t>& u,
cusparseDnVecDescr_t cusparse_u,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& cusparse_u,
cusparse_view_t<i_t, f_t>& cusparse_view,
const rmm::device_uvector<f_t>& d_inv_diag) const
{
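Note: the body of gpu_adat_multiply is folded out of this hunk. As a sketch only (not the actual implementation), the product v = alpha * A * Dinv * A^T * y + beta * v naturally splits into the view's own transpose_spmv/spmv around an element-wise diagonal scale; the thrust call, exec policy, and stream name below are assumptions:

  // Sketch, assuming a thrust elementwise scale between the two SpMVs.
  // u = A^T * y
  cusparse_view.transpose_spmv(f_t{1}, cusparse_y, f_t{0}, cusparse_u);
  // u = Dinv * u  (diagonal of Dinv stored densely in d_inv_diag)
  thrust::transform(rmm::exec_policy(stream),
                    u.begin(), u.end(), d_inv_diag.begin(), u.begin(),
                    thrust::multiplies<f_t>{});
  // v = alpha * A * u + beta * v
  cusparse_view.spmv(alpha, cusparse_u, beta, cusparse_v);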
@@ -1386,20 +1387,20 @@ class iteration_data_t {
pinned_dense_vector_t<i_t, f_t> dz;
cusparse_info_t<i_t, f_t> cusparse_info;
cusparse_view_t<i_t, f_t> cusparse_view_;
cusparseDnVecDescr_t cusparse_tmp4_;
cusparseDnVecDescr_t cusparse_h_;
cusparseDnVecDescr_t cusparse_dx_residual_;
cusparseDnVecDescr_t cusparse_dy_;
cusparseDnVecDescr_t cusparse_dx_residual_5_;
cusparseDnVecDescr_t cusparse_dx_residual_6_;
cusparseDnVecDescr_t cusparse_dx_;
cusparseDnVecDescr_t cusparse_dx_residual_3_;
cusparseDnVecDescr_t cusparse_dx_residual_4_;
cusparseDnVecDescr_t cusparse_r1_;
cusparseDnVecDescr_t cusparse_dual_residual_;
cusparseDnVecDescr_t cusparse_y_residual_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_tmp4_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_h_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dy_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_5_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_6_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_3_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_4_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_r1_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dual_residual_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_y_residual_;
// GPU ADAT multiply
cusparseDnVecDescr_t cusparse_u_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_u_;

// Device vectors

@@ -2279,7 +2280,7 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
raft::copy(data.d_y_residual_.data(), data.d_h_.data(), data.d_h_.size(), stream_view_);

// TMP should be done only once
cusparseDnVecDescr_t cusparse_dy_ = data.cusparse_view_.create_vector(data.d_dy_);
auto cusparse_dy_ = data.cusparse_view_.create_vector(data.d_dy_);

data.gpu_adat_multiply(1.0,
data.d_dy_,
@@ -2448,9 +2449,8 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_

// TMP data should already be on the GPU
rmm::device_uvector<f_t> d_dx_residual_7(data.d_h_, stream_view_);
cusparseDnVecDescr_t cusparse_dy_ = data.cusparse_view_.create_vector(data.d_dy_);
cusparseDnVecDescr_t cusparse_dx_residual_7 =
data.cusparse_view_.create_vector(d_dx_residual_7);
auto cusparse_dy_ = data.cusparse_view_.create_vector(data.d_dy_);
auto cusparse_dx_residual_7 = data.cusparse_view_.create_vector(d_dx_residual_7);

// matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, dx_residual_7);
data.gpu_adat_multiply(1.0,
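Note: replacing raw cusparseDnVecDescr_t with detail::cusparse_dn_vec_descr_wrapper_t<f_t> ties descriptor lifetime to scope, so temporaries like the two "TMP" cusparse_dy_ above no longer need (and no longer miss) a manual cusparseDestroyDnVec. A minimal usage sketch:

  {
    auto cusparse_dy = data.cusparse_view_.create_vector(data.d_dy_);
    data.gpu_adat_multiply(1.0, data.d_dy_, cusparse_dy, /* ... remaining args ... */);
  }  // wrapper destructor releases the descriptor here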
24 changes: 16 additions & 8 deletions cpp/src/dual_simplex/cusparse_info.hpp
@@ -7,6 +7,8 @@

#pragma once

#include <utilities/macros.cuh>

#include <raft/sparse/detail/cusparse_macros.h>
#include <raft/sparse/detail/cusparse_wrappers.h>
#include <raft/core/handle.hpp>
@@ -35,16 +37,22 @@ struct cusparse_info_t {

~cusparse_info_t()
{
// RAFT_CUSPARSE_TRY(cusparseSpGEMM_destroyDescr(spgemm_descr));
// RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matA_descr));
// RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matDAT_descr));
// RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matADAT_descr));
if (spgemm_descr != nullptr) {
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseSpGEMM_destroyDescr(spgemm_descr));
}
if (matA_descr != nullptr) { CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA_descr)); }
if (matDAT_descr != nullptr) {
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matDAT_descr));
}
if (matADAT_descr != nullptr) {
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matADAT_descr));
}
}

cusparseSpMatDescr_t matA_descr;
cusparseSpMatDescr_t matDAT_descr;
cusparseSpMatDescr_t matADAT_descr;
cusparseSpGEMMDescr_t spgemm_descr;
cusparseSpMatDescr_t matA_descr{nullptr};
cusparseSpMatDescr_t matDAT_descr{nullptr};
cusparseSpMatDescr_t matADAT_descr{nullptr};
cusparseSpGEMMDescr_t spgemm_descr{nullptr};
rmm::device_scalar<f_t> alpha;
rmm::device_scalar<f_t> beta;
rmm::device_uvector<uint8_t> buffer_size;
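Note: the previously commented-out RAFT_CUSPARSE_TRY calls would throw from a destructor, and a throwing destructor during stack unwinding calls std::terminate; hence the new _NO_THROW variant. The {nullptr} default initializers make the guards well-defined even if the constructor failed before creating every descriptor. The guarded pattern in isolation:

  cusparseSpMatDescr_t descr{nullptr};  // value-initialized so the null check is meaningful
  // ... descriptor may or may not have been created ...
  if (descr != nullptr) { CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descr)); }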
50 changes: 29 additions & 21 deletions cpp/src/dual_simplex/cusparse_view.cu
@@ -232,17 +232,24 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(raft::handle_t const* handle_ptr,
CUSPARSE_SPMV_CSR_ALG2,
spmv_buffer_transpose_.data(),
handle_ptr->get_stream());
RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(x));
RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(y));
}

template <typename i_t, typename f_t>
cusparseDnVecDescr_t cusparse_view_t<i_t, f_t>::create_vector(const rmm::device_uvector<f_t>& vec)
cusparse_view_t<i_t, f_t>::~cusparse_view_t()
{
// TODO add to RAFT a const version
// No RAFT version without the const, so you would get a link-time issue; hence the const_cast
cusparseDnVecDescr_t cusparse_h;
RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(
&cusparse_h, vec.size(), const_cast<f_t*>(vec.data())));
return cusparse_h;
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(A_));
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(A_T_));
}

template <typename i_t, typename f_t>
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_view_t<i_t, f_t>::create_vector(
rmm::device_uvector<f_t> const& vec)
{
detail::cusparse_dn_vec_descr_wrapper_t<f_t> descr;
descr.create(vec.size(), const_cast<f_t*>(vec.data()));
return descr;
}

template <typename i_t, typename f_t>
@@ -252,19 +259,19 @@ void cusparse_view_t<i_t, f_t>::spmv(f_t alpha,
f_t beta,
std::vector<f_t, AllocatorB>& y)
{
auto d_x = device_copy(x, handle_ptr_->get_stream());
auto d_y = device_copy(y, handle_ptr_->get_stream());
cusparseDnVecDescr_t x_cusparse = create_vector(d_x);
cusparseDnVecDescr_t y_cusparse = create_vector(d_y);
auto d_x = device_copy(x, handle_ptr_->get_stream());
auto d_y = device_copy(y, handle_ptr_->get_stream());
detail::cusparse_dn_vec_descr_wrapper_t<f_t> x_cusparse = create_vector(d_x);
detail::cusparse_dn_vec_descr_wrapper_t<f_t> y_cusparse = create_vector(d_y);
spmv(alpha, x_cusparse, beta, y_cusparse);
y = cuopt::host_copy<f_t, AllocatorB>(d_y, handle_ptr_->get_stream());
}

template <typename i_t, typename f_t>
void cusparse_view_t<i_t, f_t>::spmv(f_t alpha,
cusparseDnVecDescr_t x,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& x,
f_t beta,
cusparseDnVecDescr_t y)
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& y)
{
// Would be simpler if we could pass host data directly, but other cusparse calls with the same
// handle depend on device data
@@ -295,19 +302,20 @@ void cusparse_view_t<i_t, f_t>::transpose_spmv(f_t alpha,
f_t beta,
std::vector<f_t, AllocatorB>& y)
{
auto d_x = device_copy(x, handle_ptr_->get_stream());
auto d_y = device_copy(y, handle_ptr_->get_stream());
cusparseDnVecDescr_t x_cusparse = create_vector(d_x);
cusparseDnVecDescr_t y_cusparse = create_vector(d_y);
auto d_x = device_copy(x, handle_ptr_->get_stream());
auto d_y = device_copy(y, handle_ptr_->get_stream());
detail::cusparse_dn_vec_descr_wrapper_t<f_t> x_cusparse = create_vector(d_x);
detail::cusparse_dn_vec_descr_wrapper_t<f_t> y_cusparse = create_vector(d_y);
transpose_spmv(alpha, x_cusparse, beta, y_cusparse);
y = cuopt::host_copy<f_t, AllocatorB>(d_y, handle_ptr_->get_stream());
}

template <typename i_t, typename f_t>
void cusparse_view_t<i_t, f_t>::transpose_spmv(f_t alpha,
cusparseDnVecDescr_t x,
f_t beta,
cusparseDnVecDescr_t y)
void cusparse_view_t<i_t, f_t>::transpose_spmv(
f_t alpha,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& x,
f_t beta,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& y)
{
// Would be simpler if we could pass host data directly, but other cusparse calls with the same
// handle depend on device data
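Note: the std::vector overloads are host-side conveniences: they device_copy the inputs, wrap them in descriptors, run the device SpMV, and host_copy the result back. A hedged usage sketch (view construction elided; num_rows/num_cols are assumed names for the matrix dimensions):

  std::vector<double> x(num_cols, 1.0), y(num_rows, 0.0);
  view.spmv(1.0, x, 0.0, y);            // y = A * x, device round-trip handled internally
  view.transpose_spmv(1.0, y, 0.0, x);  // x = A^T * y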
15 changes: 12 additions & 3 deletions cpp/src/dual_simplex/cusparse_view.hpp
@@ -8,6 +8,8 @@

#include <dual_simplex/sparse_matrix.hpp>

#include <linear_programming/cusparse_view.hpp>

#include <cusparse_v2.h>

#include <rmm/device_scalar.hpp>
@@ -25,21 +27,28 @@ class cusparse_view_t {
public:
// TMP matrix data should already be on the GPU and in CSR not CSC
cusparse_view_t(raft::handle_t const* handle_ptr, const csc_matrix_t<i_t, f_t>& A);
~cusparse_view_t();

static cusparseDnVecDescr_t create_vector(const rmm::device_uvector<f_t>& vec);
detail::cusparse_dn_vec_descr_wrapper_t<f_t> create_vector(rmm::device_uvector<f_t> const& vec);

template <typename AllocatorA, typename AllocatorB>
void spmv(f_t alpha,
const std::vector<f_t, AllocatorA>& x,
f_t beta,
std::vector<f_t, AllocatorB>& y);
void spmv(f_t alpha, cusparseDnVecDescr_t x, f_t beta, cusparseDnVecDescr_t y);
void spmv(f_t alpha,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& x,
f_t beta,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& y);
template <typename AllocatorA, typename AllocatorB>
void transpose_spmv(f_t alpha,
const std::vector<f_t, AllocatorA>& x,
f_t beta,
std::vector<f_t, AllocatorB>& y);
void transpose_spmv(f_t alpha, cusparseDnVecDescr_t x, f_t beta, cusparseDnVecDescr_t y);
void transpose_spmv(f_t alpha,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& x,
f_t beta,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& y);

raft::handle_t const* handle_ptr_{nullptr};

11 changes: 0 additions & 11 deletions cpp/src/dual_simplex/sparse_cholesky.cuh
@@ -743,17 +743,6 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
CUDSS_CALL_AND_CHECK(
cudssMatrixSetValues(cudss_x, x.data()), status, "cudssMatrixSetValues for x");

i_t ldb = n;
i_t ldx = n;
CUDSS_CALL_AND_CHECK_EXIT(
cudssMatrixCreateDn(&cudss_b, n, 1, ldb, b.data(), CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR),
status,
"cudssMatrixCreateDn for b");
CUDSS_CALL_AND_CHECK_EXIT(
cudssMatrixCreateDn(&cudss_x, n, 1, ldx, x.data(), CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR),
status,
"cudssMatrixCreateDn for x");

status = cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, solverData, A, cudss_x, cudss_b);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
if (status != CUDSS_STATUS_SUCCESS) {
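Note: the deleted block re-created cudss_b and cudss_x on every solve, leaking the descriptors created for the previous right-hand side; the cudssMatrixSetValues calls retained above already repoint the existing dense wrappers at the current buffers. The create-once/set-values-per-solve pattern that remains:

  // Per solve: update the data pointers only, never re-create the dense descriptors.
  CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(cudss_b, b.data()), status, "cudssMatrixSetValues for b");
  CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(cudss_x, x.data()), status, "cudssMatrixSetValues for x");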
10 changes: 10 additions & 0 deletions cpp/src/linear_programming/cusparse_view.cu
@@ -75,6 +75,16 @@ cusparse_dn_vec_descr_wrapper_t<f_t>::cusparse_dn_vec_descr_wrapper_t(
{
}

template <typename f_t>
cusparse_dn_vec_descr_wrapper_t<f_t>& cusparse_dn_vec_descr_wrapper_t<f_t>::operator=(
cusparse_dn_vec_descr_wrapper_t<f_t>&& other)
{
if (this != &other) {
if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(descr_)); }
descr_ = other.descr_;
need_destruction_ = other.need_destruction_; // take over ownership, not just the handle
other.need_destruction_ = false;
}
return *this;
}

template <typename f_t>
void cusparse_dn_vec_descr_wrapper_t<f_t>::create(int64_t size, f_t* values)
{
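Note: with copy assignment deleted and move assignment added, a matching move constructor would complete the ownership story. It is not shown in this diff; the following is an assumed sketch, consistent with the move assignment above:

  template <typename f_t>
  cusparse_dn_vec_descr_wrapper_t<f_t>::cusparse_dn_vec_descr_wrapper_t(
    cusparse_dn_vec_descr_wrapper_t<f_t>&& other)
    : descr_(other.descr_), need_destruction_(other.need_destruction_)
  {
    other.need_destruction_ = false;  // steal ownership; other's destructor becomes a no-op
  }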
2 changes: 1 addition & 1 deletion cpp/src/linear_programming/cusparse_view.hpp
@@ -46,7 +46,7 @@ class cusparse_dn_vec_descr_wrapper_t {
~cusparse_dn_vec_descr_wrapper_t();

cusparse_dn_vec_descr_wrapper_t(const cusparse_dn_vec_descr_wrapper_t& other);

cusparse_dn_vec_descr_wrapper_t& operator=(cusparse_dn_vec_descr_wrapper_t&& other);
cusparse_dn_vec_descr_wrapper_t& operator=(const cusparse_dn_vec_descr_wrapper_t& other) = delete;

void create(int64_t size, f_t* values);
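Note: create() is declared here and defined in linear_programming/cusparse_view.cu, where its body is folded out of the diff. A hedged sketch of what it must do; the f_t-to-cudaDataType mapping is an assumption:

  template <typename f_t>
  void cusparse_dn_vec_descr_wrapper_t<f_t>::create(int64_t size, f_t* values)
  {
    constexpr cudaDataType_t dtype = std::is_same_v<f_t, double> ? CUDA_R_64F : CUDA_R_32F;
    RAFT_CUSPARSE_TRY(cusparseCreateDnVec(&descr_, size, values, dtype));
    need_destruction_ = true;  // the destructor will call cusparseDestroyDnVec
  }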
32 changes: 32 additions & 0 deletions cpp/src/utilities/macros.cuh
@@ -37,3 +37,35 @@
fprintf(stderr, "CUDA Error: %s:%i:%s\n", __FILE__, __LINE__, pErrStr); \
} \
} while (0)

#define CUOPT_SET_ERROR_MSG_NO_THROW(msg, location_prefix, fmt, ...) \
do { \
int size1 = std::snprintf(nullptr, 0, "%s", location_prefix); \
int size2 = std::snprintf(nullptr, 0, "file=%s line=%d: ", __FILE__, __LINE__); \
int size3 = std::snprintf(nullptr, 0, fmt, ##__VA_ARGS__); \
if (size1 < 0 || size2 < 0 || size3 < 0) { \
std::cerr << "Error in snprintf, cannot handle CUOPT exception." << std::endl; \
return; \
} \
auto size = size1 + size2 + size3 + 1; /* +1 for final '\0' */ \
std::vector<char> buf(size); \
std::snprintf(buf.data(), size1 + 1 /* +1 for '\0' */, "%s", location_prefix); \
std::snprintf( \
buf.data() + size1, size2 + 1 /* +1 for '\0' */, "file=%s line=%d: ", __FILE__, __LINE__); \
std::snprintf(buf.data() + size1 + size2, size3 + 1 /* +1 for '\0' */, fmt, ##__VA_ARGS__); \
msg += std::string(buf.data(), buf.data() + size - 1); /* -1 to remove final '\0' */ \
} while (0)

#define CUOPT_CUSPARSE_TRY_NO_THROW(call) \
do { \
cusparseStatus_t const status = (call); \
if (CUSPARSE_STATUS_SUCCESS != status) { \
std::string msg{}; \
CUOPT_SET_ERROR_MSG_NO_THROW(msg, \
"cuSparse error encountered at: ", \
"call='%s', Reason=%d:%s", \
#call, \
status, \
raft::sparse::detail::cusparse_error_to_string(status)); \
std::cerr << msg << std::endl; /* report to stderr: this variant must not throw */ \
} \
} while (0)
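Note: CUOPT_SET_ERROR_MSG_NO_THROW relies on the C99/C++11 guarantee that std::snprintf(nullptr, 0, ...) returns the length the formatted string would need, excluding the terminating '\0'. The same measure-then-write pattern in a standalone form:

  #include <cstdio>
  #include <string>
  #include <vector>

  // Two-pass snprintf: measure first, then write into an exactly-sized buffer.
  std::string format_location(const char* file, int line, const char* what)
  {
    int n = std::snprintf(nullptr, 0, "file=%s line=%d: %s", file, line, what);
    if (n < 0) { return {}; }      // encoding error
    std::vector<char> buf(n + 1);  // +1 for the final '\0'
    std::snprintf(buf.data(), buf.size(), "file=%s line=%d: %s", file, line, what);
    return std::string(buf.data(), n);  // drop the '\0'
  }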
2 changes: 1 addition & 1 deletion python/libcuopt/pyproject.toml
@@ -54,7 +54,7 @@ libcuopt = "libcuopt"
select = [
"distro-too-large-compressed",
]
max_allowed_size_compressed = '605M'
max_allowed_size_compressed = '620M'

[project.scripts]
cuopt_cli = "libcuopt._cli_wrapper:main"