40 changes: 20 additions & 20 deletions cpp/src/dual_simplex/barrier.cu
@@ -1197,12 +1197,13 @@ class iteration_data_t {
// v = alpha * A * Dinv * A^T * y + beta * v
void gpu_adat_multiply(f_t alpha,
const rmm::device_uvector<f_t>& y,
cusparseDnVecDescr_t cusparse_y,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& cusparse_y,

f_t beta,
rmm::device_uvector<f_t>& v,
cusparseDnVecDescr_t cusparse_v,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& cusparse_v,
rmm::device_uvector<f_t>& u,
cusparseDnVecDescr_t cusparse_u,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& cusparse_u,
cusparse_view_t<i_t, f_t>& cusparse_view,
const rmm::device_uvector<f_t>& d_inv_diag) const
{
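Note: the body of gpu_adat_multiply is folded out of this hunk. As a sketch only (not the actual implementation), the product v = alpha * A * Dinv * A^T * y + beta * v naturally splits into the view's own transpose_spmv/spmv around an element-wise diagonal scale; the thrust call, exec policy, and stream name below are assumptions:

  // Sketch, assuming a thrust elementwise scale between the two SpMVs.
  // u = A^T * y
  cusparse_view.transpose_spmv(f_t{1}, cusparse_y, f_t{0}, cusparse_u);
  // u = Dinv * u  (diagonal of Dinv stored densely in d_inv_diag)
  thrust::transform(rmm::exec_policy(stream),
                    u.begin(), u.end(), d_inv_diag.begin(), u.begin(),
                    thrust::multiplies<f_t>{});
  // v = alpha * A * u + beta * v
  cusparse_view.spmv(alpha, cusparse_u, beta, cusparse_v);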
@@ -1386,20 +1387,20 @@ class iteration_data_t {
pinned_dense_vector_t<i_t, f_t> dz;
cusparse_info_t<i_t, f_t> cusparse_info;
cusparse_view_t<i_t, f_t> cusparse_view_;
cusparseDnVecDescr_t cusparse_tmp4_;
cusparseDnVecDescr_t cusparse_h_;
cusparseDnVecDescr_t cusparse_dx_residual_;
cusparseDnVecDescr_t cusparse_dy_;
cusparseDnVecDescr_t cusparse_dx_residual_5_;
cusparseDnVecDescr_t cusparse_dx_residual_6_;
cusparseDnVecDescr_t cusparse_dx_;
cusparseDnVecDescr_t cusparse_dx_residual_3_;
cusparseDnVecDescr_t cusparse_dx_residual_4_;
cusparseDnVecDescr_t cusparse_r1_;
cusparseDnVecDescr_t cusparse_dual_residual_;
cusparseDnVecDescr_t cusparse_y_residual_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_tmp4_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_h_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dy_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_5_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_6_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_3_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dx_residual_4_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_r1_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_dual_residual_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_y_residual_;
// GPU ADAT multiply
cusparseDnVecDescr_t cusparse_u_;
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_u_;

// Device vectors

@@ -2279,7 +2280,7 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_
raft::copy(data.d_y_residual_.data(), data.d_h_.data(), data.d_h_.size(), stream_view_);

// TMP should be done only once
cusparseDnVecDescr_t cusparse_dy_ = data.cusparse_view_.create_vector(data.d_dy_);
auto cusparse_dy_ = data.cusparse_view_.create_vector(data.d_dy_);

data.gpu_adat_multiply(1.0,
data.d_dy_,
@@ -2448,9 +2449,8 @@ i_t barrier_solver_t<i_t, f_t>::gpu_compute_search_direction(iteration_data_t<i_

// TMP data should already be on the GPU
rmm::device_uvector<f_t> d_dx_residual_7(data.d_h_, stream_view_);
cusparseDnVecDescr_t cusparse_dy_ = data.cusparse_view_.create_vector(data.d_dy_);
cusparseDnVecDescr_t cusparse_dx_residual_7 =
data.cusparse_view_.create_vector(d_dx_residual_7);
auto cusparse_dy_ = data.cusparse_view_.create_vector(data.d_dy_);
auto cusparse_dx_residual_7 = data.cusparse_view_.create_vector(d_dx_residual_7);

// matrix_vector_multiply(data.ADAT, 1.0, dy, -1.0, dx_residual_7);
data.gpu_adat_multiply(1.0,
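Note: replacing raw cusparseDnVecDescr_t with detail::cusparse_dn_vec_descr_wrapper_t<f_t> ties descriptor lifetime to scope, so temporaries like the two "TMP" cusparse_dy_ above no longer need (and no longer miss) a manual cusparseDestroyDnVec. A minimal usage sketch:

  {
    auto cusparse_dy = data.cusparse_view_.create_vector(data.d_dy_);
    data.gpu_adat_multiply(1.0, data.d_dy_, cusparse_dy, /* ... remaining args ... */);
  }  // wrapper destructor releases the descriptor here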
24 changes: 16 additions & 8 deletions cpp/src/dual_simplex/cusparse_info.hpp
@@ -7,6 +7,8 @@

#pragma once

#include <utilities/macros.cuh>

#include <raft/sparse/detail/cusparse_macros.h>
#include <raft/sparse/detail/cusparse_wrappers.h>
#include <raft/core/handle.hpp>
@@ -35,16 +37,22 @@ struct cusparse_info_t {

~cusparse_info_t()
{
// RAFT_CUSPARSE_TRY(cusparseSpGEMM_destroyDescr(spgemm_descr));
// RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matA_descr));
// RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matDAT_descr));
// RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matADAT_descr));
if (spgemm_descr != nullptr) {
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseSpGEMM_destroyDescr(spgemm_descr));
}
if (matA_descr != nullptr) { CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matA_descr)); }
if (matDAT_descr != nullptr) {
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matDAT_descr));
}
if (matADAT_descr != nullptr) {
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(matADAT_descr));
}
}

cusparseSpMatDescr_t matA_descr;
cusparseSpMatDescr_t matDAT_descr;
cusparseSpMatDescr_t matADAT_descr;
cusparseSpGEMMDescr_t spgemm_descr;
cusparseSpMatDescr_t matA_descr{nullptr};
cusparseSpMatDescr_t matDAT_descr{nullptr};
cusparseSpMatDescr_t matADAT_descr{nullptr};
cusparseSpGEMMDescr_t spgemm_descr{nullptr};
rmm::device_scalar<f_t> alpha;
rmm::device_scalar<f_t> beta;
rmm::device_uvector<uint8_t> buffer_size;
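Note: the previously commented-out RAFT_CUSPARSE_TRY calls would throw from a destructor, and a throwing destructor during stack unwinding calls std::terminate; hence the new _NO_THROW variant. The {nullptr} default initializers make the guards well-defined even if the constructor failed before creating every descriptor. The guarded pattern in isolation:

  cusparseSpMatDescr_t descr{nullptr};  // value-initialized so the null check is meaningful
  // ... descriptor may or may not have been created ...
  if (descr != nullptr) { CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(descr)); }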
50 changes: 29 additions & 21 deletions cpp/src/dual_simplex/cusparse_view.cu
@@ -232,17 +232,24 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(raft::handle_t const* handle_ptr,
CUSPARSE_SPMV_CSR_ALG2,
spmv_buffer_transpose_.data(),
handle_ptr->get_stream());
RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(x));
RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(y));
}

template <typename i_t, typename f_t>
cusparseDnVecDescr_t cusparse_view_t<i_t, f_t>::create_vector(const rmm::device_uvector<f_t>& vec)
cusparse_view_t<i_t, f_t>::~cusparse_view_t()
{
// TODO add to RAFT a const version
// No RAFT version without the const, so you would get a link-time issue; hence the const_cast
cusparseDnVecDescr_t cusparse_h;
RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(
&cusparse_h, vec.size(), const_cast<f_t*>(vec.data())));
return cusparse_h;
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(A_));
CUOPT_CUSPARSE_TRY_NO_THROW(cusparseDestroySpMat(A_T_));
}

template <typename i_t, typename f_t>
detail::cusparse_dn_vec_descr_wrapper_t<f_t> cusparse_view_t<i_t, f_t>::create_vector(
rmm::device_uvector<f_t> const& vec)
{
detail::cusparse_dn_vec_descr_wrapper_t<f_t> descr;
descr.create(vec.size(), const_cast<f_t*>(vec.data()));
return descr;
}

template <typename i_t, typename f_t>
@@ -252,19 +259,19 @@ void cusparse_view_t<i_t, f_t>::spmv(f_t alpha,
f_t beta,
std::vector<f_t, AllocatorB>& y)
{
auto d_x = device_copy(x, handle_ptr_->get_stream());
auto d_y = device_copy(y, handle_ptr_->get_stream());
cusparseDnVecDescr_t x_cusparse = create_vector(d_x);
cusparseDnVecDescr_t y_cusparse = create_vector(d_y);
auto d_x = device_copy(x, handle_ptr_->get_stream());
auto d_y = device_copy(y, handle_ptr_->get_stream());
detail::cusparse_dn_vec_descr_wrapper_t<f_t> x_cusparse = create_vector(d_x);
detail::cusparse_dn_vec_descr_wrapper_t<f_t> y_cusparse = create_vector(d_y);
spmv(alpha, x_cusparse, beta, y_cusparse);
y = cuopt::host_copy<f_t, AllocatorB>(d_y, handle_ptr_->get_stream());
}

template <typename i_t, typename f_t>
void cusparse_view_t<i_t, f_t>::spmv(f_t alpha,
cusparseDnVecDescr_t x,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& x,
f_t beta,
cusparseDnVecDescr_t y)
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& y)
{
// Would be simpler if we could pass host data directly, but other cusparse calls with the same
// handle depend on device data
@@ -295,19 +302,20 @@ void cusparse_view_t<i_t, f_t>::transpose_spmv(f_t alpha,
f_t beta,
std::vector<f_t, AllocatorB>& y)
{
auto d_x = device_copy(x, handle_ptr_->get_stream());
auto d_y = device_copy(y, handle_ptr_->get_stream());
cusparseDnVecDescr_t x_cusparse = create_vector(d_x);
cusparseDnVecDescr_t y_cusparse = create_vector(d_y);
auto d_x = device_copy(x, handle_ptr_->get_stream());
auto d_y = device_copy(y, handle_ptr_->get_stream());
detail::cusparse_dn_vec_descr_wrapper_t<f_t> x_cusparse = create_vector(d_x);
detail::cusparse_dn_vec_descr_wrapper_t<f_t> y_cusparse = create_vector(d_y);
transpose_spmv(alpha, x_cusparse, beta, y_cusparse);
y = cuopt::host_copy<f_t, AllocatorB>(d_y, handle_ptr_->get_stream());
}

template <typename i_t, typename f_t>
void cusparse_view_t<i_t, f_t>::transpose_spmv(f_t alpha,
cusparseDnVecDescr_t x,
f_t beta,
cusparseDnVecDescr_t y)
void cusparse_view_t<i_t, f_t>::transpose_spmv(
f_t alpha,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& x,
f_t beta,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& y)
{
// Would be simpler if we could pass host data directly, but other cusparse calls with the same
// handle depend on device data
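Note: the std::vector overloads are host-side conveniences: they device_copy the inputs, wrap them in descriptors, run the device SpMV, and host_copy the result back. A hedged usage sketch (view construction elided; num_rows/num_cols are assumed names for the matrix dimensions):

  std::vector<double> x(num_cols, 1.0), y(num_rows, 0.0);
  view.spmv(1.0, x, 0.0, y);            // y = A * x, device round-trip handled internally
  view.transpose_spmv(1.0, y, 0.0, x);  // x = A^T * y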
15 changes: 12 additions & 3 deletions cpp/src/dual_simplex/cusparse_view.hpp
@@ -8,6 +8,8 @@

#include <dual_simplex/sparse_matrix.hpp>

#include <linear_programming/cusparse_view.hpp>

#include <cusparse_v2.h>

#include <rmm/device_scalar.hpp>
@@ -25,21 +27,28 @@ class cusparse_view_t {
public:
// TMP matrix data should already be on the GPU and in CSR not CSC
cusparse_view_t(raft::handle_t const* handle_ptr, const csc_matrix_t<i_t, f_t>& A);
~cusparse_view_t();

static cusparseDnVecDescr_t create_vector(const rmm::device_uvector<f_t>& vec);
detail::cusparse_dn_vec_descr_wrapper_t<f_t> create_vector(rmm::device_uvector<f_t> const& vec);

template <typename AllocatorA, typename AllocatorB>
void spmv(f_t alpha,
const std::vector<f_t, AllocatorA>& x,
f_t beta,
std::vector<f_t, AllocatorB>& y);
void spmv(f_t alpha, cusparseDnVecDescr_t x, f_t beta, cusparseDnVecDescr_t y);
void spmv(f_t alpha,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& x,
f_t beta,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& y);
template <typename AllocatorA, typename AllocatorB>
void transpose_spmv(f_t alpha,
const std::vector<f_t, AllocatorA>& x,
f_t beta,
std::vector<f_t, AllocatorB>& y);
void transpose_spmv(f_t alpha, cusparseDnVecDescr_t x, f_t beta, cusparseDnVecDescr_t y);
void transpose_spmv(f_t alpha,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& x,
f_t beta,
detail::cusparse_dn_vec_descr_wrapper_t<f_t> const& y);

raft::handle_t const* handle_ptr_{nullptr};

11 changes: 0 additions & 11 deletions cpp/src/dual_simplex/sparse_cholesky.cuh
@@ -743,17 +743,6 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
CUDSS_CALL_AND_CHECK(
cudssMatrixSetValues(cudss_x, x.data()), status, "cudssMatrixSetValues for x");

i_t ldb = n;
i_t ldx = n;
CUDSS_CALL_AND_CHECK_EXIT(
cudssMatrixCreateDn(&cudss_b, n, 1, ldb, b.data(), CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR),
status,
"cudssMatrixCreateDn for b");
CUDSS_CALL_AND_CHECK_EXIT(
cudssMatrixCreateDn(&cudss_x, n, 1, ldx, x.data(), CUDA_R_64F, CUDSS_LAYOUT_COL_MAJOR),
status,
"cudssMatrixCreateDn for x");

status = cudssExecute(handle, CUDSS_PHASE_SOLVE, solverConfig, solverData, A, cudss_x, cudss_b);
if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return -2; }
if (status != CUDSS_STATUS_SUCCESS) {
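Note: the deleted block re-created cudss_b and cudss_x on every solve, leaking the descriptors created for the previous right-hand side; the cudssMatrixSetValues calls retained above already repoint the existing dense wrappers at the current buffers. The create-once/set-values-per-solve pattern that remains:

  // Per solve: update the data pointers only, never re-create the dense descriptors.
  CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(cudss_b, b.data()), status, "cudssMatrixSetValues for b");
  CUDSS_CALL_AND_CHECK(cudssMatrixSetValues(cudss_x, x.data()), status, "cudssMatrixSetValues for x");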
10 changes: 10 additions & 0 deletions cpp/src/linear_programming/cusparse_view.cu
@@ -75,6 +75,16 @@ cusparse_dn_vec_descr_wrapper_t<f_t>::cusparse_dn_vec_descr_wrapper_t(
{
}

template <typename f_t>
cusparse_dn_vec_descr_wrapper_t<f_t>& cusparse_dn_vec_descr_wrapper_t<f_t>::operator=(
cusparse_dn_vec_descr_wrapper_t<f_t>&& other)
{
if (this != &other) {
if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(descr_)); }
descr_ = other.descr_;
need_destruction_ = other.need_destruction_; // take over ownership, not just the handle
other.need_destruction_ = false;
}
return *this;
}

template <typename f_t>
void cusparse_dn_vec_descr_wrapper_t<f_t>::create(int64_t size, f_t* values)
{
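Note: with copy assignment deleted and move assignment added, a matching move constructor would complete the ownership story. It is not shown in this diff; the following is an assumed sketch, consistent with the move assignment above:

  template <typename f_t>
  cusparse_dn_vec_descr_wrapper_t<f_t>::cusparse_dn_vec_descr_wrapper_t(
    cusparse_dn_vec_descr_wrapper_t<f_t>&& other)
    : descr_(other.descr_), need_destruction_(other.need_destruction_)
  {
    other.need_destruction_ = false;  // steal ownership; other's destructor becomes a no-op
  }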
2 changes: 1 addition & 1 deletion cpp/src/linear_programming/cusparse_view.hpp
@@ -46,7 +46,7 @@ class cusparse_dn_vec_descr_wrapper_t {
~cusparse_dn_vec_descr_wrapper_t();

cusparse_dn_vec_descr_wrapper_t(const cusparse_dn_vec_descr_wrapper_t& other);

cusparse_dn_vec_descr_wrapper_t& operator=(cusparse_dn_vec_descr_wrapper_t&& other);
cusparse_dn_vec_descr_wrapper_t& operator=(const cusparse_dn_vec_descr_wrapper_t& other) = delete;

void create(int64_t size, f_t* values);
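Note: create() is declared here and defined in linear_programming/cusparse_view.cu, where its body is folded out of the diff. A hedged sketch of what it must do; the f_t-to-cudaDataType mapping is an assumption:

  template <typename f_t>
  void cusparse_dn_vec_descr_wrapper_t<f_t>::create(int64_t size, f_t* values)
  {
    constexpr cudaDataType_t dtype = std::is_same_v<f_t, double> ? CUDA_R_64F : CUDA_R_32F;
    RAFT_CUSPARSE_TRY(cusparseCreateDnVec(&descr_, size, values, dtype));
    need_destruction_ = true;  // the destructor will call cusparseDestroyDnVec
  }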
32 changes: 32 additions & 0 deletions cpp/src/utilities/macros.cuh
@@ -37,3 +37,35 @@
fprintf(stderr, "CUDA Error: %s:%i:%s\n", __FILE__, __LINE__, pErrStr); \
} \
} while (0)

#define CUOPT_SET_ERROR_MSG_NO_THROW(msg, location_prefix, fmt, ...) \
do { \
int size1 = std::snprintf(nullptr, 0, "%s", location_prefix); \
int size2 = std::snprintf(nullptr, 0, "file=%s line=%d: ", __FILE__, __LINE__); \
int size3 = std::snprintf(nullptr, 0, fmt, ##__VA_ARGS__); \
if (size1 < 0 || size2 < 0 || size3 < 0) { \
std::cerr << "Error in snprintf, cannot handle CUOPT exception." << std::endl; \
return; \
} \
auto size = size1 + size2 + size3 + 1; /* +1 for final '\0' */ \
std::vector<char> buf(size); \
std::snprintf(buf.data(), size1 + 1 /* +1 for '\0' */, "%s", location_prefix); \
std::snprintf( \
buf.data() + size1, size2 + 1 /* +1 for '\0' */, "file=%s line=%d: ", __FILE__, __LINE__); \
std::snprintf(buf.data() + size1 + size2, size3 + 1 /* +1 for '\0' */, fmt, ##__VA_ARGS__); \
msg += std::string(buf.data(), buf.data() + size - 1); /* -1 to remove final '\0' */ \
} while (0)

#define CUOPT_CUSPARSE_TRY_NO_THROW(call) \
do { \
cusparseStatus_t const status = (call); \
if (CUSPARSE_STATUS_SUCCESS != status) { \
std::string msg{}; \
CUOPT_SET_ERROR_MSG_NO_THROW(msg, \
"cuSparse error encountered at: ", \
"call='%s', Reason=%d:%s", \
#call, \
status, \
raft::sparse::detail::cusparse_error_to_string(status)); \
std::cerr << msg << std::endl; /* report to stderr: this variant must not throw */ \
} \
} while (0)
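Note: CUOPT_SET_ERROR_MSG_NO_THROW relies on the C99/C++11 guarantee that std::snprintf(nullptr, 0, ...) returns the length the formatted string would need, excluding the terminating '\0'. The same measure-then-write pattern in a standalone form:

  #include <cstdio>
  #include <string>
  #include <vector>

  // Two-pass snprintf: measure first, then write into an exactly-sized buffer.
  std::string format_location(const char* file, int line, const char* what)
  {
    int n = std::snprintf(nullptr, 0, "file=%s line=%d: %s", file, line, what);
    if (n < 0) { return {}; }      // encoding error
    std::vector<char> buf(n + 1);  // +1 for the final '\0'
    std::snprintf(buf.data(), buf.size(), "file=%s line=%d: %s", file, line, what);
    return std::string(buf.data(), n);  // drop the '\0'
  }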
2 changes: 1 addition & 1 deletion python/libcuopt/pyproject.toml
@@ -54,7 +54,7 @@ libcuopt = "libcuopt"
select = [
"distro-too-large-compressed",
]
max_allowed_size_compressed = '605M'
max_allowed_size_compressed = '620M'

[project.scripts]
cuopt_cli = "libcuopt._cli_wrapper:main"