Skip to content

Commit 660dcae

Browse files
committed
Added cuda-sandbox
1 parent cb5e09a commit 660dcae

File tree

5 files changed

+92
-49
lines changed

5 files changed

+92
-49
lines changed

check/TestPdlp.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@
66
#include "SpecialLps.h"
77
#include "catch.hpp"
88

9+
#ifdef CUPDLP_GPU
10+
#include <cublas_v2.h>
11+
#include <cuda_runtime.h>
12+
#include <cusparse.h>
13+
#endif
14+
915
const bool dev_run = false;
1016
const double double_equal_tolerance = 1e-3;
1117
const double kkt_tolerance = 1e-4;
@@ -419,5 +425,46 @@ TEST_CASE("hi-pdlp-timer", "[pdlp]") {
419425
kPdlpAdaptiveStepSizeOff;
420426
h.setOptionValue("pdlp_features_off", pdlp_features_off);
421427
HighsStatus run_status = h.run();
428+
422429
h.resetGlobalScheduler(true);
423430
}
431+
432+
#ifdef CUPDLP_GPU
// Sanity-check the CUDA / cuSPARSE runtime: verify at least one device is
// present, optionally print each device's properties, and report the CUDA
// runtime, CUDA driver and cuSPARSE versions. Output is gated on dev_run to
// match the convention used by the other tests in this file.
TEST_CASE("cuda-sandbox", "[pdlp]") {
  if (dev_run) printf("Hello World - cuda-sandbox\n");
  cusparseHandle_t cusparsehandle;
  // Check the creation status so a broken CUDA install fails the test rather
  // than silently handing garbage handles to later calls.
  REQUIRE(cusparseCreate(&cusparsehandle) == CUSPARSE_STATUS_SUCCESS);
  int v_cuda_runtime = 0;
  int v_cuda_driver = 0;
  int v_cusparse = 0;
  int n_devices = 0;
  REQUIRE(cudaGetDeviceCount(&n_devices) == cudaSuccess);
  // At least one device must exist. The previous assert(n_devices == 1)
  // failed spuriously on multi-GPU machines and was compiled out under
  // NDEBUG; REQUIRE is always active and reports through Catch.
  REQUIRE(n_devices >= 1);
  // Report every detected device, not just device 0.
  for (int device = 0; device < n_devices; device++) {
    cudaDeviceProp prop;
    REQUIRE(cudaGetDeviceProperties(&prop, device) == cudaSuccess);
    if (!dev_run) continue;
    printf("Cuda device %d: %s\n", device, prop.name);
    printf("  Clock rate (KHz): %d\n", prop.clockRate);
    printf("  Memory clock rate (KHz): %d\n", prop.memoryClockRate);
    printf("  Memory bus width (bits): %d\n", prop.memoryBusWidth);
    printf("  Peak memory bandwidth (GB/s): %f\n",
           2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8) / 1.0e6);
    printf("  Global memory available on device (GB): %f\n",
           prop.totalGlobalMem / 1.0e9);
    printf("  Shared memory available per block (B): %zu\n",
           prop.sharedMemPerBlock);
    printf("  Warp size in threads: %d\n", prop.warpSize);
    printf("  Maximum number of threads per block: %d\n",
           prop.maxThreadsPerBlock);
    printf("  Compute capability: %d.%d\n", prop.major, prop.minor);
    printf("  Number of multiprocessors on device: %d\n",
           prop.multiProcessorCount);
  }

  REQUIRE(cudaRuntimeGetVersion(&v_cuda_runtime) == cudaSuccess);
  REQUIRE(cudaDriverGetVersion(&v_cuda_driver) == cudaSuccess);

  REQUIRE(cusparseGetVersion(cusparsehandle, &v_cusparse) ==
          CUSPARSE_STATUS_SUCCESS);
  if (dev_run) {
    printf("Cuda runtime version %d\n", v_cuda_runtime);
    printf("Cuda driver version %d\n", v_cuda_driver);
    printf("cuSparse version %d\n", v_cusparse);
  }
  // Release the cuSPARSE handle: the original leaked it.
  REQUIRE(cusparseDestroy(cusparsehandle) == CUSPARSE_STATUS_SUCCESS);
}
#endif

highs/pdlp/hipdlp/linalg.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ std::vector<double> compute_row_norms(const HighsLp& lp, double p) {
214214
}
215215

216216
std::vector<double> vector_subtrac(const std::vector<double>& a,
217-
const std::vector<double>& b) {
217+
const std::vector<double>& b) {
218218
if (a.size() != b.size()) {
219219
throw std::invalid_argument(
220220
"Vectors must be of the same size for subtraction.");

highs/pdlp/hipdlp/linalg.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ std::vector<double> compute_row_norms(
5454
const HighsLp& lp, double p = std::numeric_limits<double>::infinity());
5555

5656
std::vector<double> vector_subtrac(const std::vector<double>& a,
57-
const std::vector<double>& b);
57+
const std::vector<double>& b);
5858

5959
} // namespace linalg
6060

highs/pdlp/hipdlp/pdhg.cc

Lines changed: 28 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -442,19 +442,19 @@ PostSolveRetcode PDLPSolver::postprocess(HighsSolution& solution) {
442442

443443
// Compute Ax using only the original columns (not slack variables)
444444
for (int col = 0; col < original_num_col_; ++col) {
445-
double x_val = x_current_[col]; // Use unscaled x values
446-
447-
for (int el = orig_matrix.start_[col];
448-
el < orig_matrix.start_[col + 1]; ++el) {
449-
int row = orig_matrix.index_[el];
450-
double a_val = orig_matrix.value_[el];
451-
ax_original[row] += a_val * x_val;
452-
}
445+
double x_val = x_current_[col]; // Use unscaled x values
446+
447+
for (int el = orig_matrix.start_[col]; el < orig_matrix.start_[col + 1];
448+
++el) {
449+
int row = orig_matrix.index_[el];
450+
double a_val = orig_matrix.value_[el];
451+
ax_original[row] += a_val * x_val;
452+
}
453453
}
454454

455455
// Now ax_original contains the correct row activity values
456456
for (int orig_row = 0; orig_row < original_lp_->num_row_; ++orig_row) {
457-
solution.row_value[orig_row] = ax_original[orig_row];
457+
solution.row_value[orig_row] = ax_original[orig_row];
458458
}
459459

460460
// 6. Recover Dual Column Values (Reduced Costs)
@@ -566,16 +566,14 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
566566

567567
hipdlpTimerStart(kHipdlpClockConvergenceCheck);
568568
// Compute residuals for current iterate
569-
bool current_converged =
570-
checkConvergence(iter, x_current_, y_current_, Ax_cache_, ATy_cache_,
571-
params_.tolerance, current_results, "[L]",
572-
dSlackPos_, dSlackNeg_);
569+
bool current_converged = checkConvergence(
570+
iter, x_current_, y_current_, Ax_cache_, ATy_cache_,
571+
params_.tolerance, current_results, "[L]", dSlackPos_, dSlackNeg_);
573572

574573
// Compute residuals for average iterate
575-
bool average_converged =
576-
checkConvergence(iter, x_avg_, y_avg_, Ax_avg, ATy_avg,
577-
params_.tolerance, average_results, "[A]",
578-
dSlackPosAvg_, dSlackNegAvg_);
574+
bool average_converged = checkConvergence(
575+
iter, x_avg_, y_avg_, Ax_avg, ATy_avg, params_.tolerance,
576+
average_results, "[A]", dSlackPosAvg_, dSlackNegAvg_);
579577
hipdlpTimerStop(kHipdlpClockConvergenceCheck);
580578

581579
debugPdlpIterHeaderLog(debug_pdlp_log_file_);
@@ -907,14 +905,14 @@ void PDLPSolver::computeDualSlacks(const std::vector<double>& dualResidual,
907905
}
908906
}
909907

910-
double PDLPSolver::computeDualFeasibility(
911-
const std::vector<double>& ATy_vector, std::vector<double>& dSlackPos,
912-
std::vector<double>& dSlackNeg) {
908+
double PDLPSolver::computeDualFeasibility(const std::vector<double>& ATy_vector,
909+
std::vector<double>& dSlackPos,
910+
std::vector<double>& dSlackNeg) {
913911
std::vector<double> dualResidual(lp_.num_col_, 0.0);
914912
// dualResidual = c-A'y
915913
dualResidual = linalg::vector_subtrac(lp_.col_cost_, ATy_vector);
916914
double dualResidualNorm = linalg::vector_norm(dualResidual);
917-
915+
918916
// Call the refactored function to populate dSlackPos and dSlackNeg
919917
computeDualSlacks(dualResidual, dSlackPos, dSlackNeg);
920918

@@ -985,9 +983,9 @@ PDLPSolver::computeDualityGap(const std::vector<double>& x,
985983
cTx);
986984
}
987985

988-
double PDLPSolver::computeDualObjective(
989-
const std::vector<double>& y, const std::vector<double>& dSlackPos,
990-
const std::vector<double>& dSlackNeg) {
986+
double PDLPSolver::computeDualObjective(const std::vector<double>& y,
987+
const std::vector<double>& dSlackPos,
988+
const std::vector<double>& dSlackNeg) {
991989
double dual_obj = lp_.offset_;
992990

993991
// Compute b'y (or rhs'y in cuPDLP notation)
@@ -1012,15 +1010,12 @@ double PDLPSolver::computeDualObjective(
10121010
return dual_obj;
10131011
}
10141012

1015-
bool PDLPSolver::checkConvergence(const int iter, const std::vector<double>& x,
1016-
const std::vector<double>& y,
1017-
const std::vector<double>& ax_vector,
1018-
const std::vector<double>& aty_vector,
1019-
double epsilon, SolverResults& results,
1020-
const char* type,
1021-
// Add slack vectors as non-const references
1022-
std::vector<double>& dSlackPos,
1023-
std::vector<double>& dSlackNeg) {
1013+
bool PDLPSolver::checkConvergence(
1014+
const int iter, const std::vector<double>& x, const std::vector<double>& y,
1015+
const std::vector<double>& ax_vector, const std::vector<double>& aty_vector,
1016+
double epsilon, SolverResults& results, const char* type,
1017+
// Add slack vectors as non-const references
1018+
std::vector<double>& dSlackPos, std::vector<double>& dSlackNeg) {
10241019
// computeDualSlacks is now called inside computeDualFeasibility
10251020

10261021
// Compute primal feasibility

highs/pdlp/hipdlp/pdhg.hpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,13 @@ class PDLPSolver {
6161
void initialize();
6262
void printConstraintInfo();
6363
bool checkConvergence(const int iter, const std::vector<double>& x,
64-
const std::vector<double>& y,
65-
const std::vector<double>& ax_vector,
66-
const std::vector<double>& aty_vector,
67-
double epsilon, SolverResults& results,
68-
const char* type,
69-
// Add slack vectors as non-const references
70-
std::vector<double>& dSlackPos,
71-
std::vector<double>& dSlackNeg);
64+
const std::vector<double>& y,
65+
const std::vector<double>& ax_vector,
66+
const std::vector<double>& aty_vector, double epsilon,
67+
SolverResults& results, const char* type,
68+
// Add slack vectors as non-const references
69+
std::vector<double>& dSlackPos,
70+
std::vector<double>& dSlackNeg);
7271
void updateAverageIterates(const std::vector<double>& x,
7372
const std::vector<double>& y,
7473
const PrimalDualParams& params, int inner_iter);
@@ -100,14 +99,16 @@ class PDLPSolver {
10099
// --- Feasibility, Duality, and KKT Checks ---
101100
std::vector<double> computeLambda(const std::vector<double>& y,
102101
const std::vector<double>& ATy_vector);
103-
double computeDualObjective(const std::vector<double>& y, const std::vector<double>& dSlackPos,
104-
const std::vector<double>& dSlackNeg);
102+
double computeDualObjective(const std::vector<double>& y,
103+
const std::vector<double>& dSlackPos,
104+
const std::vector<double>& dSlackNeg);
105105
double computePrimalFeasibility(const std::vector<double>& Ax_vector);
106106
void computeDualSlacks(const std::vector<double>& dualResidual,
107-
std::vector<double>& dSlackPos,
108-
std::vector<double>& dSlackNeg);
109-
double computeDualFeasibility(const std::vector<double>& ATy_vector, std::vector<double>& dSlackPos,
110-
std::vector<double>& dSlackNeg);
107+
std::vector<double>& dSlackPos,
108+
std::vector<double>& dSlackNeg);
109+
double computeDualFeasibility(const std::vector<double>& ATy_vector,
110+
std::vector<double>& dSlackPos,
111+
std::vector<double>& dSlackNeg);
111112
std::tuple<double, double, double, double, double> computeDualityGap(
112113
const std::vector<double>& x, const std::vector<double>& y,
113114
const std::vector<double>& lambda);

0 commit comments

Comments (0)