Skip to content

Commit f3a510b

Browse files
committed
vanilla PDHG GPU checked
1 parent 6f235e5 commit f3a510b

File tree

5 files changed

+37
-3
lines changed

5 files changed

+37
-3
lines changed

check/TestPdlp.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ TEST_CASE("hi-pdlp", "[pdlp]") {
366366
h.setOptionValue("pdlp_scaling_mode", pdlp_scaling);
367367
h.setOptionValue("pdlp_step_size_strategy", 1);
368368
h.setOptionValue("pdlp_restart_strategy", 2);
369-
h.setOptionValue("pdlp_iteration_limit", 10);
369+
h.setOptionValue("pdlp_iteration_limit", 20);
370370
// h.setOptionValue("log_dev_level", kHighsLogDevLevelVerbose);
371371
auto start_hipdlp = std::chrono::high_resolution_clock::now();
372372
HighsStatus run_status = h.run();

highs/pdlp/hipdlp/pdhg.cc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,9 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
574574
CUDA_CHECK(cudaMemcpy(x_avg_.data(), d_x_avg_, a_num_cols_ * sizeof(double), cudaMemcpyDeviceToHost));
575575
CUDA_CHECK(cudaMemcpy(y_avg_.data(), d_y_avg_, a_num_rows_ * sizeof(double), cudaMemcpyDeviceToHost));
576576

577-
577+
double dScale_gpu = sum_weights_gpu_ > 0.0 ? 1.0 / sum_weights_gpu_ : 0.0;
578+
launchKernelScaleVector(d_x_avg_, d_x_sum_, dScale_gpu, lp_.num_col_);
579+
launchKernelScaleVector(d_y_avg_, d_y_sum_, dScale_gpu, lp_.num_row_);
578580
#endif
579581
hipdlpTimerStart(kHipdlpClockAverageIterate);
580582
computeAverageIterate(Ax_avg, ATy_avg);
@@ -671,7 +673,7 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
671673
hipdlpTimerStart(kHipdlpClockMatrixMultiply);
672674
linalg::Ax(lp, x_current_, Ax_cache_);
673675

674-
#ifdef CUPDLP_GPU
676+
#ifdef CUPDLP_GPU
675677
launchKernelUpdateX(stepsize_.primal_step);
676678
linalgGpuAx(d_x_next_, d_ax_next_);
677679
launchKernelUpdateY(stepsize_.dual_step);
@@ -1967,4 +1969,9 @@ void PDLPSolver::launchKernelUpdateAverages(double weight) {
19671969
d_x_next_, d_y_next_,
19681970
weight, a_num_cols_, a_num_rows_);
19691971
CUDA_CHECK(cudaGetLastError());
1972+
}
1973+
1974+
// Member-function entry point for scaling a device vector.
// Forwards all four arguments unchanged to launchKernelScaleVector_wrapper,
// then passes the result of cudaGetLastError() through CUDA_CHECK.
//   d_out : destination buffer forwarded to the wrapper
//   d_in  : source buffer forwarded to the wrapper
//   scale : multiplier forwarded to the wrapper
//   n     : element count forwarded to the wrapper
void PDLPSolver::launchKernelScaleVector(double* d_out,
                                         const double* d_in,
                                         double scale,
                                         int n) {
  launchKernelScaleVector_wrapper(d_out, d_in, scale, n);

  CUDA_CHECK(cudaGetLastError());
}

highs/pdlp/hipdlp/pdhg.cu

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,15 @@ __global__ void kernelUpdateAverages(
6565
}
6666
}
6767

68+
// Element-wise scaling kernel: d_out[idx] = d_in[idx] * scale.
// Indices come from CUDA_GRID_STRIDE_LOOP(idx, n); the macro is defined
// outside this view (presumably it visits every index in [0, n) -- confirm).
// Each iteration reads and writes the same index only.
__global__ void kernelScaleVector(double* d_out, const double* d_in,
                                  double scale, int n) {
  CUDA_GRID_STRIDE_LOOP(idx, n) {
    const double scaled = d_in[idx] * scale;
    d_out[idx] = scaled;
  }
}
76+
6877
// Add C++ wrapper functions to launch the kernels
6978
extern "C" {
7079
void launchKernelUpdateX_wrapper(
@@ -115,4 +124,17 @@ void launchKernelUpdateAverages_wrapper(
115124
weight, n_cols, n_rows);
116125
cudaGetLastError();
117126
}
127+
128+
// Host-side launcher for kernelScaleVector (d_out[i] = d_in[i] * scale).
//   d_out : destination pointer passed to the kernel
//   d_in  : source pointer passed to the kernel
//   scale : multiplier applied to every element
//   n     : number of elements; a non-positive n is a no-op
// Uses 256 threads per block and takes the block count from
// GetLaunchConfig(n, block_size).x. The launch is issued on the default
// stream and this function does not synchronize.
// Error reporting is left to the caller: launch errors stay pending so that
// a following CUDA_CHECK(cudaGetLastError()) can observe them.
void launchKernelScaleVector_wrapper(
    double* d_out, const double* d_in,
    double scale, int n)
{
  // Nothing to scale; also avoids requesting a launch for an empty vector.
  if (n <= 0) return;

  const int block_size = 256;
  dim3 config = GetLaunchConfig(n, block_size);

  kernelScaleVector<<<config.x, block_size>>>(
      d_out, d_in, scale, n);

  // Deliberately no bare cudaGetLastError() here: that call resets the stored
  // error to cudaSuccess, so discarding its return value would hide a failed
  // launch from the caller's own cudaGetLastError() check.
}
118140
} // extern "C"

highs/pdlp/hipdlp/pdhg.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ class PDLPSolver {
243243
void launchKernelUpdateX(double primal_step);
244244
void launchKernelUpdateY(double dual_step);
245245
void launchKernelUpdateAverages(double weight);
246+
// Calls launchKernelScaleVector_wrapper(d_out, d_in, scale, n) and then
// checks cudaGetLastError() via CUDA_CHECK.
void launchKernelScaleVector(double* d_out, const double* d_in, double scale, int n);
246247
};
247248

248249
#endif

highs/pdlp/hipdlp/pdhg_kernels.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ void launchKernelUpdateAverages_wrapper(
2020
const double* d_x_current, const double* d_y_current,
2121
double weight, int n_cols, int n_rows);
2222

23+
// Launcher for kernelScaleVector (defined in pdhg.cu), which computes
// d_out[i] = d_in[i] * scale; n is passed to the kernel as the loop bound.
void launchKernelScaleVector_wrapper(
24+
double* d_out, const double* d_in,
25+
double scale, int n);
26+
2327
#ifdef __cplusplus
2428
}
2529
#endif

0 commit comments

Comments
 (0)