Skip to content

Commit 21fa43f

Browse files
committed
computeMovementGpu and computeNonlinearityGpu
1 parent b12018c commit 21fa43f

File tree

5 files changed

+99
-5
lines changed

5 files changed

+99
-5
lines changed

check/TestPdlp.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ TEST_CASE("pdlp-restart-add-row", "[pdlp]") {
342342

343343
TEST_CASE("hi-pdlp", "[pdlp]") {
344344
std::string model =
345-
"shell"; //"adlittle";//"afiro";// shell// stair //25fv47 //fit2p
345+
"avgas"; //"adlittle";//"afiro";// shell// stair //25fv47 //fit2p //avgas
346346
std::string model_file =
347347
std::string(HIGHS_DIR) + "/check/instances/" + model + ".mps";
348348
Highs h;
@@ -354,8 +354,8 @@ TEST_CASE("hi-pdlp", "[pdlp]") {
354354

355355
HighsInt pdlp_features_off = 0 +
356356
kPdlpScalingOff +
357-
kPdlpRestartOff +
358-
kPdlpAdaptiveStepSizeOff
357+
kPdlpRestartOff
358+
//kPdlpAdaptiveStepSizeOff
359359
;
360360
h.setOptionValue("pdlp_features_off", pdlp_features_off);
361361

@@ -366,7 +366,7 @@ TEST_CASE("hi-pdlp", "[pdlp]") {
366366
h.setOptionValue("pdlp_scaling_mode", pdlp_scaling);
367367
h.setOptionValue("pdlp_step_size_strategy", 1);
368368
h.setOptionValue("pdlp_restart_strategy", 2);
369-
//h.setOptionValue("pdlp_iteration_limit", 100);
369+
//h.setOptionValue("pdlp_iteration_limit", 10);
370370
// h.setOptionValue("log_dev_level", kHighsLogDevLevelVerbose);
371371
auto start_hipdlp = std::chrono::high_resolution_clock::now();
372372
HighsStatus run_status = h.run();

highs/pdlp/hipdlp/pdhg.cc

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,8 +715,14 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
715715
// Perform the primal weight update using z^{n,0} and z^{n-1,0}
716716
#ifdef CUPDLP_GPU
717717
computeStepSizeRatioGpu(working_params);
718-
#endif
718+
double cpu_beta = stepsize_.beta;
719+
double cpu_primal_step = stepsize_.primal_step;
720+
double cpu_dual_step = stepsize_.dual_step;
721+
double cpu_eta = working_params.eta;
722+
double cpu_omega = working_params.omega;
723+
#else
719724
computeStepSizeRatio(working_params);
725+
#endif
720726
current_eta_ = working_params.eta;
721727
restart_scheme_.passParams(&working_params);
722728

@@ -1641,6 +1647,25 @@ void PDLPSolver::updateIteratesAdaptive() {
16411647
double primal_step_update = dStepSizeUpdate / std::sqrt(stepsize_.beta);
16421648
double dual_step_update = dStepSizeUpdate * std::sqrt(stepsize_.beta);
16431649

1650+
#ifdef CUPDLP_GPU
1651+
launchKernelUpdateX_wrapper(
1652+
d_x_next_, // Output (Trial)
1653+
d_x_current_, // Input (Base)
1654+
d_aty_current_, // Input (Base ATy)
1655+
d_col_cost_, d_col_lower_, d_col_upper_,
1656+
primal_step_update, a_num_cols_);
1657+
linalgGpuAx(d_x_next_, d_ax_next_);
1658+
launchKernelUpdateY_wrapper(
1659+
d_y_next_, // Output (Trial)
1660+
d_y_current_, // Input (Base)
1661+
d_ax_current_, // Input (Base Ax)
1662+
d_ax_next_, // Input (Trial Ax)
1663+
d_row_lower_, d_is_equality_row_,
1664+
dual_step_update, a_num_rows_);
1665+
linalgGpuATy(d_y_next_, d_aty_next_);
1666+
double movement = computeMovementGpu(d_x_next_, d_x_current_, d_y_next_, d_y_current_);
1667+
double nonlinearity = computeNonlinearityGpu(d_x_next_, d_x_current_, d_aty_next_, d_aty_current_);
1668+
#else
16441669
// Primal update
16451670
hipdlpTimerStart(kHipdlpClockProjectX);
16461671
xupdate = updateX(x_candidate, aty_candidate, primal_step_update);
@@ -1686,6 +1711,7 @@ void PDLPSolver::updateIteratesAdaptive() {
16861711
// Compute movement and nonlinearity
16871712
double movement = computeMovement(delta_x, delta_y);
16881713
double nonlinearity = computeNonlinearity(delta_x, delta_aty);
1714+
#endif
16891715
// Compute step size limit
16901716
double step_size_limit = (nonlinearity != 0.0)
16911717
? (movement / std::fabs(nonlinearity))
@@ -2184,4 +2210,34 @@ void PDLPSolver::computeAverageIterateGpu() {
21842210
linalgGpuAx(d_x_avg_, d_ax_avg_);
21852211
linalgGpuATy(d_y_avg_, d_aty_avg_);
21862212
}
2213+
2214+
double PDLPSolver::computeMovementGpu(const double* d_x_new, const double* d_x_old,
2215+
const double* d_y_new, const double* d_y_old) {
2216+
// 1. Compute ||x_new - x_old||^2
2217+
launchKernelDiffTwoNormSquared_wrapper(d_x_new, d_x_old, d_x_temp_diff_norm_result_, a_num_cols_);
2218+
double primal_diff_sq;
2219+
CUDA_CHECK(cudaMemcpy(&primal_diff_sq, d_x_temp_diff_norm_result_, sizeof(double), cudaMemcpyDeviceToHost));
2220+
2221+
// 2. Compute ||y_new - y_old||^2
2222+
launchKernelDiffTwoNormSquared_wrapper(d_y_new, d_y_old, d_x_temp_diff_norm_result_, a_num_rows_);
2223+
double dual_diff_sq;
2224+
CUDA_CHECK(cudaMemcpy(&dual_diff_sq, d_x_temp_diff_norm_result_, sizeof(double), cudaMemcpyDeviceToHost));
2225+
2226+
// 3. Combine scalar results on CPU
2227+
double primal_weight = std::sqrt(stepsize_.beta);
2228+
return (0.5 * primal_weight * primal_diff_sq) +
2229+
(0.5 / primal_weight) * dual_diff_sq;
2230+
}
2231+
2232+
/**
 * GPU counterpart of the "nonlinearity" term used by the adaptive step-size
 * rule: the dot product of (x_new - x_old) with (aty_new - aty_old), taken
 * over a_num_cols_ entries.
 *
 * The signed value is returned as-is; the caller applies fabs when it forms
 * the step-size limit (cupdlp follows the same convention).
 *
 * @param d_x_new, d_x_old      primal iterates
 * @param d_aty_new, d_aty_old  matching A^T y vectors
 */
double PDLPSolver::computeNonlinearityGpu(const double* d_x_new, const double* d_x_old,
                                          const double* d_aty_new, const double* d_aty_old) {
  double* const d_scratch = d_x_temp_diff_norm_result_;

  // The reduction is written into the shared one-double scratch slot.
  launchKernelDiffDotDiff_wrapper(d_x_new, d_x_old, d_aty_new, d_aty_old,
                                  d_scratch, a_num_cols_);

  // Blocking copy: brings the scalar back to the host.
  double cross_term = 0.0;
  CUDA_CHECK(cudaMemcpy(&cross_term, d_scratch, sizeof(double), cudaMemcpyDeviceToHost));
  return cross_term;
}
21872243
#endif

highs/pdlp/hipdlp/pdhg.cu

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,20 @@ __global__ void kernelDiffTwoNormSquared(
194194
atomicAdd(result, local_diff_sq);
195195
}
196196

197+
// Adds sum_i (a_new[i] - a_old[i]) * (b_new[i] - b_old[i]) into *result.
//
// Preconditions:
//   - *result must be zeroed before the launch; the kernel only accumulates.
//   - 1D thread blocks: warp lanes are derived from threadIdx.x / blockDim.x.
//   - Requires SM60+ (atomicAdd on double) and CUDA 9+ (*_sync shuffles).
//   - The loop macro CUDA_GRID_STRIDE_LOOP is defined elsewhere; it is
//     presumably a grid-stride loop over [0, n) with no early exit — confirm.
//
// Each thread first sums its own elements. The per-thread partial sums are
// then folded inside every warp with shuffles, so that only lane 0 of each
// warp issues an atomicAdd. This cuts the number of atomics on the single
// result address by up to 32x compared with one atomicAdd per thread.
// The floating-point summation order differs from a sequential sum (as it
// already does with unordered atomics).
__global__ void kernelDiffDotDiff(
    const double* a_new, const double* a_old,
    const double* b_new, const double* b_old,
    double* result, int n)
{
  double local_sum = 0.0;
  CUDA_GRID_STRIDE_LOOP(i, n) {
    double diff_a = a_new[i] - a_old[i];
    double diff_b = b_new[i] - b_old[i];
    local_sum += diff_a * diff_b;
  }

  // Work out which lanes of this warp actually exist. The last warp of a
  // block is partial when blockDim.x is not a multiple of 32, and the shuffle
  // mask must name exactly the participating lanes.
  const unsigned int lane = threadIdx.x & 31u;
  const unsigned int warp_first_thread = threadIdx.x - lane;
  const unsigned int threads_left = blockDim.x - warp_first_thread;
  const unsigned int lanes_in_warp = threads_left < 32u ? threads_left : 32u;
  const unsigned int mask =
      (lanes_in_warp == 32u) ? 0xffffffffu : ((1u << lanes_in_warp) - 1u);

  // Tree reduction towards lane 0. A value shuffled in from a lane outside
  // the warp's populated range is undefined, so it is only added when the
  // source lane exists.
  for (unsigned int offset = 16u; offset > 0u; offset >>= 1) {
    const double neighbour = __shfl_down_sync(mask, local_sum, offset);
    if (lane + offset < lanes_in_warp) {
      local_sum += neighbour;
    }
  }

  // One atomic per warp instead of one per thread.
  if (lane == 0u) {
    atomicAdd(result, local_sum);
  }
}
197211

198212
// Add C++ wrapper functions to launch the kernels
199213
extern "C" {
@@ -302,4 +316,18 @@ void launchKernelDiffTwoNormSquared_wrapper(
302316
kernelDiffTwoNormSquared<<<config.x, block_size>>>(d_a, d_b, d_result, n);
303317
cudaGetLastError();
304318
}
319+
320+
// Host-side launcher for kernelDiffDotDiff. After the kernel has run, the
// single double at d_result holds
//     sum_i (d_a_new[i] - d_a_old[i]) * (d_b_new[i] - d_b_old[i]),  i in [0, n).
//
// All pointers must be device-accessible: d_result is written with
// cudaMemset and the other four are dereferenced by the kernel. The launch
// uses the default stream and is not synchronized here, so the caller must
// use a synchronizing operation (e.g. a blocking cudaMemcpy) before reading
// d_result.
//
// For n <= 0 the result is set to zero and no kernel is launched.
void launchKernelDiffDotDiff_wrapper(
    const double* d_a_new, const double* d_a_old,
    const double* d_b_new, const double* d_b_old,
    double* d_result, int n)
{
  // The kernel only accumulates, so the result slot has to start at zero.
  cudaMemset(d_result, 0, sizeof(double));

  // Empty input: the zero written above is already the answer. Skipping the
  // launch avoids handing a zero-sized problem to GetLaunchConfig and the
  // kernel launch.
  if (n <= 0) {
    return;
  }

  const int block_size = 256;
  dim3 config = GetLaunchConfig(n, block_size);

  kernelDiffDotDiff<<<config.x, block_size>>>(
      d_a_new, d_a_old, d_b_new, d_b_old, d_result, n);

  // NOTE(review): the status returned here (and by cudaMemset above) is
  // discarded, and cudaGetLastError() also clears the pending error, so a
  // failed launch goes unnoticed. This mirrors the sibling wrappers in this
  // file; surfacing the error needs a reporting channel that this
  // void extern "C" interface does not provide.
  cudaGetLastError();
}
305333
} // extern "C"

highs/pdlp/hipdlp/pdhg.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ class PDLPSolver {
265265
void computeStepSizeRatioGpu(PrimalDualParams& working_params);
266266
void updateAverageIteratesGpu(int inner_iter);
267267
void computeAverageIterateGpu();
268+
double computeMovementGpu(const double* d_x_new, const double* d_x_old,
269+
const double* d_y_new, const double* d_y_old);
270+
271+
double computeNonlinearityGpu(const double* d_x_new, const double* d_x_old,
272+
const double* d_aty_new, const double* d_aty_old);
268273
#endif
269274
};
270275

highs/pdlp/hipdlp/pdhg_kernels.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ void launchCheckConvergenceKernels_wrapper(
3737

3838
void launchKernelDiffTwoNormSquared_wrapper(
3939
const double* d_a, const double* d_b, double* d_result, int n);
40+
41+
void launchKernelDiffDotDiff_wrapper(
42+
const double* d_a_new, const double* d_a_old,
43+
const double* d_b_new, const double* d_b_old,
44+
double* d_result, int n);
4045
#ifdef __cplusplus
4146
}
4247
#endif

0 commit comments

Comments
 (0)