@@ -715,8 +715,14 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
715715 // Perform the primal weight update using z^{n,0} and z^{n-1,0}
716716#ifdef CUPDLP_GPU
717717 computeStepSizeRatioGpu (working_params);
718- #endif
718+ double cpu_beta = stepsize_.beta ;
719+ double cpu_primal_step = stepsize_.primal_step ;
720+ double cpu_dual_step = stepsize_.dual_step ;
721+ double cpu_eta = working_params.eta ;
722+ double cpu_omega = working_params.omega ;
723+ #else
719724 computeStepSizeRatio (working_params);
725+ #endif
720726 current_eta_ = working_params.eta ;
721727 restart_scheme_.passParams (&working_params);
722728
@@ -1641,6 +1647,25 @@ void PDLPSolver::updateIteratesAdaptive() {
16411647 double primal_step_update = dStepSizeUpdate / std::sqrt (stepsize_.beta );
16421648 double dual_step_update = dStepSizeUpdate * std::sqrt (stepsize_.beta );
16431649
1650+ #ifdef CUPDLP_GPU
1651+ launchKernelUpdateX_wrapper (
1652+ d_x_next_, // Output (Trial)
1653+ d_x_current_, // Input (Base)
1654+ d_aty_current_, // Input (Base ATy)
1655+ d_col_cost_, d_col_lower_, d_col_upper_,
1656+ primal_step_update, a_num_cols_);
1657+ linalgGpuAx (d_x_next_, d_ax_next_);
1658+ launchKernelUpdateY_wrapper (
1659+ d_y_next_, // Output (Trial)
1660+ d_y_current_, // Input (Base)
1661+ d_ax_current_, // Input (Base Ax)
1662+ d_ax_next_, // Input (Trial Ax)
1663+ d_row_lower_, d_is_equality_row_,
1664+ dual_step_update, a_num_rows_);
1665+ linalgGpuATy (d_y_next_, d_aty_next_);
1666+ double movement = computeMovementGpu (d_x_next_, d_x_current_, d_y_next_, d_y_current_);
1667+ double nonlinearity = computeNonlinearityGpu (d_x_next_, d_x_current_, d_aty_next_, d_aty_current_);
1668+ #else
16441669 // Primal update
16451670 hipdlpTimerStart (kHipdlpClockProjectX );
16461671 xupdate = updateX (x_candidate, aty_candidate, primal_step_update);
@@ -1686,6 +1711,7 @@ void PDLPSolver::updateIteratesAdaptive() {
16861711 // Compute movement and nonlinearity
16871712 double movement = computeMovement (delta_x, delta_y);
16881713 double nonlinearity = computeNonlinearity (delta_x, delta_aty);
1714+ #endif
16891715 // Compute step size limit
16901716 double step_size_limit = (nonlinearity != 0.0 )
16911717 ? (movement / std::fabs (nonlinearity))
@@ -2184,4 +2210,34 @@ void PDLPSolver::computeAverageIterateGpu() {
21842210 linalgGpuAx (d_x_avg_, d_ax_avg_);
21852211 linalgGpuATy (d_y_avg_, d_aty_avg_);
21862212}
2213+
2214+ double PDLPSolver::computeMovementGpu (const double * d_x_new, const double * d_x_old,
2215+ const double * d_y_new, const double * d_y_old) {
2216+ // 1. Compute ||x_new - x_old||^2
2217+ launchKernelDiffTwoNormSquared_wrapper (d_x_new, d_x_old, d_x_temp_diff_norm_result_, a_num_cols_);
2218+ double primal_diff_sq;
2219+ CUDA_CHECK (cudaMemcpy (&primal_diff_sq, d_x_temp_diff_norm_result_, sizeof (double ), cudaMemcpyDeviceToHost));
2220+
2221+ // 2. Compute ||y_new - y_old||^2
2222+ launchKernelDiffTwoNormSquared_wrapper (d_y_new, d_y_old, d_x_temp_diff_norm_result_, a_num_rows_);
2223+ double dual_diff_sq;
2224+ CUDA_CHECK (cudaMemcpy (&dual_diff_sq, d_x_temp_diff_norm_result_, sizeof (double ), cudaMemcpyDeviceToHost));
2225+
2226+ // 3. Combine scalar results on CPU
2227+ double primal_weight = std::sqrt (stepsize_.beta );
2228+ return (0.5 * primal_weight * primal_diff_sq) +
2229+ (0.5 / primal_weight) * dual_diff_sq;
2230+ }
2231+
2232+ double PDLPSolver::computeNonlinearityGpu (const double * d_x_new, const double * d_x_old,
2233+ const double * d_aty_new, const double * d_aty_old) {
2234+ // Compute dot( (x_new - x_old), (aty_new - aty_old) )
2235+ launchKernelDiffDotDiff_wrapper (d_x_new, d_x_old, d_aty_new, d_aty_old,
2236+ d_x_temp_diff_norm_result_, a_num_cols_);
2237+
2238+ double interaction;
2239+ CUDA_CHECK (cudaMemcpy (&interaction, d_x_temp_diff_norm_result_, sizeof (double ), cudaMemcpyDeviceToHost));
2240+
2241+ return interaction; // cupdlp does not take absolute value here, it handles fabs in the check
2242+ }
21872243#endif
0 commit comments