@@ -574,7 +574,9 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
574574 CUDA_CHECK (cudaMemcpy (x_avg_.data (), d_x_avg_, a_num_cols_ * sizeof (double ), cudaMemcpyDeviceToHost));
575575 CUDA_CHECK (cudaMemcpy (y_avg_.data (), d_y_avg_, a_num_rows_ * sizeof (double ), cudaMemcpyDeviceToHost));
576576
577-
577+ double dScale_gpu = sum_weights_gpu_ > 0.0 ? 1.0 / sum_weights_gpu_ : 0.0 ;
578+ launchKernelScaleVector (d_x_avg_, d_x_sum_, dScale_gpu, lp_.num_col_ );
579+ launchKernelScaleVector (d_y_avg_, d_y_sum_, dScale_gpu, lp_.num_row_ );
578580#endif
579581 hipdlpTimerStart (kHipdlpClockAverageIterate );
580582 computeAverageIterate (Ax_avg, ATy_avg);
@@ -671,7 +673,7 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
671673 hipdlpTimerStart (kHipdlpClockMatrixMultiply );
672674 linalg::Ax (lp, x_current_, Ax_cache_);
673675
674- #ifdef CUPDLP_GPU
676+ #ifdef CUPDLP_GPU
675677 launchKernelUpdateX (stepsize_.primal_step );
676678 linalgGpuAx (d_x_next_, d_ax_next_);
677679 launchKernelUpdateY (stepsize_.dual_step );
@@ -1967,4 +1969,9 @@ void PDLPSolver::launchKernelUpdateAverages(double weight) {
19671969 d_x_next_, d_y_next_,
19681970 weight, a_num_cols_, a_num_rows_);
19691971 CUDA_CHECK (cudaGetLastError ());
1972+ }
1973+
1974+ void PDLPSolver::launchKernelScaleVector (double * d_out, const double * d_in, double scale, int n) {
1975+ launchKernelScaleVector_wrapper (d_out, d_in, scale, n);
1976+ CUDA_CHECK (cudaGetLastError ());
19701977}
0 commit comments