@@ -571,6 +571,7 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
571571 CUDA_CHECK(cudaMemcpy(Ax_cache_.data(), d_ax_current_, a_num_rows_ * sizeof(double), cudaMemcpyDeviceToHost));
572572 CUDA_CHECK(cudaMemcpy(ATy_cache_.data(), d_aty_current_, a_num_cols_ * sizeof(double), cudaMemcpyDeviceToHost));
573573*/
574+ /*
574575 CUDA_CHECK(cudaMemcpy(x_sum_.data(), d_x_sum_, a_num_cols_ * sizeof(double), cudaMemcpyDeviceToHost));
575576 CUDA_CHECK(cudaMemcpy(y_sum_.data(), d_y_sum_, a_num_rows_ * sizeof(double), cudaMemcpyDeviceToHost));
576577 CUDA_CHECK(cudaMemcpy(x_avg_.data(), d_x_avg_, a_num_cols_ * sizeof(double), cudaMemcpyDeviceToHost));
@@ -582,6 +583,8 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
582583
583584 linalgGpuAx(d_x_avg_, d_ax_avg_);
584585 linalgGpuATy(d_y_avg_, d_aty_avg_);
586+ */
587+ computeAverageIterateGpu ();
585588#endif
586589 hipdlpTimerStart (kHipdlpClockAverageIterate );
587590 computeAverageIterate (Ax_avg, ATy_avg);
@@ -820,9 +823,7 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
820823
821824#ifdef CUPDLP_GPU
822825 // Update average iterates on GPU
823- double dMeanStepSize = std::sqrt (stepsize_.primal_step * stepsize_.dual_step );
824- launchKernelUpdateAverages (dMeanStepSize);
825- sum_weights_gpu_ += dMeanStepSize;
826+ updateAverageIteratesGpu (inner_iter);
826827#endif
827828
828829 // --- 7. Prepare for next iteration ---
@@ -2156,4 +2157,26 @@ void PDLPSolver::computeStepSizeRatioGpu(PrimalDualParams& working_params) {
21562157 working_params.eta = std::sqrt (stepsize_.primal_step * stepsize_.dual_step );
21572158 working_params.omega = std::sqrt (stepsize_.beta );
21582159 restart_scheme_.UpdateBeta (stepsize_.beta );
2160+ }
2161+
2162+ void PDLPSolver::updateAverageIteratesGpu (int inner_iter) {
2163+ double dMeanStepSize = std::sqrt (stepsize_.primal_step * stepsize_.dual_step );
2164+
2165+ launchKernelUpdateAverages (dMeanStepSize);
2166+
2167+ sum_weights_gpu_ += dMeanStepSize;
2168+ }
2169+
2170+ void PDLPSolver::computeAverageIterateGpu () {
2171+ double dScale = sum_weights_gpu_ > 1e-10 ? 1.0 / sum_weights_gpu_ : 1.0 ;
2172+
2173+ // x_avg = x_sum * scale
2174+ launchKernelScaleVector (d_x_avg_, d_x_sum_, dScale, a_num_cols_);
2175+
2176+ // y_avg = y_sum * scale
2177+ launchKernelScaleVector (d_y_avg_, d_y_sum_, dScale, a_num_rows_);
2178+
2179+ // Recompute Ax_avg and ATy_avg on GPU
2180+ linalgGpuAx (d_x_avg_, d_ax_avg_);
2181+ linalgGpuATy (d_y_avg_, d_aty_avg_);
21592182}
0 commit comments