vanilla PDHG GPU checked: scale the x/y iterate sums into averages on the device (kernelScaleVector)

Yanyu000 · Yanyu000 · commit f3a510b74ea4 · 2025-11-19T16:28:23.000Z
diff --git a/check/TestPdlp.cpp b/check/TestPdlp.cpp
@@ -366,7 +366,7 @@ TEST_CASE("hi-pdlp", "[pdlp]") {
   h.setOptionValue("pdlp_scaling_mode", pdlp_scaling);
   h.setOptionValue("pdlp_step_size_strategy", 1);
   h.setOptionValue("pdlp_restart_strategy", 2);
-  h.setOptionValue("pdlp_iteration_limit", 10);
+  h.setOptionValue("pdlp_iteration_limit", 20);
   //  h.setOptionValue("log_dev_level", kHighsLogDevLevelVerbose);
   auto start_hipdlp = std::chrono::high_resolution_clock::now();
   HighsStatus run_status = h.run();
diff --git a/highs/pdlp/hipdlp/pdhg.cc b/highs/pdlp/hipdlp/pdhg.cc
@@ -574,7 +574,9 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
       CUDA_CHECK(cudaMemcpy(x_avg_.data(), d_x_avg_, a_num_cols_ * sizeof(double), cudaMemcpyDeviceToHost));
       CUDA_CHECK(cudaMemcpy(y_avg_.data(), d_y_avg_, a_num_rows_ * sizeof(double), cudaMemcpyDeviceToHost));
 
-      
+      double dScale_gpu = sum_weights_gpu_ > 0.0 ? 1.0 / sum_weights_gpu_ : 0.0;
+      launchKernelScaleVector(d_x_avg_, d_x_sum_, dScale_gpu, lp_.num_col_);
+      launchKernelScaleVector(d_y_avg_, d_y_sum_, dScale_gpu, lp_.num_row_);
 #endif
       hipdlpTimerStart(kHipdlpClockAverageIterate);
       computeAverageIterate(Ax_avg, ATy_avg);
@@ -671,7 +673,7 @@ void PDLPSolver::solve(std::vector<double>& x, std::vector<double>& y) {
         hipdlpTimerStart(kHipdlpClockMatrixMultiply);
         linalg::Ax(lp, x_current_, Ax_cache_);
 
-#ifdef CUPDLP_GPU
+#ifdef CUPDLP_GPU 
         launchKernelUpdateX(stepsize_.primal_step);
         linalgGpuAx(d_x_next_, d_ax_next_);
         launchKernelUpdateY(stepsize_.dual_step);
@@ -1967,4 +1969,9 @@ void PDLPSolver::launchKernelUpdateAverages(double weight) {
       d_x_next_, d_y_next_,
       weight, a_num_cols_, a_num_rows_);
   CUDA_CHECK(cudaGetLastError());
+}
+
+void PDLPSolver::launchKernelScaleVector(double* d_out, const double* d_in, double scale, int n) {
+  launchKernelScaleVector_wrapper(d_out, d_in, scale, n);  // d_out[i] = d_in[i] * scale
+  CUDA_CHECK(cudaGetLastError());  // read (and reset) the runtime's last-error state
 }
diff --git a/highs/pdlp/hipdlp/pdhg.cu b/highs/pdlp/hipdlp/pdhg.cu
@@ -65,6 +65,15 @@ __global__ void kernelUpdateAverages(
   }
 }
 
+// d_out[idx] = scale * d_in[idx] for every idx CUDA_GRID_STRIDE_LOOP yields.
+__global__ void kernelScaleVector(
+    double* d_out, const double* d_in, double scale, int n)
+{
+  CUDA_GRID_STRIDE_LOOP(idx, n) {
+    d_out[idx] = scale * d_in[idx];
+  }
+}
+
 // Add C++ wrapper functions to launch the kernels
 extern "C" {
 void launchKernelUpdateX_wrapper(
@@ -115,4 +124,17 @@ void launchKernelUpdateAverages_wrapper(
         weight, n_cols, n_rows);
     cudaGetLastError();
 }
+
+// Launches kernelScaleVector: d_out[i] = d_in[i] * scale for i in [0, n).
+// The launch status is not consumed here: cudaGetLastError() resets the error
+// state, which would hide a failed launch from the caller's CUDA_CHECK.
+void launchKernelScaleVector_wrapper(
+    double* d_out, const double* d_in,
+    double scale, int n)
+{
+    if (n <= 0) return;  // empty vector: nothing to launch
+    const int block_size = 256;
+    dim3 config = GetLaunchConfig(n, block_size);
+    kernelScaleVector<<<config.x, block_size>>>(d_out, d_in, scale, n);
+}
 } // extern "C"
diff --git a/highs/pdlp/hipdlp/pdhg.hpp b/highs/pdlp/hipdlp/pdhg.hpp
@@ -243,6 +243,7 @@ class PDLPSolver {
   void launchKernelUpdateX(double primal_step);
   void launchKernelUpdateY(double dual_step);
   void launchKernelUpdateAverages(double weight);
+  void launchKernelScaleVector(double* d_out, const double* d_in, double scale, int n);
 };
 
 #endif
diff --git a/highs/pdlp/hipdlp/pdhg_kernels.h b/highs/pdlp/hipdlp/pdhg_kernels.h
@@ -20,6 +20,10 @@ void launchKernelUpdateAverages_wrapper(
     const double* d_x_current, const double* d_y_current,
     double weight, int n_cols, int n_rows);
 
+// Launches the device kernel computing d_out[i] = d_in[i] * scale, i < n.
+void launchKernelScaleVector_wrapper(
+    double* d_out, const double* d_in, double scale, int n);
+
 #ifdef __cplusplus
 }
 #endif