Minor Changes and Version Number Fixes

areenraj · areenraj · commit c408c5b65377 · 2025-06-23T09:27:50.000+05:30
diff --git a/Common/include/linear_algebra/CMatrixVectorProduct.hpp b/Common/include/linear_algebra/CMatrixVectorProduct.hpp
@@ -50,12 +50,6 @@
  * handle the different types of matrix-vector products and still be
  * passed to a single implementation of the Krylov solvers.
  * This abstraction may also be used to define matrix-free products.
- *
- * There is also the use of a dummy class being made to select the
- * correct function as defined by the user while deciding between
- * CPU or GPU execution. This dummy class calls the correct member
- * functions from its derived classes to map the suitable path of
- * execution - CPU or GPU.
  */
 
 template <class ScalarType>
@@ -101,14 +95,20 @@ class CSysMatrixVectorProduct final : public CMatrixVectorProduct<ScalarType> {
    * \param[out] v - CSysVector that is the result of the product
    */
   inline void operator()(const CSysVector<ScalarType>& u, CSysVector<ScalarType>& v) const override {
-#ifdef HAVE_CUDA
     if (config->GetCUDA()) {
-      matrix.GPUMatrixVectorProduct(u, v, geometry, config);
-    } else {
+      #ifdef HAVE_CUDA
+        matrix.GPUMatrixVectorProduct(u, v, geometry, config);
+      #else
+        SU2_MPI::Error(
+        "\nError in launching Matrix-Vector Product Function\nENABLE_CUDA is set to YES\nPlease compile with CUDA "
+        "options enabled in Meson to access GPU Functions",
+        CURRENT_FUNCTION);
+      #endif
+  }
+    else
+    {
       matrix.MatrixVectorProduct(u, v, geometry, config);
     }
-#else
-    matrix.MatrixVectorProduct(u, v, geometry, config);
-#endif
+    
   }
-};
+};
diff --git a/Common/include/linear_algebra/CSysMatrix.hpp b/Common/include/linear_algebra/CSysMatrix.hpp
@@ -855,7 +855,6 @@ class CSysMatrix {
    * \param[in] config - Definition of the particular problem.
    * \param[out] prod - Result of the product.
    */
-
   void GPUMatrixVectorProduct(const CSysVector<ScalarType>& vec, CSysVector<ScalarType>& prod, CGeometry* geometry,
                               const CConfig* config) const;
 
@@ -866,7 +865,6 @@ class CSysMatrix {
    * \param[in] config - Definition of the particular problem.
    * \param[out] prod - Result of the product.
    */
-
   void GPUFirstSymmetricIteration(ScalarType& vec, ScalarType& prod, CGeometry* geometry, const CConfig* config) const;
 
   /*!
@@ -875,7 +873,6 @@ class CSysMatrix {
    * \param[in] config - Definition of the particular problem.
    * \param[out] prod - Result of the product.
    */
-
   void GPUSecondSymmetricIteration(ScalarType& prod, CGeometry* geometry, const CConfig* config) const;
 
   /*!
@@ -884,7 +881,6 @@ class CSysMatrix {
    * \param[in] config - Definition of the particular problem.
    * \param[out] prod - Result of the product.
    */
-
   void GPUGaussElimination(ScalarType& prod, CGeometry* geometry, const CConfig* config) const;
 
   /*!
diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp
@@ -217,7 +217,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
   void GPUSetVal(ScalarType val, bool trigger = true) const;
 
   /*!
-   * \brief return the number of local elements in the CSysVector
+   * \brief return device pointer that points to the CSysVector values in GPU memory
    */
   inline ScalarType* GetDevicePointer() const { return d_vec_val; }
 
diff --git a/Common/include/linear_algebra/GPUComms.cuh b/Common/include/linear_algebra/GPUComms.cuh
@@ -2,7 +2,7 @@
 \file GPUComms.cuh
 * \brief Header file containing universal functions that provide basic and essential utilities for other GPU processes
 * \author A. Raj
-* \version 8.1.0 "Harrier"
+* \version 8.2.0 "Harrier"
 *
 * SU2 Project Website: https://su2code.github.io
 *
@@ -26,27 +26,25 @@
 */
 
 #include<cuda_runtime.h>
-#include"iostream"
+#include<iostream>
 
 namespace KernelParameters{
 
-  inline constexpr int round_up_division(const int multiple, int x) { return ((x + multiple - 1) / multiple); }
+inline constexpr int round_up_division(const int multiple, int x) { return ((x + multiple - 1) / multiple); }
 
-  const int MVP_BLOCK_SIZE = 1024;
-  const int MVP_WARP_SIZE = 32;
+static constexpr int MVP_BLOCK_SIZE = 1024;
+static constexpr int MVP_WARP_SIZE = 32;
 }
 /*!
-  * \brief assert style function that reads return codes after intercepting CUDA API calls.
-  *        It returns the result code and its location if the call is unsuccessful.
-  * \param[in] code - result code of CUDA function
-  * \param[in] file - name of file holding the function
-  * \param[in] line - line containing the function
-  */
+* \brief assert style function that reads return codes after intercepting CUDA API calls.
+*        It prints the error string and its location to stderr if the call is unsuccessful, and exits when abort is set.
+* \param[in] code - result code of CUDA function
+* \param[in] file - name of file holding the function
+* \param[in] line - line containing the function
+*/
 
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
-{
-  if (code != cudaSuccess)
-  {
+inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true){
+  if (code != cudaSuccess){
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
diff --git a/Common/include/toolboxes/allocation_toolbox.hpp b/Common/include/toolboxes/allocation_toolbox.hpp
@@ -132,15 +132,12 @@ inline void gpu_free(T* ptr) noexcept {
  * \return Pointer to memory, always use gpu_free to deallocate.
  */
 template <class T>
-inline T* gpu_alloc_cpy(T* src_ptr, size_t size) noexcept {
+inline T* gpu_alloc_cpy(const T* src_ptr, size_t size) noexcept {
   void* ptr = nullptr;
 
 #ifdef HAVE_CUDA
   gpuErrChk(cudaMalloc((void**)(&ptr), size));
   gpuErrChk(cudaMemcpy((void*)(ptr), (void*)src_ptr, size, cudaMemcpyHostToDevice));
-  ;
-#else
-  return 0;
 #endif
 
   return static_cast<T*>(ptr);
diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -140,19 +140,25 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
     ptr = MemoryAllocation::aligned_alloc<ScalarType, true>(64, num * sizeof(ScalarType));
   };
 
-  allocAndInit(matrix, nnz * nVar * nEqn);
 
-  auto GPUAllocAndInit = [](ScalarType*& ptr, unsigned long num) {
-    ptr = GPUMemoryAllocation::gpu_alloc<ScalarType, true>(num * sizeof(ScalarType));
-  };
+  if(config->GetCUDA())
+  {
+    /*--- Allocate GPU data. ---*/
+    allocAndInit(matrix, nnz * nVar * nEqn);
 
-  auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long*& src_ptr, unsigned long num) {
-    ptr = GPUMemoryAllocation::gpu_alloc_cpy<const unsigned long>(src_ptr, num * sizeof(const unsigned long));
-  };
+    auto GPUAllocAndInit = [](ScalarType*& ptr, unsigned long num) {
+      ptr = GPUMemoryAllocation::gpu_alloc<ScalarType, true>(num * sizeof(ScalarType));
+    };
 
-  GPUAllocAndInit(d_matrix, nnz * nVar * nEqn);
-  GPUAllocAndCopy(d_row_ptr, row_ptr, (nPointDomain + 1.0));
-  GPUAllocAndCopy(d_col_ind, col_ind, nnz);
+    auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long*& src_ptr, unsigned long num) {
+      ptr = GPUMemoryAllocation::gpu_alloc_cpy<const unsigned long>(src_ptr, num * sizeof(const unsigned long));
+    };
+
+    GPUAllocAndInit(d_matrix, nnz * nVar * nEqn);
+    GPUAllocAndCopy(d_row_ptr, row_ptr, (nPointDomain + 1.0));
+    GPUAllocAndCopy(d_col_ind, col_ind, nnz);
+  }
+  
 
   if (needTranspPtr) col_ptr = geometry->GetTransposeSparsePatternMap(type).data();
 
diff --git a/Common/src/linear_algebra/CSysMatrixGPU.cu b/Common/src/linear_algebra/CSysMatrixGPU.cu
@@ -2,7 +2,7 @@
  * \file CSysMatrixGPU.cu
  * \brief Implementations of Kernels and Functions for Matrix Operations on the GPU
  * \author A. Raj
- * \version 8.1.0 "Harrier"
+ * \version 8.2.0 "Harrier"
  *
  * SU2 Project Website: https://su2code.github.io
  *
diff --git a/Common/src/linear_algebra/CSysVectorGPU.cu b/Common/src/linear_algebra/CSysVectorGPU.cu
@@ -2,7 +2,7 @@
  * \file CSysVectorGPU.cu
  * \brief Implementations of Kernels and Functions for Vector Operations on the GPU
  * \author A. Raj
- * \version 8.1.0 "Harrier"
+ * \version 8.2.0 "Harrier"
  *
  * SU2 Project Website: https://su2code.github.io
  *
diff --git a/TestCases/gpu/flatplate/lam_flatplate.cfg b/TestCases/gpu/flatplate/lam_flatplate.cfg

Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@`
`2`	`2`	`* \file CSysMatrixGPU.cu`
`3`	`3`	`* \brief Implementations of Kernels and Functions for Matrix Operations on the GPU`
`4`	`4`	`* \author A. Raj`
`5`		`- * \version 8.1.0 "Harrier"`
	`5`	`+ * \version 8.2.0 "Harrier"`
`6`	`6`	`*`
`7`	`7`	`* SU2 Project Website: https://su2code.github.io`
`8`	`8`	`*`
Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@`
`2`	`2`	`* \file CSysVectorGPU.cu`
`3`	`3`	`* \brief Implementations of Kernels and Functions for Vector Operations on the GPU`
`4`	`4`	`* \author A. Raj`
`5`		`- * \version 8.1.0 "Harrier"`
	`5`	`+ * \version 8.2.0 "Harrier"`
`6`	`6`	`*`
`7`	`7`	`* SU2 Project Website: https://su2code.github.io`
`8`	`8`	`*`