Skip to content

Commit 9dd3028

Browse files
committed
resolving conflicts
2 parents 6686171 + a0ab3da commit 9dd3028

File tree

9 files changed

+74
-136
lines changed

9 files changed

+74
-136
lines changed

Common/include/linear_algebra/CMatrixVectorProduct.hpp

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,6 @@
5050
* handle the different types of matrix-vector products and still be
5151
* passed to a single implementation of the Krylov solvers.
5252
* This abstraction may also be used to define matrix-free products.
53-
*
54-
* There is also the use of a dummy class being made to select the
55-
* correct function as defined by the user while deciding between
56-
* CPU or GPU execution. This dummy class calls the correct member
57-
* functions from its derived classes to map the suitable path of
58-
* execution - CPU or GPU.
5953
*/
6054

6155
template <class ScalarType>
@@ -101,14 +95,17 @@ class CSysMatrixVectorProduct final : public CMatrixVectorProduct<ScalarType> {
10195
* \param[out] v - CSysVector that is the result of the product
10296
*/
10397
inline void operator()(const CSysVector<ScalarType>& u, CSysVector<ScalarType>& v) const override {
104-
#ifdef HAVE_CUDA
10598
if (config->GetCUDA()) {
99+
#ifdef HAVE_CUDA
106100
matrix.GPUMatrixVectorProduct(u, v, geometry, config);
101+
#else
102+
SU2_MPI::Error(
103+
"\nError in launching Matrix-Vector Product Function\nENABLE_CUDA is set to YES\nPlease compile with CUDA "
104+
"options enabled in Meson to access GPU Functions",
105+
CURRENT_FUNCTION);
106+
#endif
107107
} else {
108108
matrix.MatrixVectorProduct(u, v, geometry, config);
109109
}
110-
#else
111-
matrix.MatrixVectorProduct(u, v, geometry, config);
112-
#endif
113110
}
114111
};

Common/include/linear_algebra/CSysMatrix.hpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ class CSysMatrix {
148148
ScalarType* d_matrix; /*!< \brief Device Pointer to store the matrix values on the GPU. */
149149
const unsigned long* d_row_ptr; /*!< \brief Device Pointers to the first element in each row. */
150150
const unsigned long* d_col_ind; /*!< \brief Device Column index for each of the elements in val(). */
151-
const unsigned long* d_dia_ptr; /*!< \brief Device Column index for each of the elements in val(). */
151+
bool useCuda; /*!< \brief Boolean that indicates whether user has enabled CUDA or not.
152+
Mainly used to conditionally free GPU memory in the class destructor. */
152153

153154
ScalarType* ILU_matrix; /*!< \brief Entries of the ILU sparse matrix. */
154155
unsigned long nnz_ilu; /*!< \brief Number of possible nonzero entries in the matrix (ILU). */
@@ -856,11 +857,38 @@ class CSysMatrix {
856857
* \param[in] config - Definition of the particular problem.
857858
* \param[out] prod - Result of the product.
858859
*/
859-
860860
void GPUMatrixVectorProduct(const CSysVector<ScalarType>& vec, CSysVector<ScalarType>& prod, CGeometry* geometry,
861861
const CConfig* config) const;
862862

863863
/*!
864+
<<<<<<< HEAD
865+
=======
866+
* \brief Performs first step of the LU_SGS Preconditioner building
867+
* \param[in] vec - CSysVector to be multiplied by the sparse matrix A.
868+
* \param[in] geometry - Geometrical definition of the problem.
869+
* \param[in] config - Definition of the particular problem.
870+
* \param[out] prod - Result of the product.
871+
*/
872+
void GPUFirstSymmetricIteration(ScalarType& vec, ScalarType& prod, CGeometry* geometry, const CConfig* config) const;
873+
874+
/*!
875+
* \brief Performs second step of the LU_SGS Preconditioner building
876+
* \param[in] geometry - Geometrical definition of the problem.
877+
* \param[in] config - Definition of the particular problem.
878+
* \param[out] prod - Result of the product.
879+
*/
880+
void GPUSecondSymmetricIteration(ScalarType& prod, CGeometry* geometry, const CConfig* config) const;
881+
882+
/*!
883+
* \brief Performs Gaussian Elimination between diagonal blocks of the matrix and the prod vector
884+
* \param[in] geometry - Geometrical definition of the problem.
885+
* \param[in] config - Definition of the particular problem.
886+
* \param[out] prod - Result of the product.
887+
*/
888+
void GPUGaussElimination(ScalarType& prod, CGeometry* geometry, const CConfig* config) const;
889+
890+
/*!
891+
>>>>>>> upstream/develop
864892
* \brief Multiply CSysVector by the preconditioner all of which are stored on the device
865893
* \param[in] vec - CSysVector to be multiplied by the preconditioner.
866894
* \param[out] prod - Result of the product A*vec.

Common/include/linear_algebra/CSysVector.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
7272
unsigned long nElmDomain = 0; /*!< \brief Total number of elements without Ghost cells. */
7373
unsigned long nVar = 1; /*!< \brief Number of elements in a block. */
7474

75-
ScalarType* d_vec_val; /*!< \brief Device Pointer to store the vector values on the GPU. */
75+
ScalarType* d_vec_val = nullptr; /*!< \brief Device Pointer to store the vector values on the GPU. */
7676

7777
/*!
7878
* \brief Generic initialization from a scalar or array.
@@ -217,7 +217,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
217217
void GPUSetVal(ScalarType val, bool trigger = true) const;
218218

219219
/*!
220-
* \brief return the number of local elements in the CSysVector
220+
* \brief return device pointer that points to the CSysVector values in GPU memory
221221
*/
222222
inline ScalarType* GetDevicePointer() const { return d_vec_val; }
223223

Common/include/linear_algebra/GPUComms.cuh

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
\file GPUComms.cuh
33
* \brief Header file containing universal functions that provide basic and essential utilities for other GPU processes
44
* \author A. Raj
5-
* \version 8.1.0 "Harrier"
5+
* \version 8.2.0 "Harrier"
66
*
77
* SU2 Project Website: https://su2code.github.io
88
*
@@ -26,7 +26,7 @@
2626
*/
2727

2828
#include<cuda_runtime.h>
29-
#include"iostream"
29+
#include<iostream>
3030

3131
namespace KernelParameters{
3232

@@ -36,21 +36,19 @@ namespace KernelParameters{
3636
/*Returns the rounded down value of the decimal quotient to the previous integer (in all cases)*/
3737
inline constexpr int rounded_down_division(const int divisor, int dividend) { return ((dividend - divisor + 1) / divisor); }
3838

39-
const int MVP_BLOCK_SIZE = 1024;
40-
const int MVP_WARP_SIZE = 32;
39+
static constexpr int MVP_BLOCK_SIZE = 1024;
40+
static constexpr int MVP_WARP_SIZE = 32;
4141
}
4242
/*!
43-
* \brief assert style function that reads return codes after intercepting CUDA API calls.
44-
* It returns the result code and its location if the call is unsuccessful.
45-
* \param[in] code - result code of CUDA function
46-
* \param[in] file - name of file holding the function
47-
* \param[in] line - line containing the function
48-
*/
43+
* \brief assert style function that reads return codes after intercepting CUDA API calls.
44+
* It prints the error string and its location (file and line) to stderr if the call is unsuccessful, and exits with the result code when abort is set.
45+
* \param[in] code - result code of CUDA function
46+
* \param[in] file - name of file holding the function
47+
* \param[in] line - line containing the function
48+
*/
4949

50-
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
51-
{
52-
if (code != cudaSuccess)
53-
{
50+
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true){
51+
if (code != cudaSuccess){
5452
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
5553
if (abort) exit(code);
5654
}

Common/include/toolboxes/allocation_toolbox.hpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,12 @@ inline void gpu_free(T* ptr) noexcept {
132132
* \return Pointer to memory, always use gpu_free to deallocate.
133133
*/
134134
template <class T>
135-
inline T* gpu_alloc_cpy(T* src_ptr, size_t size) noexcept {
135+
inline T* gpu_alloc_cpy(const T* src_ptr, size_t size) noexcept {
136136
void* ptr = nullptr;
137137

138138
#ifdef HAVE_CUDA
139139
gpuErrChk(cudaMalloc((void**)(&ptr), size));
140140
gpuErrChk(cudaMemcpy((void*)(ptr), (void*)src_ptr, size, cudaMemcpyHostToDevice));
141-
;
142-
#else
143-
return 0;
144141
#endif
145142

146143
return static_cast<T*>(ptr);

Common/src/linear_algebra/CSysMatrix.cpp

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,11 @@ CSysMatrix<ScalarType>::~CSysMatrix() {
6868
MemoryAllocation::aligned_free(matrix);
6969
MemoryAllocation::aligned_free(invM);
7070

71-
GPUMemoryAllocation::gpu_free(d_matrix);
72-
GPUMemoryAllocation::gpu_free(d_row_ptr);
73-
GPUMemoryAllocation::gpu_free(d_col_ind);
71+
if (useCuda) {
72+
GPUMemoryAllocation::gpu_free(d_matrix);
73+
GPUMemoryAllocation::gpu_free(d_row_ptr);
74+
GPUMemoryAllocation::gpu_free(d_col_ind);
75+
}
7476

7577
#ifdef USE_MKL
7678
mkl_jit_destroy(MatrixMatrixProductJitter);
@@ -142,19 +144,23 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
142144

143145
allocAndInit(matrix, nnz * nVar * nEqn);
144146

145-
auto GPUAllocAndInit = [](ScalarType*& ptr, unsigned long num) {
146-
ptr = GPUMemoryAllocation::gpu_alloc<ScalarType, true>(num * sizeof(ScalarType));
147-
};
147+
useCuda = config->GetCUDA();
148148

149-
auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long*& src_ptr, unsigned long num) {
150-
ptr = GPUMemoryAllocation::gpu_alloc_cpy<const unsigned long>(src_ptr, num * sizeof(const unsigned long));
151-
};
149+
if (useCuda) {
150+
/*--- Allocate GPU data. ---*/
151+
auto GPUAllocAndInit = [](ScalarType*& ptr, unsigned long num) {
152+
ptr = GPUMemoryAllocation::gpu_alloc<ScalarType, true>(num * sizeof(ScalarType));
153+
};
154+
155+
auto GPUAllocAndCopy = [](const unsigned long*& ptr, const unsigned long*& src_ptr, unsigned long num) {
156+
ptr = GPUMemoryAllocation::gpu_alloc_cpy<const unsigned long>(src_ptr, num * sizeof(const unsigned long));
157+
};
158+
159+
GPUAllocAndInit(d_matrix, nnz * nVar * nEqn);
160+
GPUAllocAndCopy(d_row_ptr, row_ptr, (nPointDomain + 1.0));
161+
GPUAllocAndCopy(d_col_ind, col_ind, nnz);
162+
}
152163

153-
GPUAllocAndInit(d_matrix, nnz * nVar * nEqn);
154-
GPUAllocAndCopy(d_row_ptr, row_ptr, (nPointDomain + 1.0));
155-
GPUAllocAndCopy(d_col_ind, col_ind, nnz);
156-
GPUAllocAndCopy(d_dia_ptr, dia_ptr, nPointDomain);
157-
158164
if (needTranspPtr) col_ptr = geometry->GetTransposeSparsePatternMap(type).data();
159165

160166
if (type == ConnectivityType::FiniteVolume) {

Common/src/linear_algebra/CSysMatrixGPU.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* \file CSysMatrixGPU.cu
33
* \brief Implementations of Kernels and Functions for Matrix Operations on the GPU
44
* \author A. Raj
5-
* \version 8.1.0 "Harrier"
5+
* \version 8.2.0 "Harrier"
66
*
77
* SU2 Project Website: https://su2code.github.io
88
*

Common/src/linear_algebra/CSysVectorGPU.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* \file CSysVectorGPU.cu
33
* \brief Implementations of Kernels and Functions for Vector Operations on the GPU
44
* \author A. Raj
5-
* \version 8.1.0 "Harrier"
5+
* \version 8.2.0 "Harrier"
66
*
77
* SU2 Project Website: https://su2code.github.io
88
*

TestCases/gpu/flatplate/lam_flatplate.cfg

Lines changed: 0 additions & 88 deletions
This file was deleted.

0 commit comments

Comments
 (0)