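Update branch: make CUDA an optional build feature, remove the CExecutionPath interface and GPU matmul padding option, and left-align inlet profile column headers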

areenraj · areenraj · commit 66861713b564 · 2025-06-10T09:11:54.000+05:30
diff --git a/Common/include/CConfig.hpp b/Common/include/CConfig.hpp
@@ -1145,7 +1145,6 @@ class CConfig {
   su2double Theta_Interior_Penalty_DGFEM;    /*!< \brief Factor for the symmetrizing terms in the DG discretization of the viscous fluxes. */
   unsigned short byteAlignmentMatMul;        /*!< \brief Number of bytes in the vectorization direction for the matrix multiplication. Multipe of 64. */
   unsigned short sizeMatMulPadding;          /*!< \brief The matrix size in the vectorization direction padded to a multiple of 8. Computed from byteAlignmentMatMul. */
-  unsigned short gpuSizeMatMulPadding;
   bool Compute_Entropy;                      /*!< \brief Whether or not to compute the entropy in the fluid model. */
   bool Use_Lumped_MassMatrix_DGFEM;          /*!< \brief Whether or not to use the lumped mass matrix for DGFEM. */
   bool Jacobian_Spatial_Discretization_Only; /*!< \brief Flag to know if only the exact Jacobian of the spatial discretization must be computed. */
@@ -9070,8 +9069,6 @@ class CConfig {
    */
   unsigned short GetSizeMatMulPadding(void) const { return sizeMatMulPadding; }
 
-  unsigned short GetGPUSizeMatMulPadding(void) const { return gpuSizeMatMulPadding; }
-
   /*!
    * \brief Function to make available whether or not the entropy must be computed.
    * \return The boolean whether or not the entropy must be computed.
diff --git a/Common/include/linear_algebra/CMatrixVectorProduct.hpp b/Common/include/linear_algebra/CMatrixVectorProduct.hpp
@@ -58,17 +58,6 @@
  * execution - CPU or GPU.
  */
 
- /*!
- * \class CExecutionPath
- * \brief Dummy super class that holds the correct member functions in its child classes
- */
-template <class ScalarType>
-class CExecutionPath {
- public:
-  virtual void mat_vec_prod(const CSysVector<ScalarType>& u, CSysVector<ScalarType>& v, CGeometry* geometry,
-                            const CConfig* config, const CSysMatrix<ScalarType>& matrix) = 0;
-};
-
 template <class ScalarType>
 class CMatrixVectorProduct {
  public:
@@ -89,7 +78,6 @@ class CSysMatrixVectorProduct final : public CMatrixVectorProduct<ScalarType> {
   const CSysMatrix<ScalarType>& matrix; /*!< \brief pointer to matrix that defines the product. */
   CGeometry* geometry;                  /*!< \brief geometry associated with the matrix. */
   const CConfig* config;                /*!< \brief config of the problem. */
-  CExecutionPath<ScalarType>* exec;     /*!< \brief interface that decides which path of execution to choose from. */
 
  public:
   /*!
@@ -100,9 +88,7 @@ class CSysMatrixVectorProduct final : public CMatrixVectorProduct<ScalarType> {
    */
   inline CSysMatrixVectorProduct(const CSysMatrix<ScalarType>& matrix_ref, CGeometry* geometry_ref,
                                  const CConfig* config_ref)
-      : matrix(matrix_ref), geometry(geometry_ref), config(config_ref) {
-
-  }
+      : matrix(matrix_ref), geometry(geometry_ref), config(config_ref) {}
 
   /*!
    * \note This class cannot be default constructed as that would leave us with invalid pointers.
@@ -115,16 +101,14 @@ class CSysMatrixVectorProduct final : public CMatrixVectorProduct<ScalarType> {
    * \param[out] v - CSysVector that is the result of the product
    */
   inline void operator()(const CSysVector<ScalarType>& u, CSysVector<ScalarType>& v) const override {
-    #ifdef HAVE_CUDA
-    if(config->GetCUDA()) 
-    {
+#ifdef HAVE_CUDA
+    if (config->GetCUDA()) {
       matrix.GPUMatrixVectorProduct(u, v, geometry, config);
+    } else {
+      matrix.MatrixVectorProduct(u, v, geometry, config);
     }
-    else {
+#else
     matrix.MatrixVectorProduct(u, v, geometry, config);
-    }
-    #else
-    matrix.MatrixVectorProduct(u, v, geometry, config)
-    #endif
+#endif
   }
 };
diff --git a/Common/include/template_nvblas.conf b/Common/include/template_nvblas.conf
diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp
@@ -2432,7 +2432,6 @@ void CConfig::SetConfig_Options() {
   /* DESCRIPTION: Number of aligned bytes for the matrix multiplications. Multiple of 64. (128 by default) */
   addUnsignedShortOption("ALIGNED_BYTES_MATMUL", byteAlignmentMatMul, 128);
 
-  addUnsignedShortOption("GPU Matrix Multiplication Size", gpuSizeMatMulPadding, 65472);
   /*!\par CONFIG_CATEGORY: FEA solver \ingroup Config*/
   /*--- Options related to the FEA solver ---*/
 
diff --git a/Common/src/linear_algebra/meson.build b/Common/src/linear_algebra/meson.build
@@ -2,7 +2,9 @@ common_src += files(['CSysSolve_b.cpp',
                      'CSysSolve.cpp',
                      'CSysVector.cpp',
                      'CSysMatrix.cpp',
-                     'CSysMatrixGPU.cu',
-                     'CSysVectorGPU.cu',
                      'CPastixWrapper.cpp',
                      'blas_structure.cpp'])
+
+if get_option('enable-cuda')
+  common_src += files(['CSysMatrixGPU.cu', 'CSysVectorGPU.cu'])
+endif
diff --git a/SU2_CFD/src/fluid/CFluidFlamelet.cpp b/SU2_CFD/src/fluid/CFluidFlamelet.cpp
@@ -27,7 +27,7 @@
 
 #include <memory>
 #include <string>
-#include "../include/fluid/CFluidFlamelet.hpp"
+#include "../../include/fluid/CFluidFlamelet.hpp"
 #include "../../../Common/include/containers/CLookUpTable.hpp"
 #if defined(HAVE_MLPCPP)
 #include "../../../subprojects/MLPCpp/include/CLookUp_ANN.hpp"
diff --git a/SU2_CFD/src/solvers/CFEM_DG_NSSolver.cpp b/SU2_CFD/src/solvers/CFEM_DG_NSSolver.cpp
@@ -1671,8 +1671,8 @@ void CFEM_DG_NSSolver::ADER_DG_AliasedPredictorResidual_3D(CConfig
   /* Determine the offset between the r-derivatives and s-derivatives of the
      fluxes in the integration points and the offset between the r-derivatives
      and s-derivatives of the solution in the DOFs. */
-  const unsigned int offDerivSol    = NPad*nDOFs;
-  const unsigned int offDerivFluxes = NPad*nInt;
+  const unsigned short offDerivSol    = NPad*nDOFs;
+  const unsigned short offDerivFluxes = NPad*nInt;
 
   /* Store the number of metric points per integration point/DOF for readability. */
   const unsigned short nMetricPerPoint = 10;  /* nDim*nDim + 1. */
@@ -3869,7 +3869,7 @@ void CFEM_DG_NSSolver::ResidualFaces(CConfig             *config,
     /*--- Subtract half of the viscous fluxes from the inviscid fluxes. The
           factor 0.5 comes from the fact that the average of the viscous fluxes
           of side 0 and side 1 must be taken in the DG-FEM formulation. ---*/
-    for(unsigned int j=0; j<(NPad*nInt); ++j) fluxes[j] -= 0.5*viscFluxes[j];
+    for(unsigned short j=0; j<(NPad*nInt); ++j) fluxes[j] -= 0.5*viscFluxes[j];
 
     /*---------------------------*/
     /*--- Side 1 of the face. ---*/
diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp
@@ -3619,38 +3619,42 @@ void CSolver::LoadInletProfile(CGeometry **geometry,
     columnValue << setprecision(15);
     columnValue << std::scientific;
 
-    su2double p_total, t_total;
-    const su2double* flow_dir = nullptr;
+    // Set the variables to store the flow variables. For a subsonic inlet the total conditions
+    // are stored in p_value and t_value and the flow direction in flow_dir_or_vel, while for a
+    // supersonic inlet the static conditions are stored in p_value and t_value and the flow
+    // velocity in flow_dir_or_vel.
+    su2double p_value, t_value;
+    const su2double* flow_dir_or_vel = nullptr;
 
     if (KIND_MARKER == INLET_FLOW) {
-      p_total = config->GetInletPtotal(Marker_Tag);
-      t_total = config->GetInletTtotal(Marker_Tag);
-      flow_dir = config->GetInletFlowDir(Marker_Tag);
+      p_value = config->GetInletPtotal(Marker_Tag);
+      t_value = config->GetInletTtotal(Marker_Tag);
+      flow_dir_or_vel = config->GetInletFlowDir(Marker_Tag);
     } else if (KIND_MARKER == SUPERSONIC_INLET) {
-      p_total = config->GetInlet_Pressure(Marker_Tag);
-      t_total = config->GetInlet_Temperature(Marker_Tag);
-      flow_dir = config->GetInlet_Velocity(Marker_Tag);
+      p_value = config->GetInlet_Pressure(Marker_Tag);
+      t_value = config->GetInlet_Temperature(Marker_Tag);
+      flow_dir_or_vel = config->GetInlet_Velocity(Marker_Tag);
     } else {
       SU2_MPI::Error("Unsupported type of inlet.", CURRENT_FUNCTION);
     }
-    columnValue << t_total << "\t" << p_total <<"\t";
+    columnValue << t_value << "\t" << p_value << "\t";
     for (unsigned short iDim = 0; iDim < nDim; iDim++) {
-      columnValue << flow_dir[iDim] <<"\t";
+      columnValue << flow_dir_or_vel[iDim] << "\t";
     }
 
-    columnName << "# COORD-X  " << setw(24) << "COORD-Y    " << setw(24);
-    if(nDim==3) columnName << "COORD-Z    " << setw(24);
+    columnName << left << setw(24) << "# COORD-X" << left << setw(24) << "COORD-Y";
+    if (nDim == 3) columnName << left << setw(24) << "COORD-Z";
 
     if (KIND_MARKER == SUPERSONIC_INLET) {
-      columnName << "TEMPERATURE" << setw(24) << "PRESSURE   " << setw(24);
+      columnName << left << setw(24) << "TEMPERATURE" << left << setw(24) << "PRESSURE";
     } else if (config->GetKind_Regime() == ENUM_REGIME::COMPRESSIBLE) {
       switch (config->GetKind_Inlet()) {
         /*--- compressible conditions ---*/
         case INLET_TYPE::TOTAL_CONDITIONS:
-          columnName << "TEMPERATURE" << setw(24) << "PRESSURE   " << setw(24);
+          columnName << left << setw(24) << "TOTAL_TEMPERATURE" << left << setw(24) << "TOTAL_PRESSURE";
           break;
         case INLET_TYPE::MASS_FLOW:
-          columnName << "DENSITY    " << setw(24) << "VELOCITY   " << setw(24);
+          columnName << left << setw(24) << "DENSITY" << left << setw(24) << "VELOCITY";
           break;
         default:
           SU2_MPI::Error("Unsupported INLET_TYPE.", CURRENT_FUNCTION);
@@ -3660,30 +3664,36 @@ void CSolver::LoadInletProfile(CGeometry **geometry,
       switch (config->GetKind_Inc_Inlet(Marker_Tag)) {
         /*--- incompressible conditions ---*/
         case INLET_TYPE::VELOCITY_INLET:
-          columnName << "TEMPERATURE" << setw(24) << "VELOCITY   " << setw(24);
+          columnName << left << setw(24) << "TEMPERATURE" << left << setw(24) << "VELOCITY";
           break;
         case INLET_TYPE::PRESSURE_INLET:
-          columnName << "TEMPERATURE" << setw(24) << "PRESSURE   " << setw(24);
+          columnName << left << setw(24) << "TEMPERATURE" << left << setw(24) << "PRESSURE";
           break;
         default:
           SU2_MPI::Error("Unsupported INC_INLET_TYPE.", CURRENT_FUNCTION);
           break;
       }
     }
 
-    columnName << "NORMAL-X   " << setw(24) << "NORMAL-Y   " << setw(24);
-    if(nDim==3)  columnName << "NORMAL-Z   " << setw(24);
+    if (KIND_MARKER == SUPERSONIC_INLET) {
+      columnName << left << setw(24) << "VELOCITY-X" << left << setw(24) << "VELOCITY-Y";
+      if (nDim == 3) columnName << left << setw(24) << "VELOCITY-Z";
+    } else {
+      columnName << left << setw(24) << "NORMAL-X" << left << setw(24) << "NORMAL-Y";
+      if (nDim == 3) columnName << left << setw(24) << "NORMAL-Z";
+    }
 
     switch (TurbModelFamily(config->GetKind_Turb_Model())) {
-      case TURB_FAMILY::NONE: break;
+      case TURB_FAMILY::NONE:
+        break;
       case TURB_FAMILY::SA:
         /*--- 1-equation turbulence model: SA ---*/
-        columnName << "NU_TILDE   " << setw(24);
+        columnName << left << setw(24) << "NU_TILDE";
         columnValue << config->GetNuFactor_FreeStream() * config->GetViscosity_FreeStream() / config->GetDensity_FreeStream() <<"\t";
         break;
       case TURB_FAMILY::KW:
         /*--- 2-equation turbulence model (SST) ---*/
-        columnName << "TKE        " << setw(24) << "DISSIPATION" << setw(24);
+        columnName << left << setw(24) << "TKE" << left << setw(24) << "DISSIPATION";
         columnValue << config->GetTke_FreeStream() << "\t" << config->GetOmega_FreeStream() <<"\t";
         break;
     }
@@ -3692,22 +3702,22 @@ void CSolver::LoadInletProfile(CGeometry **geometry,
       case SPECIES_MODEL::NONE: break;
       case SPECIES_MODEL::SPECIES_TRANSPORT:
         for (unsigned short iVar = 0; iVar < nVar_Species; iVar++) {
-          columnName << "SPECIES_" + std::to_string(iVar) + "  " << setw(24);
+          columnName << left << setw(24) << "SPECIES_" + std::to_string(iVar);
           columnValue << config->GetInlet_SpeciesVal(Marker_Tag)[iVar] << "\t";
         }
         break;
       case SPECIES_MODEL::FLAMELET: {
         const auto& flamelet_config_options = config->GetFlameletParsedOptions();
         /*--- 2-equation flamelet model ---*/
-        columnName << "PROGRESSVAR" << setw(24) << "ENTHALPYTOT" << setw(24);
-        columnValue << config->GetInlet_SpeciesVal(Marker_Tag)[0] << "\t" <<  config->GetInlet_SpeciesVal(Marker_Tag)[1]<<"\t";
+        columnName << left << setw(24) << "PROGRESSVAR" << left << setw(24) << "ENTHALPYTOT";
+        columnValue << config->GetInlet_SpeciesVal(Marker_Tag)[0] << "\t" << config->GetInlet_SpeciesVal(Marker_Tag)[1] <<"\t";
         /*--- auxiliary species transport equations ---*/
         for (unsigned short iReactant = 0; iReactant < flamelet_config_options.n_user_scalars; iReactant++) {
-          columnName << flamelet_config_options.user_scalar_names[iReactant] << setw(24);
+          columnName << left << setw(24) << flamelet_config_options.user_scalar_names[iReactant];
           columnValue << config->GetInlet_SpeciesVal(Marker_Tag)[flamelet_config_options.n_control_vars + iReactant] << "\t";
         }
-        }
         break;
+      }
     }
 
     columnNames.push_back(columnName.str());
diff --git a/meson.build b/meson.build
@@ -1,5 +1,5 @@
 
-project('SU2', 'c', 'cpp', 'cuda',
+project('SU2', 'c', 'cpp',
         version: '8.2.0 "Harrier"',
         meson_version: '>=0.61.1',
         license: 'LGPL2',
@@ -16,8 +16,10 @@ endif
 pymod = import('python')
 python = pymod.find_installation()
 
-add_global_arguments(['-arch=sm_86'], language : 'cuda')
-
+if get_option('enable-cuda')
+  add_languages('cuda')
+  add_global_arguments('-arch=sm_86', language : 'cuda')
+endif
 
 su2_cpp_args = []
 su2_deps     = [declare_dependency(include_directories: 'externals/CLI11')]
@@ -200,9 +202,10 @@ if get_option('enable-pastix')
   su2_deps += pastix_dep
 endif
 
+# CUDA dependencies
 if get_option('enable-cuda')
   su2_cpp_args += '-DHAVE_CUDA'
-  gpu_dep = dependency('cuda', version : '>=10', modules : ['cudart', 'nvblas'])
+  gpu_dep = dependency('cuda', version : '>=10', modules : ['cudart'])
   su2_deps += gpu_dep
 endif