Skip to content

Commit 2468017

Browse files
committed
some more MG MPI fixes
1 parent 1945bdd commit 2468017

File tree

7 files changed

+307
-17
lines changed

7 files changed

+307
-17
lines changed

Common/include/CConfig.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ class CConfig {
503503
bool MG_Smooth_Output; /*!< \brief Output per-iteration multigrid smoothing info. */
504504
bool MG_Implicit_Lines; /*!< \brief Enable implicit-lines agglomeration from walls. */
505505
bool MG_Implicit_Debug; /*!< \brief Enable debug output for implicit-lines agglomeration. */
506+
bool MG_DebugHaloCoordinates; /*!< \brief Enable halo CV coordinate validation for multigrid. */
506507
su2double MG_Smooth_Coeff; /*!< \brief Smoothing coefficient for multigrid correction smoothing. */
507508
su2double *LocationStations; /*!< \brief Airfoil sections in wing slicing subroutine. */
508509

@@ -3875,6 +3876,12 @@ class CConfig {
38753876
*/
38763877
bool GetMG_Implicit_Debug() const { return MG_Implicit_Debug; }
38773878

3879+
/*!\
3880+
* \brief Get whether halo CV coordinate validation is enabled for multigrid.
3881+
* \return True if halo coordinate validation is enabled.
3882+
*/
3883+
bool GetMG_DebugHaloCoordinates() const { return MG_DebugHaloCoordinates; }
3884+
38783885
/*!\
38793886
* \brief Get the minimum mesh size threshold used to compute effective MG levels.
38803887
* \return Minimum mesh size per coarsest level.

Common/include/geometry/CMultiGridGeometry.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,4 +181,11 @@ class CMultiGridGeometry final : public CGeometry {
181181
* \param[in] val_marker - Index of the boundary marker.
182182
*/
183183
void SetMultiGridWallTemperature(const CGeometry* fine_grid, unsigned short val_marker) override;
184+
185+
/*!
186+
* \brief Validate that halo CV coordinates match corresponding domain CVs on remote ranks (debug feature).
187+
* \param[in] config - Definition of the particular problem.
188+
* \param[in] iMesh - Multigrid level for reporting.
189+
*/
190+
void ValidateHaloCoordinates(const CConfig* config, unsigned short iMesh) const;
184191
};

Common/src/CConfig.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,6 +1969,9 @@ void CConfig::SetConfig_Options() {
19691969
/*!\brief MG_IMPLICIT_DEBUG\n DESCRIPTION: Enable debug output for implicit-lines agglomeration. DEFAULT: NO \ingroup Config*/
19701970
addBoolOption("MG_IMPLICIT_DEBUG", MG_Implicit_Debug, false);
19711971

1972+
/*!\brief MG_DEBUG_HALO_COORDINATES\n DESCRIPTION: Enable halo CV coordinate validation for multigrid (expensive MPI check). DEFAULT: NO \ingroup Config*/
1973+
addBoolOption("MG_DEBUG_HALO_COORDINATES", MG_DebugHaloCoordinates, false);
1974+
19721975
/*!\brief MG_MIN_MESHSIZE
19731976
\ DESCRIPTION: Minimum global mesh size (points) to allow another multigrid level. DEFAULT: 1000 \ingroup Config*/
19741977
addUnsignedLongOption("MG_MIN_MESHSIZE", MG_Min_MeshSize, 1000);

Common/src/geometry/CMultiGridGeometry.cpp

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,107 @@ CMultiGridGeometry::CMultiGridGeometry(CGeometry* fine_grid, CConfig* config, un
758758
<< " orphaned halo coarse CVs found - this indicates a logic error!" << endl;
759759
}
760760
}
761+
762+
/*--- Debug validation: Verify halo CV coordinates match domain CVs on remote ranks ---*/
763+
/*--- Note: This validation is deferred until after SetVertex() is called, as vertices ---*/
764+
/*--- are not yet initialized at the end of the constructor. The validation is performed ---*/
765+
/*--- by calling ValidateHaloCoordinates() after SetVertex() in the driver. ---*/
766+
767+
/*--- For now, we skip this validation in the constructor to avoid segfaults. ---*/
768+
/*--- The check itself lives in ValidateHaloCoordinates(); the block below is a disabled copy of it. ---*/
769+
770+
#if 0 // Disabled - causes segfault as vertex array not yet initialized
771+
if (size > SINGLE_NODE && config->GetMG_DebugHaloCoordinates()) {
772+
773+
if (rank == MASTER_NODE) {
774+
cout << "\n--- MG Halo Coordinate Validation (Level " << iMesh << ") ---" << endl;
775+
}
776+
777+
/*--- For each SEND_RECEIVE marker pair, exchange coordinates and validate ---*/
778+
for (auto iMarker = 0u; iMarker < config->GetnMarker_All(); iMarker++) {
779+
if ((config->GetMarker_All_KindBC(iMarker) == SEND_RECEIVE) && (config->GetMarker_All_SendRecv(iMarker) > 0)) {
780+
const auto MarkerS = iMarker;
781+
const auto MarkerR = iMarker + 1;
782+
783+
const auto send_to = config->GetMarker_All_SendRecv(MarkerS) - 1;
784+
const auto receive_from = abs(config->GetMarker_All_SendRecv(MarkerR)) - 1;
785+
786+
const auto nVertexS = nVertex[MarkerS];
787+
const auto nVertexR = nVertex[MarkerR];
788+
789+
/*--- Allocate buffers for coordinate exchange ---*/
790+
vector<su2double> Buffer_Send_Coord(nVertexS * nDim);
791+
vector<su2double> Buffer_Receive_Coord(nVertexR * nDim);
792+
793+
/*--- Pack SEND coordinates (domain CVs being sent) ---*/
794+
for (auto iVertex = 0ul; iVertex < nVertexS; iVertex++) {
795+
const auto iPoint = vertex[MarkerS][iVertex]->GetNode();
796+
const auto* Coord = nodes->GetCoord(iPoint);
797+
for (auto iDim = 0u; iDim < nDim; iDim++) {
798+
Buffer_Send_Coord[iVertex * nDim + iDim] = Coord[iDim];
799+
}
800+
}
801+
802+
/*--- Exchange coordinates ---*/
803+
SU2_MPI::Sendrecv(Buffer_Send_Coord.data(), nVertexS * nDim, MPI_DOUBLE, send_to, 0,
804+
Buffer_Receive_Coord.data(), nVertexR * nDim, MPI_DOUBLE, receive_from, 0,
805+
SU2_MPI::GetComm(), MPI_STATUS_IGNORE);
806+
807+
/*--- Validate RECEIVE coordinates against local halo CVs ---*/
808+
unsigned long nMismatch = 0;
809+
su2double maxError = 0.0;
810+
su2double tolerance = 1e-10;
811+
812+
for (auto iVertex = 0ul; iVertex < nVertexR; iVertex++) {
813+
const auto iPoint = vertex[MarkerR][iVertex]->GetNode();
814+
const auto* Coord_Local = nodes->GetCoord(iPoint);
815+
816+
su2double error = 0.0;
817+
for (auto iDim = 0u; iDim < nDim; iDim++) {
818+
su2double coord_remote = Buffer_Receive_Coord[iVertex * nDim + iDim];
819+
su2double diff = fabs(Coord_Local[iDim] - coord_remote);
820+
error += diff * diff;
821+
}
822+
error = sqrt(error);
823+
824+
if (error > tolerance) {
825+
nMismatch++;
826+
maxError = max(maxError, error);
827+
828+
if (nMismatch <= 5) { // Only print first 5 mismatches
829+
cout << "COORD MISMATCH [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
830+
<< ", Vertex " << iVertex << ", Point " << iPoint << "]: ";
831+
cout << "Local=(";
832+
for (auto iDim = 0u; iDim < nDim; iDim++) {
833+
cout << Coord_Local[iDim];
834+
if (iDim < nDim - 1) cout << ", ";
835+
}
836+
cout << "), Remote=(";
837+
for (auto iDim = 0u; iDim < nDim; iDim++) {
838+
cout << Buffer_Receive_Coord[iVertex * nDim + iDim];
839+
if (iDim < nDim - 1) cout << ", ";
840+
}
841+
cout << "), Error=" << error << endl;
842+
}
843+
}
844+
}
845+
846+
if (nMismatch > 0) {
847+
cout << "WARNING [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
848+
<< "]: " << nMismatch << " coordinate mismatches detected (max error: "
849+
<< maxError << ")" << endl;
850+
} else if (nVertexR > 0) {
851+
cout << "INFO [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
852+
<< "]: All " << nVertexR << " halo CV coordinates match (tol=" << tolerance << ")" << endl;
853+
}
854+
}
855+
}
856+
857+
if (rank == MASTER_NODE) {
858+
cout << "--- End MG Halo Coordinate Validation ---\n" << endl;
859+
}
860+
}
861+
#endif // Disabled validation code
761862
#endif // HAVE_MPI
762863

763864
/*--- Update the number of points after the MPI agglomeration ---*/
@@ -1835,3 +1936,104 @@ su2double CMultiGridGeometry::ComputeLocalCurvature(const CGeometry* fine_grid,
18351936

18361937
return max_angle;
18371938
}
1939+
1940+
/*!
 * \brief Debug check: compare the coordinates received from a neighbouring rank with the
 *        local coordinates stored for the vertices of the matching receive marker.
 *
 * The whole body is compiled only when HAVE_MPI is defined; otherwise the function is empty.
 * It returns immediately when running on a single rank or when
 * CConfig::GetMG_DebugHaloCoordinates() is false.
 *
 * For every marker of kind SEND_RECEIVE with a positive SendRecv value, the coordinates of the
 * send-marker vertices are packed, exchanged with one Sendrecv call, and compared entry by entry
 * with the coordinates of the vertices on marker iMarker + 1. A vertex counts as a mismatch when
 * the Euclidean distance exceeds an absolute tolerance of 1e-10. Results are written to cout.
 *
 * \param[in] config - Definition of the particular problem (marker kinds, SendRecv values, debug flag).
 * \param[in] iMesh - Multigrid level, used only in the opening banner.
 */
void CMultiGridGeometry::ValidateHaloCoordinates(const CConfig* config, unsigned short iMesh) const {
1941+
#ifdef HAVE_MPI
1942+
1943+
int size = SU2_MPI::GetSize();
1944+
int rank = SU2_MPI::GetRank();
1945+
1946+
if (size == SINGLE_NODE || !config->GetMG_DebugHaloCoordinates()) {
1947+
return; // Skip if single-node or debug option disabled
1948+
}
1949+
1950+
if (rank == MASTER_NODE) {  // Opening banner is printed by the master rank only
1951+
cout << "\n--- MG Halo Coordinate Validation (Level " << iMesh << ") ---" << endl;
1952+
}
1953+
1954+
/*--- For each SEND_RECEIVE marker pair, exchange coordinates and validate. Only markers with a ---*/
/*--- positive SendRecv value start a pair; they are treated as the sending side. ---*/
1955+
for (auto iMarker = 0u; iMarker < config->GetnMarker_All(); iMarker++) {
1956+
if ((config->GetMarker_All_KindBC(iMarker) == SEND_RECEIVE) && (config->GetMarker_All_SendRecv(iMarker) > 0)) {
1957+
const auto MarkerS = iMarker;
1958+
const auto MarkerR = iMarker + 1;  // NOTE(review): assumes the matching receive marker is stored directly after the send marker; no bound check here - confirm
1959+
1960+
const auto send_to = config->GetMarker_All_SendRecv(MarkerS) - 1;  // Stored value minus one is used as the destination rank
1961+
const auto receive_from = abs(config->GetMarker_All_SendRecv(MarkerR)) - 1;  // Absolute stored value minus one is used as the source rank
1962+
1963+
const auto nVertexS = nVertex[MarkerS];
1964+
const auto nVertexR = nVertex[MarkerR];
1965+
1966+
/*--- Allocate buffers for coordinate exchange (nDim consecutive values per vertex) ---*/
1967+
vector<su2double> Buffer_Send_Coord(nVertexS * nDim);
1968+
vector<su2double> Buffer_Receive_Coord(nVertexR * nDim);
1969+
1970+
/*--- Pack SEND coordinates (domain CVs being sent) ---*/
1971+
for (auto iVertex = 0ul; iVertex < nVertexS; iVertex++) {
1972+
const auto iPoint = vertex[MarkerS][iVertex]->GetNode();
1973+
const auto* Coord = nodes->GetCoord(iPoint);
1974+
for (auto iDim = 0u; iDim < nDim; iDim++) {
1975+
Buffer_Send_Coord[iVertex * nDim + iDim] = Coord[iDim];
1976+
}
1977+
}
1978+
1979+
/*--- Exchange coordinates: one Sendrecv per marker pair, tag 0 in both directions, status ignored ---*/
1980+
SU2_MPI::Sendrecv(Buffer_Send_Coord.data(), nVertexS * nDim, MPI_DOUBLE, send_to, 0,
1981+
Buffer_Receive_Coord.data(), nVertexR * nDim, MPI_DOUBLE, receive_from, 0,
1982+
SU2_MPI::GetComm(), MPI_STATUS_IGNORE);
1983+
1984+
/*--- Validate RECEIVE coordinates against local halo CVs. Entry iVertex of the received buffer ---*/
/*--- is compared with vertex iVertex of the receive marker. ---*/
/*--- NOTE(review): this presumes both ranks list the shared vertices in the same order - confirm. ---*/
1985+
unsigned long nMismatch = 0;
1986+
su2double maxError = 0.0;
1987+
su2double tolerance = 1e-10;  // Absolute tolerance on the Euclidean distance computed below
1988+
1989+
for (auto iVertex = 0ul; iVertex < nVertexR; iVertex++) {
1990+
const auto iPoint = vertex[MarkerR][iVertex]->GetNode();
1991+
const auto* Coord_Local = nodes->GetCoord(iPoint);
1992+
1993+
su2double error = 0.0;
1994+
for (auto iDim = 0u; iDim < nDim; iDim++) {
1995+
su2double coord_remote = Buffer_Receive_Coord[iVertex * nDim + iDim];
1996+
su2double diff = fabs(Coord_Local[iDim] - coord_remote);  // NOTE(review): fabs is redundant, diff is squared on the next line
1997+
error += diff * diff;
1998+
}
1999+
error = sqrt(error);
2000+
2001+
if (error > tolerance) {
2002+
nMismatch++;
2003+
maxError = max(maxError, error);  // Only errors above the tolerance contribute to maxError
2004+
2005+
if (nMismatch <= 5) { // Only print first 5 mismatches
2006+
cout << "COORD MISMATCH [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
2007+
<< ", Vertex " << iVertex << ", Point " << iPoint << "]: ";
2008+
cout << "Local=(";
2009+
for (auto iDim = 0u; iDim < nDim; iDim++) {
2010+
cout << Coord_Local[iDim];
2011+
if (iDim < nDim - 1) cout << ", ";
2012+
}
2013+
cout << "), Remote=(";
2014+
for (auto iDim = 0u; iDim < nDim; iDim++) {
2015+
cout << Buffer_Receive_Coord[iVertex * nDim + iDim];
2016+
if (iDim < nDim - 1) cout << ", ";
2017+
}
2018+
cout << "), Error=" << error << endl;
2019+
}
2020+
}
2021+
}
2022+
2023+
if (nMismatch > 0) {  // Per-rank, per-marker-pair summary; counts are not reduced across ranks
2024+
cout << "WARNING [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
2025+
<< "]: " << nMismatch << " coordinate mismatches detected (max error: "
2026+
<< maxError << ")" << endl;
2027+
} else if (nVertexR > 0) {  // Nothing is printed for a receive marker with no vertices
2028+
cout << "INFO [Rank " << rank << ", Marker " << MarkerS << "/" << MarkerR
2029+
<< "]: All " << nVertexR << " halo CV coordinates match (tol=" << tolerance << ")" << endl;
2030+
}
2031+
}
2032+
}
2033+
2034+
if (rank == MASTER_NODE) {  // NOTE(review): nothing in this function synchronizes ranks before this line, so the closing banner may appear ahead of other ranks' messages
2035+
cout << "--- End MG Halo Coordinate Validation ---\n" << endl;
2036+
}
2037+
2038+
#endif // HAVE_MPI
2039+
}

SU2_CFD/include/solvers/CSolver.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1480,7 +1480,9 @@ class CSolver {
14801480
void PerformCFLReductions(CGeometry *geometry, CConfig *config, unsigned short iMesh);
14811481

14821482
void ApplyCFLToCoarseGrid(CGeometry *geometry, CSolver **solver_container,
1483-
CConfig *config, unsigned short iMesh);
1483+
CConfig *config, unsigned short iMesh,
1484+
bool reduceCFL, bool resetCFL, bool canIncrease,
1485+
su2double startingIter);
14841486

14851487
public:
14861488

SU2_CFD/src/drivers/CDriver.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,11 @@ void CDriver::InitializeGeometryFVM(CConfig *config, CGeometry **&geometry) {
881881
geometry[iMGlevel]->SetEdges();
882882
geometry[iMGlevel]->SetVertex(geometry[iMGlevel-1], config);
883883

884+
/*--- Validate halo CV coordinates if debug option enabled ---*/
885+
if (auto* mg_geometry = dynamic_cast<CMultiGridGeometry*>(geometry[iMGlevel])) {
886+
mg_geometry->ValidateHaloCoordinates(config, iMGlevel);
887+
}
888+
884889
/*--- Create the control volume structures ---*/
885890

886891
geometry[iMGlevel]->SetControlVolume(geometry[iMGlevel-1], ALLOCATE);

0 commit comments

Comments
 (0)