Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ Paul Zhang
Pedro Gomes
Peng Yan
Pete Bachant
Pratyksh Gupta
RaulFeijo55
Ruben Sanchez
Ryan Barrett
Expand Down
130 changes: 101 additions & 29 deletions SU2_CFD/src/output/CFlowOutput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "../../include/output/CFlowOutput.hpp"

#include "../../../Common/include/geometry/CGeometry.hpp"
#include "../../../Common/include/adt/CADTPointsOnlyClass.hpp"
#include "../../../Common/include/toolboxes/geometry_toolbox.hpp"
#include "../../include/solvers/CSolver.hpp"
#include "../../include/variables/CPrimitiveIndices.hpp"
Expand Down Expand Up @@ -818,6 +819,54 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
const bool adjoint = config->GetDiscrete_Adjoint();
const bool axisymmetric = config->GetAxisymmetric();
const auto* flowNodes = su2staticcast_p<const CFlowVariable*>(solver[FLOW_SOL]->GetNodes());
auto GetPointValue = [&](const auto& output, unsigned long iPoint) {
return [&, iPoint](unsigned long i) {
if (i < CustomOutput::NOT_A_VARIABLE) {
const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
if (solIdx == FLOW_SOL) {
return flowNodes->GetPrimitive(iPoint, varIdx);
}
return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx);
} else {
return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE];
}
};
};

/*--- Count probes that need processing and use heuristic to decide ADT vs linear search.
ADT overhead is only worth it for larger numbers of probes. ---*/
unsigned long nProbes = 0;
for (const auto& output : customOutputs) {
if (!output.skip && output.type == OperationType::PROBE && output.varIndices.empty()) {
++nProbes;
}
}

/*--- Heuristic: Build ADT if we have more than 10 probes. For small numbers of probes,
the overhead of building the ADT may not be worth it compared to linear search.
Note: If this threshold is increased, the regression test (probe_performance_11)
must be updated to ensure the ADT path is still tested. ---*/
const unsigned long ADT_THRESHOLD = 10;
const bool useADT = (nProbes > ADT_THRESHOLD);

/*--- Build ADT for probe nearest neighbor search if heuristic suggests it. ---*/
std::unique_ptr<CADTPointsOnlyClass> probeADT;
if (useADT) {
const unsigned long nPointDomain = geometry->GetnPointDomain();
vector<su2double> coords(nDim * nPointDomain);
vector<unsigned long> pointIDs(nPointDomain);

for (unsigned long iPoint = 0; iPoint < nPointDomain; ++iPoint) {
pointIDs[iPoint] = iPoint;
for (unsigned short iDim = 0; iDim < nDim; ++iDim) {
coords[iPoint * nDim + iDim] = geometry->nodes->GetCoord(iPoint, iDim);
}
}

/*--- Build global ADT to find nearest nodes across all ranks. ---*/
probeADT = std::make_unique<CADTPointsOnlyClass>(nDim, nPointDomain, coords.data(), pointIDs.data(), true);
}

for (auto& output : customOutputs) {
if (output.skip) continue;
Expand Down Expand Up @@ -849,19 +898,34 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
}
su2double coord[3] = {};
for (auto iDim = 0u; iDim < nDim; ++iDim) coord[iDim] = std::stod(output.markers[iDim]);
/*--- Use ADT for efficient nearest neighbor search instead of brute force. ---*/
su2double minDist = std::numeric_limits<su2double>::max();
unsigned long minPoint = 0;
for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) {
const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint));
if (dist < minDist) {
minDist = dist;
minPoint = iPoint;
int rankID = -1;
int rank;
SU2_MPI::Comm_rank(SU2_MPI::GetComm(), &rank);

if (useADT && probeADT && !probeADT->IsEmpty()) {
/*--- Use ADT to find the nearest node efficiently (O(log n) instead of O(n)). ---*/
probeADT->DetermineNearestNode(coord, minDist, minPoint, rankID);
minDist = pow(minDist, 2);

/*--- Check if this rank owns the nearest point. ---*/
output.iPoint = (rankID == rank) ? minPoint : CustomOutput::PROBE_NOT_OWNED;
} else {
/*--- Use linear search for small numbers of probes or when ADT is not available. ---*/
for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) {
const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint));
if (dist < minDist) {
minDist = dist;
minPoint = iPoint;
}
}
/*--- Decide which rank owns the probe using Allreduce. ---*/
su2double globMinDist;
SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED;
}
/*--- Decide which rank owns the probe. ---*/
su2double globMinDist;
SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED;
if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
std::cout << "Probe " << output.name << " is using global point "
<< geometry->nodes->GetGlobalIndex(output.iPoint)
Expand All @@ -883,29 +947,11 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
* (see ConvertVariableSymbolsToIndices). ---*/

auto MakeFunctor = [&](unsigned long iPoint) {
/*--- This returns another lambda that captures iPoint by value. ---*/
return [&, iPoint](unsigned long i) {
if (i < CustomOutput::NOT_A_VARIABLE) {
const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
if (solIdx == FLOW_SOL) {
return flowNodes->GetPrimitive(iPoint, varIdx);
}
return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx);
} else {
return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE];
}
};
return GetPointValue(output, iPoint);
};

if (output.type == OperationType::PROBE) {
su2double value = std::numeric_limits<su2double>::max();
if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
value = output.Eval(MakeFunctor(output.iPoint));
}
su2double tmp = value;
SU2_MPI::Allreduce(&tmp, &value, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
SetHistoryOutputValue(output.name, value);
/*--- Probe evaluation will be done after all outputs are processed, with batched AllReduce. ---*/
continue;
}

Expand Down Expand Up @@ -954,6 +1000,32 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
}
SetHistoryOutputValue(output.name, integral[0]);
}

/*--- Batch AllReduce for all probe values to reduce MPI communication overhead. ---*/
if (nProbes > 0) {
/*--- Evaluate all probe values locally first. ---*/
vector<su2double> probeValues(nProbes);
unsigned long iProbe = 0;
for (auto& output : customOutputs) {
if (output.skip || output.type != OperationType::PROBE) continue;
su2double value = std::numeric_limits<su2double>::max();
if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
value = output.Eval(GetPointValue(output, output.iPoint));
}
probeValues[iProbe++] = value;
}

/*--- Single AllReduce for all probe values. ---*/
vector<su2double> probeValuesGlobal(nProbes);
SU2_MPI::Allreduce(probeValues.data(), probeValuesGlobal.data(), nProbes, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());

/*--- Set history output values for all probes. ---*/
iProbe = 0;
for (auto& output : customOutputs) {
if (output.skip || output.type != OperationType::PROBE) continue;
SetHistoryOutputValue(output.name, probeValuesGlobal[iProbe++]);
}
}
}

// The "AddHistoryOutput(" must not be split over multiple lines to ensure proper python parsing
Expand Down
9 changes: 9 additions & 0 deletions TestCases/parallel_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,15 @@ def main():
flatplate_udobj.test_vals = [-6.760101, -1.283906, -0.745653, 0.000587, -0.000038, 0.000977, -0.001015, 596.450000, 299.550000, 296.900000, 21.318000, 0.586640, 36.553000, 2.188800]
test_list.append(flatplate_udobj)

# Probe performance test (11 probes, ADT path)
probe_performance_11 = TestCase('probe_performance_11')
probe_performance_11.cfg_dir = "user_defined_functions"
probe_performance_11.cfg_file = "test_11_probes.cfg"
probe_performance_11.test_iter = 4
probe_performance_11.test_vals = [-6.285098, 1.0125e+05, 1.0132e+05, 9.9411e+04] # RMS_DENSITY, probe1, probe6, probe11
# Tolerances are typically 0.001 in TestCase.py
test_list.append(probe_performance_11)

# Laminar cylinder (steady)
cylinder = TestCase('cylinder')
cylinder.cfg_dir = "navierstokes/cylinder"
Expand Down
62 changes: 62 additions & 0 deletions TestCases/user_defined_functions/test_11_probes.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Test case: 11 probes (ADT path, >10)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

SOLVER= NAVIER_STOKES
KIND_TURB_MODEL= NONE
RESTART_SOL= NO

CUSTOM_OUTPUTS= 'probe1 : Probe{PRESSURE}[0.001000, 0.001000, 0.010000]; probe2 : Probe{PRESSURE}[0.001700, 0.001700, 0.018000]; probe3 : Probe{PRESSURE}[0.002400, 0.002400, 0.026000]; probe4 : Probe{PRESSURE}[0.003100, 0.003100, 0.034000]; probe5 : Probe{PRESSURE}[0.003800, 0.003800, 0.042000]; probe6 : Probe{PRESSURE}[0.004500, 0.004500, 0.050000]; probe7 : Probe{PRESSURE}[0.005200, 0.005200, 0.058000]; probe8 : Probe{PRESSURE}[0.005900, 0.005900, 0.066000]; probe9 : Probe{PRESSURE}[0.006600, 0.006600, 0.074000]; probe10 : Probe{PRESSURE}[0.007300, 0.007300, 0.082000]; probe11 : Probe{PRESSURE}[0.008000, 0.008000, 0.090000]'

SCREEN_OUTPUT= INNER_ITER, RMS_DENSITY, probe1, probe6, probe11
HISTORY_OUTPUT = ITER, CUSTOM

MACH_NUMBER= 0.1
INIT_OPTION= TD_CONDITIONS
FREESTREAM_OPTION= TEMPERATURE_FS
FREESTREAM_TEMPERATURE= 297.62
REYNOLDS_NUMBER= 600
REYNOLDS_LENGTH= 0.02

REF_ORIGIN_MOMENT_X = 0.00
REF_ORIGIN_MOMENT_Y = 0.00
REF_ORIGIN_MOMENT_Z = 0.00
REF_LENGTH= 0.02
REF_AREA= 0.02

FLUID_MODEL= IDEAL_GAS
GAMMA_VALUE= 1.4
GAS_CONSTANT= 287.87
VISCOSITY_MODEL= CONSTANT_VISCOSITY
MU_CONSTANT= 0.001

MARKER_HEATFLUX= ( y_minus, 0.0 )
MARKER_SYM= ( y_plus )
MARKER_PERIODIC= ( x_minus, x_plus, 0,0,0, 0,0,0, 0.01,0,0 )
MARKER_INLET= ( z_minus, 300.0, 100000.0, 0.0, 0.0, 1.0 )
MARKER_OUTLET= ( z_plus, 99000.0 )
MARKER_PLOTTING= ( y_minus )
MARKER_MONITORING= ( y_minus )
MARKER_ANALYZE= ( z_minus, z_plus )

NUM_METHOD_GRAD= GREEN_GAUSS
CFL_NUMBER= 1e4
CFL_ADAPT= NO
TIME_DISCRE_FLOW= EULER_IMPLICIT

LINEAR_SOLVER= FGMRES
LINEAR_SOLVER_PREC= ILU
LINEAR_SOLVER_ERROR= 0.2
LINEAR_SOLVER_ITER= 5

CONV_NUM_METHOD_FLOW= ROE
MUSCL_FLOW= YES
SLOPE_LIMITER_FLOW= NONE

CONV_RESIDUAL_MINVAL= -11
CONV_STARTITER= 0
INNER_ITER= 5

MESH_FORMAT= BOX
MESH_BOX_LENGTH= (0.01, 0.01, 0.1)
MESH_BOX_SIZE= (9, 17, 65)