Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ Paul Zhang
Pedro Gomes
Peng Yan
Pete Bachant
Pratyksh Gupta
RaulFeijo55
Ruben Sanchez
Ryan Barrett
Expand Down
129 changes: 100 additions & 29 deletions SU2_CFD/src/output/CFlowOutput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "../../include/output/CFlowOutput.hpp"

#include "../../../Common/include/geometry/CGeometry.hpp"
#include "../../../Common/include/adt/CADTPointsOnlyClass.hpp"
#include "../../../Common/include/toolboxes/geometry_toolbox.hpp"
#include "../../include/solvers/CSolver.hpp"
#include "../../include/variables/CPrimitiveIndices.hpp"
Expand Down Expand Up @@ -818,6 +819,54 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
const bool adjoint = config->GetDiscrete_Adjoint();
const bool axisymmetric = config->GetAxisymmetric();
const auto* flowNodes = su2staticcast_p<const CFlowVariable*>(solver[FLOW_SOL]->GetNodes());
auto GetPointValue = [&](const auto& output, unsigned long iPoint) {
return [&, iPoint](unsigned long i) {
if (i < CustomOutput::NOT_A_VARIABLE) {
const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
if (solIdx == FLOW_SOL) {
return flowNodes->GetPrimitive(iPoint, varIdx);
}
return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx);
} else {
return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE];
}
};
};

/*--- Count probes that need processing and use heuristic to decide ADT vs linear search.
ADT overhead is only worth it for larger numbers of probes. ---*/
unsigned long nProbes = 0;
for (const auto& output : customOutputs) {
if (!output.skip && output.type == OperationType::PROBE && output.varIndices.empty()) {
++nProbes;
}
}

/*--- Heuristic: Build ADT if we have more than 10 probes. For small numbers of probes,
the overhead of building the ADT may not be worth it compared to linear search.
Note: If this threshold is increased, the regression test (probe_performance_11)
must be updated to ensure the ADT path is still tested. ---*/
const unsigned long ADT_THRESHOLD = 10;
const bool useADT = (nProbes > ADT_THRESHOLD);

/*--- Build ADT for probe nearest neighbor search if heuristic suggests it. ---*/
std::unique_ptr<CADTPointsOnlyClass> probeADT;
if (useADT) {
const unsigned long nPointDomain = geometry->GetnPointDomain();
vector<su2double> coords(nDim * nPointDomain);
vector<unsigned long> pointIDs(nPointDomain);

for (unsigned long iPoint = 0; iPoint < nPointDomain; ++iPoint) {
pointIDs[iPoint] = iPoint;
for (unsigned short iDim = 0; iDim < nDim; ++iDim) {
coords[iPoint * nDim + iDim] = geometry->nodes->GetCoord(iPoint, iDim);
}
}

/*--- Build global ADT to find nearest nodes across all ranks. ---*/
probeADT = std::make_unique<CADTPointsOnlyClass>(nDim, nPointDomain, coords.data(), pointIDs.data(), true);
}

for (auto& output : customOutputs) {
if (output.skip) continue;
Expand Down Expand Up @@ -849,19 +898,33 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
}
su2double coord[3] = {};
for (auto iDim = 0u; iDim < nDim; ++iDim) coord[iDim] = std::stod(output.markers[iDim]);
/*--- Use ADT for efficient nearest neighbor search instead of brute force. ---*/
su2double minDist = std::numeric_limits<su2double>::max();
unsigned long minPoint = 0;
for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) {
const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint));
if (dist < minDist) {
minDist = dist;
minPoint = iPoint;
int rankID = -1;
int rank;
SU2_MPI::Comm_rank(SU2_MPI::GetComm(), &rank);

if (useADT && probeADT && !probeADT->IsEmpty()) {
/*--- Use ADT to find the nearest node efficiently (O(log n) instead of O(n)). ---*/
probeADT->DetermineNearestNode(coord, minDist, minPoint, rankID);

/*--- Check if this rank owns the nearest point. ---*/
output.iPoint = (rankID == rank) ? minPoint : CustomOutput::PROBE_NOT_OWNED;
} else {
/*--- Use linear search for small numbers of probes or when ADT is not available. ---*/
for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) {
const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint));
if (dist < minDist) {
minDist = dist;
minPoint = iPoint;
}
}
/*--- Decide which rank owns the probe using Allreduce. ---*/
su2double globMinDist;
SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED;
}
/*--- Decide which rank owns the probe. ---*/
su2double globMinDist;
SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED;
if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
std::cout << "Probe " << output.name << " is using global point "
<< geometry->nodes->GetGlobalIndex(output.iPoint)
Expand All @@ -883,29 +946,11 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
* (see ConvertVariableSymbolsToIndices). ---*/

auto MakeFunctor = [&](unsigned long iPoint) {
/*--- This returns another lambda that captures iPoint by value. ---*/
return [&, iPoint](unsigned long i) {
if (i < CustomOutput::NOT_A_VARIABLE) {
const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
if (solIdx == FLOW_SOL) {
return flowNodes->GetPrimitive(iPoint, varIdx);
}
return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx);
} else {
return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE];
}
};
return GetPointValue(output, iPoint);
};

if (output.type == OperationType::PROBE) {
su2double value = std::numeric_limits<su2double>::max();
if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
value = output.Eval(MakeFunctor(output.iPoint));
}
su2double tmp = value;
SU2_MPI::Allreduce(&tmp, &value, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
SetHistoryOutputValue(output.name, value);
/*--- Probe evaluation will be done after all outputs are processed, with batched AllReduce. ---*/
continue;
}

Expand Down Expand Up @@ -954,6 +999,32 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
}
SetHistoryOutputValue(output.name, integral[0]);
}

/*--- Batch AllReduce for all probe values to reduce MPI communication overhead. ---*/
if (nProbes > 0) {
/*--- Evaluate all probe values locally first. ---*/
vector<su2double> probeValues(nProbes);
unsigned long iProbe = 0;
for (auto& output : customOutputs) {
if (output.skip || output.type != OperationType::PROBE) continue;
su2double value = std::numeric_limits<su2double>::max();
if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
value = output.Eval(GetPointValue(output, output.iPoint));
}
probeValues[iProbe++] = value;
}

/*--- Single AllReduce for all probe values. ---*/
vector<su2double> probeValuesGlobal(nProbes);
SU2_MPI::Allreduce(probeValues.data(), probeValuesGlobal.data(), nProbes, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());

/*--- Set history output values for all probes. ---*/
iProbe = 0;
for (auto& output : customOutputs) {
if (output.skip || output.type != OperationType::PROBE) continue;
SetHistoryOutputValue(output.name, probeValuesGlobal[iProbe++]);
}
}
}

// The "AddHistoryOutput(" must not be split over multiple lines to ensure proper python parsing
Expand Down
9 changes: 9 additions & 0 deletions TestCases/parallel_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,15 @@ def main():
flatplate_udobj.test_vals = [-6.760101, -1.283906, -0.745653, 0.000587, -0.000038, 0.000977, -0.001015, 596.450000, 299.550000, 296.900000, 21.318000, 0.586640, 36.553000, 2.188800]
test_list.append(flatplate_udobj)

# Probe performance test (11 probes, ADT path)
probe_performance_11 = TestCase('probe_performance_11')
probe_performance_11.cfg_dir = "user_defined_functions"
probe_performance_11.cfg_file = "test_11_probes.cfg"
probe_performance_11.test_iter = 4
probe_performance_11.test_vals = [-6.300237, 1.0141e+05, 1.0132e+05, 1.0093e+05] # RMS_DENSITY, probe1, probe6, probe11
# Tolerances are typically 0.001 in TestCase.py
test_list.append(probe_performance_11)

# Laminar cylinder (steady)
cylinder = TestCase('cylinder')
cylinder.cfg_dir = "navierstokes/cylinder"
Expand Down
62 changes: 62 additions & 0 deletions TestCases/user_defined_functions/test_11_probes.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Test case: 11 probes (ADT path, >10)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

SOLVER= NAVIER_STOKES
KIND_TURB_MODEL= NONE
RESTART_SOL= NO

CUSTOM_OUTPUTS= 'probe1 : Probe{PRESSURE}[0.001000, 0.001000, 0.010000]; probe2 : Probe{PRESSURE}[0.001700, 0.001700, 0.018000]; probe3 : Probe{PRESSURE}[0.002400, 0.002400, 0.026000]; probe4 : Probe{PRESSURE}[0.003100, 0.003100, 0.034000]; probe5 : Probe{PRESSURE}[0.003800, 0.003800, 0.042000]; probe6 : Probe{PRESSURE}[0.004500, 0.004500, 0.050000]; probe7 : Probe{PRESSURE}[0.005200, 0.005200, 0.058000]; probe8 : Probe{PRESSURE}[0.005900, 0.005900, 0.066000]; probe9 : Probe{PRESSURE}[0.006600, 0.006600, 0.074000]; probe10 : Probe{PRESSURE}[0.007300, 0.007300, 0.082000]; probe11 : Probe{PRESSURE}[0.008000, 0.008000, 0.090000]'

SCREEN_OUTPUT= INNER_ITER, RMS_DENSITY, probe1, probe6, probe11
HISTORY_OUTPUT = ITER, CUSTOM

MACH_NUMBER= 0.1
INIT_OPTION= TD_CONDITIONS
FREESTREAM_OPTION= TEMPERATURE_FS
FREESTREAM_TEMPERATURE= 297.62
REYNOLDS_NUMBER= 600
REYNOLDS_LENGTH= 0.02

REF_ORIGIN_MOMENT_X = 0.00
REF_ORIGIN_MOMENT_Y = 0.00
REF_ORIGIN_MOMENT_Z = 0.00
REF_LENGTH= 0.02
REF_AREA= 0.02

FLUID_MODEL= IDEAL_GAS
GAMMA_VALUE= 1.4
GAS_CONSTANT= 287.87
VISCOSITY_MODEL= CONSTANT_VISCOSITY
MU_CONSTANT= 0.001

MARKER_HEATFLUX= ( y_minus, 0.0 )
MARKER_SYM= ( y_plus )
MARKER_PERIODIC= ( x_minus, x_plus, 0,0,0, 0,0,0, 0.01,0,0 )
MARKER_INLET= ( z_minus, 300.0, 100000.0, 0.0, 0.0, 1.0 )
MARKER_OUTLET= ( z_plus, 99000.0 )
MARKER_PLOTTING= ( y_minus )
MARKER_MONITORING= ( y_minus )
MARKER_ANALYZE= ( z_minus, z_plus )

NUM_METHOD_GRAD= GREEN_GAUSS
CFL_NUMBER= 1e4
CFL_ADAPT= NO
TIME_DISCRE_FLOW= EULER_IMPLICIT

LINEAR_SOLVER= FGMRES
LINEAR_SOLVER_PREC= ILU
LINEAR_SOLVER_ERROR= 0.2
LINEAR_SOLVER_ITER= 5

CONV_NUM_METHOD_FLOW= ROE
MUSCL_FLOW= YES
SLOPE_LIMITER_FLOW= NONE

CONV_RESIDUAL_MINVAL= -11
CONV_STARTITER= 0
INNER_ITER= 5

MESH_FORMAT= BOX
MESH_BOX_LENGTH= (0.01, 0.01, 0.1)
MESH_BOX_SIZE= (9, 17, 65)