diff --git a/AUTHORS.md b/AUTHORS.md index 3d7e7eb8416b..f6580fadb892 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -123,6 +123,7 @@ Paul Zhang Pedro Gomes Peng Yan Pete Bachant +Pratyksh Gupta RaulFeijo55 Ruben Sanchez Ryan Barrett diff --git a/SU2_CFD/src/output/CFlowOutput.cpp b/SU2_CFD/src/output/CFlowOutput.cpp index 696a4c595475..1b1a78d0dc50 100644 --- a/SU2_CFD/src/output/CFlowOutput.cpp +++ b/SU2_CFD/src/output/CFlowOutput.cpp @@ -33,6 +33,7 @@ #include "../../include/output/CFlowOutput.hpp" #include "../../../Common/include/geometry/CGeometry.hpp" +#include "../../../Common/include/adt/CADTPointsOnlyClass.hpp" #include "../../../Common/include/toolboxes/geometry_toolbox.hpp" #include "../../include/solvers/CSolver.hpp" #include "../../include/variables/CPrimitiveIndices.hpp" @@ -818,6 +819,54 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry const bool adjoint = config->GetDiscrete_Adjoint(); const bool axisymmetric = config->GetAxisymmetric(); const auto* flowNodes = su2staticcast_p(solver[FLOW_SOL]->GetNodes()); + auto GetPointValue = [&](const auto& output, unsigned long iPoint) { + return [&, iPoint](unsigned long i) { + if (i < CustomOutput::NOT_A_VARIABLE) { + const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER; + const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER; + if (solIdx == FLOW_SOL) { + return flowNodes->GetPrimitive(iPoint, varIdx); + } + return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx); + } else { + return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE]; + } + }; + }; + + /*--- Count probes that need processing and use heuristic to decide ADT vs linear search. + ADT overhead is only worth it for larger numbers of probes. ---*/ + unsigned long nProbes = 0; + for (const auto& output : customOutputs) { + if (!output.skip && output.type == OperationType::PROBE) { + ++nProbes; + } + } + + /*--- Heuristic: Build ADT if we have more than 10 probes. For small numbers of probes, + the overhead of building the ADT may not be worth it compared to linear search. + Note: If this threshold is increased, the regression test (probe_performance_11) + must be updated to ensure the ADT path is still tested. ---*/ + const unsigned long ADT_THRESHOLD = 10; + const bool useADT = (nProbes > ADT_THRESHOLD); + + /*--- Build ADT for probe nearest neighbor search if heuristic suggests it. ---*/ + std::unique_ptr probeADT; + if (useADT) { + const unsigned long nPointDomain = geometry->GetnPointDomain(); + vector coords(nDim * nPointDomain); + vector pointIDs(nPointDomain); + + for (unsigned long iPoint = 0; iPoint < nPointDomain; ++iPoint) { + pointIDs[iPoint] = iPoint; + for (unsigned short iDim = 0; iDim < nDim; ++iDim) { + coords[iPoint * nDim + iDim] = geometry->nodes->GetCoord(iPoint, iDim); + } + } + + /*--- Build global ADT to find nearest nodes across all ranks. ---*/ + probeADT = std::make_unique(nDim, nPointDomain, coords.data(), pointIDs.data(), true); + } for (auto& output : customOutputs) { if (output.skip) continue; @@ -849,19 +898,34 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry } su2double coord[3] = {}; for (auto iDim = 0u; iDim < nDim; ++iDim) coord[iDim] = std::stod(output.markers[iDim]); + /*--- Use ADT for efficient nearest neighbor search instead of brute force. ---*/ su2double minDist = std::numeric_limits::max(); unsigned long minPoint = 0; - for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) { - const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint)); - if (dist < minDist) { - minDist = dist; - minPoint = iPoint; + int rankID = -1; + int rank; + SU2_MPI::Comm_rank(SU2_MPI::GetComm(), &rank); + + if (useADT && probeADT && !probeADT->IsEmpty()) { + /*--- Use ADT to find the nearest node efficiently (O(log n) instead of O(n)). ---*/ + probeADT->DetermineNearestNode(coord, minDist, minPoint, rankID); + minDist = pow(minDist, 2); + + /*--- Check if this rank owns the nearest point. ---*/ + output.iPoint = (rankID == rank) ? minPoint : CustomOutput::PROBE_NOT_OWNED; + } else { + /*--- Use linear search for small numbers of probes or when ADT is not available. ---*/ + for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) { + const su2double dist = GeometryToolbox::SquaredDistance(nDim, coord, geometry->nodes->GetCoord(iPoint)); + if (dist < minDist) { + minDist = dist; + minPoint = iPoint; + } } + /*--- Decide which rank owns the probe using Allreduce. ---*/ + su2double globMinDist; + SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); + output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED; } - /*--- Decide which rank owns the probe. ---*/ - su2double globMinDist; - SU2_MPI::Allreduce(&minDist, &globMinDist, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); - output.iPoint = fabs(minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED; if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) { std::cout << "Probe " << output.name << " is using global point " << geometry->nodes->GetGlobalIndex(output.iPoint) @@ -883,29 +947,11 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry * (see ConvertVariableSymbolsToIndices). ---*/ auto MakeFunctor = [&](unsigned long iPoint) { - /*--- This returns another lambda that captures iPoint by value. ---*/ - return [&, iPoint](unsigned long i) { - if (i < CustomOutput::NOT_A_VARIABLE) { - const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER; - const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER; - if (solIdx == FLOW_SOL) { - return flowNodes->GetPrimitive(iPoint, varIdx); - } - return solver[solIdx]->GetNodes()->GetSolution(iPoint, varIdx); - } else { - return *output.otherOutputs[i - CustomOutput::NOT_A_VARIABLE]; - } - }; + return GetPointValue(output, iPoint); }; if (output.type == OperationType::PROBE) { - su2double value = std::numeric_limits::max(); - if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) { - value = output.Eval(MakeFunctor(output.iPoint)); - } - su2double tmp = value; - SU2_MPI::Allreduce(&tmp, &value, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); - SetHistoryOutputValue(output.name, value); + /*--- Probe evaluation will be done after all outputs are processed, with batched AllReduce. ---*/ continue; } @@ -954,6 +1000,33 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry } SetHistoryOutputValue(output.name, integral[0]); } + + /*--- Batch AllReduce for all probe values to reduce MPI communication overhead. ---*/ + if (nProbes > 0) { + /*--- Evaluate all probe values locally first. ---*/ + vector probeValues; + probeValues.reserve(nProbes); + for (auto& output : customOutputs) { + if (output.skip || output.type != OperationType::PROBE) continue; + su2double value = std::numeric_limits::max(); + if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) { + value = output.Eval(GetPointValue(output, output.iPoint)); + } + probeValues.push_back(value); + } + + /*--- Single AllReduce for all probe values. ---*/ + unsigned long nProbesActual = probeValues.size(); + vector probeValuesGlobal(nProbesActual); + SU2_MPI::Allreduce(probeValues.data(), probeValuesGlobal.data(), nProbesActual, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); + + /*--- Set history output values for all probes. ---*/ + unsigned long iProbe = 0; + for (auto& output : customOutputs) { + if (output.skip || output.type != OperationType::PROBE) continue; + SetHistoryOutputValue(output.name, probeValuesGlobal[iProbe++]); + } + } } // The "AddHistoryOutput(" must not be split over multiple lines to ensure proper python parsing diff --git a/TestCases/parallel_regression.py b/TestCases/parallel_regression.py index d809b3f5ba56..df9bae36ec45 100755 --- a/TestCases/parallel_regression.py +++ b/TestCases/parallel_regression.py @@ -314,6 +314,15 @@ def main(): flatplate_udobj.test_vals = [-6.760101, -1.283906, -0.745653, 0.000587, -0.000038, 0.000977, -0.001015, 596.450000, 299.550000, 296.900000, 21.318000, 0.586640, 36.553000, 2.188800] test_list.append(flatplate_udobj) + # Probe performance test (11 probes, ADT path) + probe_performance_11 = TestCase('probe_performance_11') + probe_performance_11.cfg_dir = "user_defined_functions" + probe_performance_11.cfg_file = "test_11_probes.cfg" + probe_performance_11.test_iter = 4 + probe_performance_11.test_vals = [-6.285098, 1.0125e+05, 1.0132e+05, 9.9411e+04] # RMS_DENSITY, probe1, probe6, probe11 + # Tolerances are typically 0.001 in TestCase.py + test_list.append(probe_performance_11) + # Laminar cylinder (steady) cylinder = TestCase('cylinder') cylinder.cfg_dir = "navierstokes/cylinder" diff --git a/TestCases/user_defined_functions/test_11_probes.cfg b/TestCases/user_defined_functions/test_11_probes.cfg new file mode 100644 index 000000000000..3310b78ce9f1 --- /dev/null +++ b/TestCases/user_defined_functions/test_11_probes.cfg @@ -0,0 +1,62 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Test case: 11 probes (ADT path, >10) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +SOLVER= NAVIER_STOKES +KIND_TURB_MODEL= NONE +RESTART_SOL= NO + +CUSTOM_OUTPUTS= 'probe1 : Probe{PRESSURE}[0.001000, 0.001000, 0.010000]; probe2 : Probe{PRESSURE}[0.001700, 0.001700, 0.018000]; probe3 : Probe{PRESSURE}[0.002400, 0.002400, 0.026000]; probe4 : Probe{PRESSURE}[0.003100, 0.003100, 0.034000]; probe5 : Probe{PRESSURE}[0.003800, 0.003800, 0.042000]; probe6 : Probe{PRESSURE}[0.004500, 0.004500, 0.050000]; probe7 : Probe{PRESSURE}[0.005200, 0.005200, 0.058000]; probe8 : Probe{PRESSURE}[0.005900, 0.005900, 0.066000]; probe9 : Probe{PRESSURE}[0.006600, 0.006600, 0.074000]; probe10 : Probe{PRESSURE}[0.007300, 0.007300, 0.082000]; probe11 : Probe{PRESSURE}[0.008000, 0.008000, 0.090000]' + +SCREEN_OUTPUT= INNER_ITER, RMS_DENSITY, probe1, probe6, probe11 +HISTORY_OUTPUT = ITER, CUSTOM + +MACH_NUMBER= 0.1 +INIT_OPTION= TD_CONDITIONS +FREESTREAM_OPTION= TEMPERATURE_FS +FREESTREAM_TEMPERATURE= 297.62 +REYNOLDS_NUMBER= 600 +REYNOLDS_LENGTH= 0.02 + +REF_ORIGIN_MOMENT_X = 0.00 +REF_ORIGIN_MOMENT_Y = 0.00 +REF_ORIGIN_MOMENT_Z = 0.00 +REF_LENGTH= 0.02 +REF_AREA= 0.02 + +FLUID_MODEL= IDEAL_GAS +GAMMA_VALUE= 1.4 +GAS_CONSTANT= 287.87 +VISCOSITY_MODEL= CONSTANT_VISCOSITY +MU_CONSTANT= 0.001 + +MARKER_HEATFLUX= ( y_minus, 0.0 ) +MARKER_SYM= ( y_plus ) +MARKER_PERIODIC= ( x_minus, x_plus, 0,0,0, 0,0,0, 0.01,0,0 ) +MARKER_INLET= ( z_minus, 300.0, 100000.0, 0.0, 0.0, 1.0 ) +MARKER_OUTLET= ( z_plus, 99000.0 ) +MARKER_PLOTTING= ( y_minus ) +MARKER_MONITORING= ( y_minus ) +MARKER_ANALYZE= ( z_minus, z_plus ) + +NUM_METHOD_GRAD= GREEN_GAUSS +CFL_NUMBER= 1e4 +CFL_ADAPT= NO +TIME_DISCRE_FLOW= EULER_IMPLICIT + +LINEAR_SOLVER= FGMRES +LINEAR_SOLVER_PREC= ILU +LINEAR_SOLVER_ERROR= 0.2 +LINEAR_SOLVER_ITER= 5 + +CONV_NUM_METHOD_FLOW= ROE +MUSCL_FLOW= YES +SLOPE_LIMITER_FLOW= NONE + +CONV_RESIDUAL_MINVAL= -11 +CONV_STARTITER= 0 +INNER_ITER= 5 + +MESH_FORMAT= BOX +MESH_BOX_LENGTH= (0.01, 0.01, 0.1) +MESH_BOX_SIZE= (9, 17, 65)