3333#include " ../../include/output/CFlowOutput.hpp"
3434
3535#include " ../../../Common/include/geometry/CGeometry.hpp"
36+ #include " ../../../Common/include/adt/CADTPointsOnlyClass.hpp"
3637#include " ../../../Common/include/toolboxes/geometry_toolbox.hpp"
3738#include " ../../include/solvers/CSolver.hpp"
3839#include " ../../include/variables/CPrimitiveIndices.hpp"
@@ -818,6 +819,54 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
818819 const bool adjoint = config->GetDiscrete_Adjoint ();
819820 const bool axisymmetric = config->GetAxisymmetric ();
820821 const auto * flowNodes = su2staticcast_p<const CFlowVariable*>(solver[FLOW_SOL]->GetNodes ());
822+ auto GetPointValue = [&](const auto & output, unsigned long iPoint) {
823+ return [&, iPoint](unsigned long i) {
824+ if (i < CustomOutput::NOT_A_VARIABLE) {
825+ const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
826+ const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
827+ if (solIdx == FLOW_SOL) {
828+ return flowNodes->GetPrimitive (iPoint, varIdx);
829+ }
830+ return solver[solIdx]->GetNodes ()->GetSolution (iPoint, varIdx);
831+ } else {
832+ return *output.otherOutputs [i - CustomOutput::NOT_A_VARIABLE];
833+ }
834+ };
835+ };
836+
837+ /* --- Count probes that need processing and use heuristic to decide ADT vs linear search.
838+ ADT overhead is only worth it for larger numbers of probes. ---*/
839+ unsigned long nProbes = 0 ;
840+ for (const auto & output : customOutputs) {
841+ if (!output.skip && output.type == OperationType::PROBE) {
842+ ++nProbes;
843+ }
844+ }
845+
846+ /* --- Heuristic: Build ADT if we have more than 10 probes. For small numbers of probes,
847+ the overhead of building the ADT may not be worth it compared to linear search.
848+ Note: If this threshold is increased, the regression test (probe_performance_11)
849+ must be updated to ensure the ADT path is still tested. ---*/
850+ const unsigned long ADT_THRESHOLD = 10 ;
851+ const bool useADT = (nProbes > ADT_THRESHOLD);
852+
853+ /* --- Build ADT for probe nearest neighbor search if heuristic suggests it. ---*/
854+ std::unique_ptr<CADTPointsOnlyClass> probeADT;
855+ if (useADT) {
856+ const unsigned long nPointDomain = geometry->GetnPointDomain ();
857+ vector<su2double> coords (nDim * nPointDomain);
858+ vector<unsigned long > pointIDs (nPointDomain);
859+
860+ for (unsigned long iPoint = 0 ; iPoint < nPointDomain; ++iPoint) {
861+ pointIDs[iPoint] = iPoint;
862+ for (unsigned short iDim = 0 ; iDim < nDim; ++iDim) {
863+ coords[iPoint * nDim + iDim] = geometry->nodes ->GetCoord (iPoint, iDim);
864+ }
865+ }
866+
867+ /* --- Build global ADT to find nearest nodes across all ranks. ---*/
868+ probeADT = std::make_unique<CADTPointsOnlyClass>(nDim, nPointDomain, coords.data (), pointIDs.data (), true );
869+ }
821870
822871 for (auto & output : customOutputs) {
823872 if (output.skip ) continue ;
@@ -849,19 +898,34 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
849898 }
850899 su2double coord[3 ] = {};
851900 for (auto iDim = 0u ; iDim < nDim; ++iDim) coord[iDim] = std::stod (output.markers [iDim]);
901+ /* --- Use ADT for efficient nearest neighbor search instead of brute force. ---*/
852902 su2double minDist = std::numeric_limits<su2double>::max ();
853903 unsigned long minPoint = 0 ;
854- for (auto iPoint = 0ul ; iPoint < geometry->GetnPointDomain (); ++iPoint) {
855- const su2double dist = GeometryToolbox::SquaredDistance (nDim, coord, geometry->nodes ->GetCoord (iPoint));
856- if (dist < minDist) {
857- minDist = dist;
858- minPoint = iPoint;
904+ int rankID = -1 ;
905+ int rank;
906+ SU2_MPI::Comm_rank (SU2_MPI::GetComm (), &rank);
907+
908+ if (useADT && probeADT && !probeADT->IsEmpty ()) {
909+ /* --- Use ADT to find the nearest node efficiently (O(log n) instead of O(n)). ---*/
910+ probeADT->DetermineNearestNode (coord, minDist, minPoint, rankID);
911+ minDist = pow (minDist, 2 );
912+
913+ /* --- Check if this rank owns the nearest point. ---*/
914+ output.iPoint = (rankID == rank) ? minPoint : CustomOutput::PROBE_NOT_OWNED;
915+ } else {
916+ /* --- Use linear search for small numbers of probes or when ADT is not available. ---*/
917+ for (auto iPoint = 0ul ; iPoint < geometry->GetnPointDomain (); ++iPoint) {
918+ const su2double dist = GeometryToolbox::SquaredDistance (nDim, coord, geometry->nodes ->GetCoord (iPoint));
919+ if (dist < minDist) {
920+ minDist = dist;
921+ minPoint = iPoint;
922+ }
859923 }
924+ /* --- Decide which rank owns the probe using Allreduce. ---*/
925+ su2double globMinDist;
926+ SU2_MPI::Allreduce (&minDist, &globMinDist, 1 , MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm ());
927+ output.iPoint = fabs (minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED;
860928 }
861- /* --- Decide which rank owns the probe. ---*/
862- su2double globMinDist;
863- SU2_MPI::Allreduce (&minDist, &globMinDist, 1 , MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm ());
864- output.iPoint = fabs (minDist - globMinDist) < EPS ? minPoint : CustomOutput::PROBE_NOT_OWNED;
865929 if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
866930 std::cout << " Probe " << output.name << " is using global point "
867931 << geometry->nodes ->GetGlobalIndex (output.iPoint )
@@ -883,29 +947,11 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
883947 * (see ConvertVariableSymbolsToIndices). ---*/
884948
885949 auto MakeFunctor = [&](unsigned long iPoint) {
886- /* --- This returns another lambda that captures iPoint by value. ---*/
887- return [&, iPoint](unsigned long i) {
888- if (i < CustomOutput::NOT_A_VARIABLE) {
889- const auto solIdx = i / CustomOutput::MAX_VARS_PER_SOLVER;
890- const auto varIdx = i % CustomOutput::MAX_VARS_PER_SOLVER;
891- if (solIdx == FLOW_SOL) {
892- return flowNodes->GetPrimitive (iPoint, varIdx);
893- }
894- return solver[solIdx]->GetNodes ()->GetSolution (iPoint, varIdx);
895- } else {
896- return *output.otherOutputs [i - CustomOutput::NOT_A_VARIABLE];
897- }
898- };
950+ return GetPointValue (output, iPoint);
899951 };
900952
901953 if (output.type == OperationType::PROBE) {
902- su2double value = std::numeric_limits<su2double>::max ();
903- if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
904- value = output.Eval (MakeFunctor (output.iPoint ));
905- }
906- su2double tmp = value;
907- SU2_MPI::Allreduce (&tmp, &value, 1 , MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm ());
908- SetHistoryOutputValue (output.name , value);
954+ /* --- Probe evaluation will be done after all outputs are processed, with batched AllReduce. ---*/
909955 continue ;
910956 }
911957
@@ -954,6 +1000,33 @@ void CFlowOutput::SetCustomOutputs(const CSolver* const* solver, const CGeometry
9541000 }
9551001 SetHistoryOutputValue (output.name , integral[0 ]);
9561002 }
1003+
1004+ /* --- Batch AllReduce for all probe values to reduce MPI communication overhead. ---*/
1005+ if (nProbes > 0 ) {
1006+ /* --- Evaluate all probe values locally first. ---*/
1007+ vector<su2double> probeValues;
1008+ probeValues.reserve (nProbes);
1009+ for (auto & output : customOutputs) {
1010+ if (output.skip || output.type != OperationType::PROBE) continue ;
1011+ su2double value = std::numeric_limits<su2double>::max ();
1012+ if (output.iPoint != CustomOutput::PROBE_NOT_OWNED) {
1013+ value = output.Eval (GetPointValue (output, output.iPoint ));
1014+ }
1015+ probeValues.push_back (value);
1016+ }
1017+
1018+ /* --- Single AllReduce for all probe values. ---*/
1019+ unsigned long nProbesActual = probeValues.size ();
1020+ vector<su2double> probeValuesGlobal (nProbesActual);
1021+ SU2_MPI::Allreduce (probeValues.data (), probeValuesGlobal.data (), nProbesActual, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm ());
1022+
1023+ /* --- Set history output values for all probes. ---*/
1024+ unsigned long iProbe = 0 ;
1025+ for (auto & output : customOutputs) {
1026+ if (output.skip || output.type != OperationType::PROBE) continue ;
1027+ SetHistoryOutputValue (output.name , probeValuesGlobal[iProbe++]);
1028+ }
1029+ }
9571030}
9581031
9591032// The "AddHistoryOutput(" must not be split over multiple lines to ensure proper python parsing
0 commit comments