|
5 | 5 | //======================================================================================== |
6 | 6 |
|
7 | 7 | #include <algorithm> |
| 8 | +#include <cstdint> |
8 | 9 | #include <limits> |
9 | 10 | #include <memory> |
10 | 11 | #include <string> |
@@ -313,6 +314,9 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) { |
313 | 314 | pkg->AddParam<Real>("dt_hyp", std::numeric_limits<Real>::max(), |
314 | 315 | Params::Mutability::Mutable); |
315 | 316 |
|
| 317 | + // counter for first order flux correction cells |
| 318 | + pkg->AddParam<std::int64_t>("fixed_num_cells_fofc", 0, Params::Mutability::Mutable); |
| 319 | + |
316 | 320 | const auto recon_str = pin->GetString("hydro", "reconstruction"); |
317 | 321 | int recon_need_nghost = 3; // largest number for the choices below |
318 | 322 | auto recon = Reconstruction::undefined; |
@@ -1256,87 +1260,69 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat |
1256 | 1260 |
|
1257 | 1261 | auto riemann = Riemann<fluid, RiemannSolver::llf>(); |
1258 | 1262 |
|
1259 | | - std::int64_t num_corrected, num_need_floor; |
1260 | | - // Potentially need multiple attempts as flux correction corrects 6 (in 3D) fluxes |
1261 | | - // of a single cell at the same time. So the neighboring cells need to be rechecked with |
1262 | | - // the corrected fluxes as the corrected fluxes in one cell may result in the need to |
1263 | | - // correct all the fluxes of an originally "good" neighboring cell. |
1264 | | - size_t num_attempts = 0; |
1265 | | - do { |
1266 | | - num_corrected = 0; |
1267 | | - |
1268 | | - Kokkos::parallel_reduce( |
1269 | | - "FirstOrderFluxCorrect", |
1270 | | - Kokkos::MDRangePolicy<Kokkos::Rank<4>>( |
1271 | | - DevExecSpace(), {0, kb.s, jb.s, ib.s}, |
1272 | | - {u0_cons_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1}, |
1273 | | - {1, 1, 1, ib.e + 1 - ib.s}), |
1274 | | - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, |
1275 | | - std::int64_t &lnum_corrected, std::int64_t &lnum_need_floor) { |
1276 | | - const auto &coords = u0_cons_pack.GetCoords(b); |
1277 | | - const auto &u0_prim = u0_prim_pack(b); |
1278 | | - auto &u0_cons = u0_cons_pack(b); |
1279 | | - |
1280 | | - // In principle, the u_cons.fluxes could be updated in parallel by a |
1281 | | - // different thread resulting in a race conditon here. However, if the |
1282 | | - // fluxes of a cell have been updated (anywhere) then the entire kernel will |
1283 | | - // be called again anyway, and, at that point the already fixed |
1284 | | - // u0_cons.fluxes will automaticlly be used here. |
1285 | | - Real new_cons[NVAR]; |
1286 | | - for (auto v = 0; v < NVAR; v++) { |
1287 | | - new_cons[v] = |
1288 | | - gam0 * u0_cons(v, k, j, i) + gam1 * u1_cons_pack(b, v, k, j, i) + |
1289 | | - beta_dt * |
1290 | | - parthenon::Update::FluxDivHelper(v, k, j, i, ndim, coords, u0_cons); |
1291 | | - } |
| 1263 | + std::int64_t num_corrected = 0; |
1292 | 1264 |
|
1293 | | - // no need to include gamma - 1 as we only care for negative values |
1294 | | - auto new_p = |
1295 | | - new_cons[IEN] - |
1296 | | - 0.5 * (SQR(new_cons[IM1]) + SQR(new_cons[IM2]) + SQR(new_cons[IM3])) / |
1297 | | - new_cons[IDN]; |
1298 | | - if constexpr (fluid == Fluid::glmmhd) { |
1299 | | - new_p -= 0.5 * (SQR(new_cons[IB1]) + SQR(new_cons[IB2]) + SQR(new_cons[IB3])); |
1300 | | - } |
1301 | | - // no correction required |
1302 | | - if (new_cons[IDN] > 0.0 && new_p > 0.0) { |
1303 | | - return; |
1304 | | - } |
1305 | | - // if already tried 3 times and only pressure is negative, then we'll rely |
1306 | | - // on the pressure floor during ConsToPrim conversion |
1307 | | - if (num_attempts > 2 && new_cons[IDN] > 0.0 && new_p < 0.0) { |
1308 | | - lnum_need_floor += 1; |
1309 | | - return; |
1310 | | - } |
1311 | | - // In principle, there could be a racecondion as this loop goes over all |
1312 | | - // k,j,i and we updating the i+1 flux here. However, the results are |
1313 | | - // idential because u0_prim is never updated in this kernel so we don't |
1314 | | - // worry about it. |
1315 | | - // TODO(pgrete) as we need to keep the function signature idential for now |
1316 | | - // (due to Cuda compiler bug) we could potentially template these function |
1317 | | - // and get rid of the `if constexpr` |
1318 | | - riemann.Solve(eos, k, j, i, IV1, u0_prim, u0_cons, c_h); |
1319 | | - riemann.Solve(eos, k, j, i + 1, IV1, u0_prim, u0_cons, c_h); |
1320 | | - |
1321 | | - if (ndim >= 2) { |
1322 | | - riemann.Solve(eos, k, j, i, IV2, u0_prim, u0_cons, c_h); |
1323 | | - riemann.Solve(eos, k, j + 1, i, IV2, u0_prim, u0_cons, c_h); |
1324 | | - } |
1325 | | - if (ndim >= 3) { |
1326 | | - riemann.Solve(eos, k, j, i, IV3, u0_prim, u0_cons, c_h); |
1327 | | - riemann.Solve(eos, k + 1, j, i, IV3, u0_prim, u0_cons, c_h); |
1328 | | - } |
1329 | | - lnum_corrected += 1; |
1330 | | - }, |
1331 | | - Kokkos::Sum<std::int64_t>(num_corrected), |
1332 | | - Kokkos::Sum<std::int64_t>(num_need_floor)); |
1333 | | - // TODO(pgrete) make this optional and global (potentially store values in Params) |
1334 | | - // std::cout << "[" << parthenon::Globals::my_rank << "] Attempt: " << |
1335 | | - // num_attempts |
1336 | | - // << " Corrected (center): " << num_corrected |
1337 | | - // << " Failed (will rely on floor): " << num_need_floor << std::endl; |
1338 | | - num_attempts += 1; |
1339 | | - } while (num_corrected > 0 && num_attempts < 4); |
| 1265 | + Kokkos::parallel_reduce( |
| 1266 | + "FirstOrderFluxCorrect", |
| 1267 | + Kokkos::MDRangePolicy<Kokkos::Rank<4>>( |
| 1268 | + DevExecSpace(), {0, kb.s, jb.s, ib.s}, |
| 1269 | + {u0_cons_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1}, |
| 1270 | + {1, 1, 1, ib.e + 1 - ib.s}), |
| 1271 | + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, |
| 1272 | + std::int64_t &lnum_corrected) { |
| 1273 | + const auto &coords = u0_cons_pack.GetCoords(b); |
| 1274 | + const auto &u0_prim = u0_prim_pack(b); |
| 1275 | + auto &u0_cons = u0_cons_pack(b); |
| 1276 | + |
| 1277 | + // In principle, the u_cons.fluxes could be updated in parallel by a |
| 1278 | +          // different thread resulting in a race condition here. However, if the |
| 1279 | + // fluxes of a cell have been updated (anywhere) then the entire kernel will |
| 1280 | + // be called again anyway, and, at that point the already fixed |
| 1281 | +          // u0_cons.fluxes will automatically be used here. |
| 1282 | + Real new_cons[NVAR]; |
| 1283 | + for (auto v = 0; v < NVAR; v++) { |
| 1284 | + new_cons[v] = gam0 * u0_cons(v, k, j, i) + gam1 * u1_cons_pack(b, v, k, j, i) + |
| 1285 | + beta_dt * parthenon::Update::FluxDivHelper(v, k, j, i, ndim, |
| 1286 | + coords, u0_cons); |
| 1287 | + } |
| 1288 | + |
| 1289 | + // no need to include gamma - 1 as we only care for negative values |
| 1290 | + auto new_p = new_cons[IEN] - |
| 1291 | + 0.5 * |
| 1292 | + (SQR(new_cons[IM1]) + SQR(new_cons[IM2]) + SQR(new_cons[IM3])) / |
| 1293 | + new_cons[IDN]; |
| 1294 | + if constexpr (fluid == Fluid::glmmhd) { |
| 1295 | + new_p -= 0.5 * (SQR(new_cons[IB1]) + SQR(new_cons[IB2]) + SQR(new_cons[IB3])); |
| 1296 | + } |
| 1297 | + // no correction required |
| 1298 | + if (new_cons[IDN] > 0.0 && new_p > 0.0) { |
| 1299 | + return; |
| 1300 | + } |
| 1301 | +          // In principle, there could be a race condition as this loop goes over all |
| 1302 | +          // k,j,i and we are updating the i+1 flux here. However, the results are |
| 1303 | +          // identical because u0_prim is never updated in this kernel so we don't |
| 1304 | + // worry about it. |
| 1305 | +          // TODO(pgrete) as we need to keep the function signature identical for now |
| 1306 | +          // (due to Cuda compiler bug) we could potentially template these functions |
| 1307 | + // and get rid of the `if constexpr` |
| 1308 | + riemann.Solve(eos, k, j, i, IV1, u0_prim, u0_cons, c_h); |
| 1309 | + riemann.Solve(eos, k, j, i + 1, IV1, u0_prim, u0_cons, c_h); |
| 1310 | + |
| 1311 | + if (ndim >= 2) { |
| 1312 | + riemann.Solve(eos, k, j, i, IV2, u0_prim, u0_cons, c_h); |
| 1313 | + riemann.Solve(eos, k, j + 1, i, IV2, u0_prim, u0_cons, c_h); |
| 1314 | + } |
| 1315 | + if (ndim >= 3) { |
| 1316 | + riemann.Solve(eos, k, j, i, IV3, u0_prim, u0_cons, c_h); |
| 1317 | + riemann.Solve(eos, k + 1, j, i, IV3, u0_prim, u0_cons, c_h); |
| 1318 | + } |
| 1319 | + lnum_corrected += 1; |
| 1320 | + }, |
| 1321 | + Kokkos::Sum<std::int64_t>(num_corrected)); |
| 1322 | + |
| 1323 | + // update central counter |
| 1324 | + const auto counter = pkg->Param<std::int64_t>("fixed_num_cells_fofc"); |
| 1325 | + pkg->UpdateParam("fixed_num_cells_fofc", counter + num_corrected); |
1340 | 1326 |
|
1341 | 1327 | return TaskStatus::complete; |
1342 | 1328 | } |
|
0 commit comments