Skip to content

Commit 64bd96c

Browse files
committed
Add FOFC reporting
1 parent c3f42a3 commit 64bd96c

File tree

2 files changed

+90
-80
lines changed

2 files changed

+90
-80
lines changed

src/hydro/hydro.cpp

Lines changed: 66 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
//========================================================================================
66

77
#include <algorithm>
8+
#include <cstdint>
89
#include <limits>
910
#include <memory>
1011
#include <string>
@@ -313,6 +314,9 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
313314
pkg->AddParam<Real>("dt_hyp", std::numeric_limits<Real>::max(),
314315
Params::Mutability::Mutable);
315316

317+
// counter for first order flux correction cells
318+
pkg->AddParam<std::int64_t>("fixed_num_cells_fofc", 0, Params::Mutability::Mutable);
319+
316320
const auto recon_str = pin->GetString("hydro", "reconstruction");
317321
int recon_need_nghost = 3; // largest number for the choices below
318322
auto recon = Reconstruction::undefined;
@@ -1256,87 +1260,69 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
12561260

12571261
auto riemann = Riemann<fluid, RiemannSolver::llf>();
12581262

1259-
std::int64_t num_corrected, num_need_floor;
1260-
// Potentially need multiple attempts as flux correction corrects 6 (in 3D) fluxes
1261-
// of a single cell at the same time. So the neighboring cells need to be rechecked with
1262-
// the corrected fluxes as the corrected fluxes in one cell may result in the need to
1263-
// correct all the fluxes of an originally "good" neighboring cell.
1264-
size_t num_attempts = 0;
1265-
do {
1266-
num_corrected = 0;
1267-
1268-
Kokkos::parallel_reduce(
1269-
"FirstOrderFluxCorrect",
1270-
Kokkos::MDRangePolicy<Kokkos::Rank<4>>(
1271-
DevExecSpace(), {0, kb.s, jb.s, ib.s},
1272-
{u0_cons_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1},
1273-
{1, 1, 1, ib.e + 1 - ib.s}),
1274-
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i,
1275-
std::int64_t &lnum_corrected, std::int64_t &lnum_need_floor) {
1276-
const auto &coords = u0_cons_pack.GetCoords(b);
1277-
const auto &u0_prim = u0_prim_pack(b);
1278-
auto &u0_cons = u0_cons_pack(b);
1279-
1280-
// In principle, the u_cons.fluxes could be updated in parallel by a
1281-
// different thread resulting in a race conditon here. However, if the
1282-
// fluxes of a cell have been updated (anywhere) then the entire kernel will
1283-
// be called again anyway, and, at that point the already fixed
1284-
// u0_cons.fluxes will automaticlly be used here.
1285-
Real new_cons[NVAR];
1286-
for (auto v = 0; v < NVAR; v++) {
1287-
new_cons[v] =
1288-
gam0 * u0_cons(v, k, j, i) + gam1 * u1_cons_pack(b, v, k, j, i) +
1289-
beta_dt *
1290-
parthenon::Update::FluxDivHelper(v, k, j, i, ndim, coords, u0_cons);
1291-
}
1263+
std::int64_t num_corrected = 0;
12921264

1293-
// no need to include gamma - 1 as we only care for negative values
1294-
auto new_p =
1295-
new_cons[IEN] -
1296-
0.5 * (SQR(new_cons[IM1]) + SQR(new_cons[IM2]) + SQR(new_cons[IM3])) /
1297-
new_cons[IDN];
1298-
if constexpr (fluid == Fluid::glmmhd) {
1299-
new_p -= 0.5 * (SQR(new_cons[IB1]) + SQR(new_cons[IB2]) + SQR(new_cons[IB3]));
1300-
}
1301-
// no correction required
1302-
if (new_cons[IDN] > 0.0 && new_p > 0.0) {
1303-
return;
1304-
}
1305-
// if already tried 3 times and only pressure is negative, then we'll rely
1306-
// on the pressure floor during ConsToPrim conversion
1307-
if (num_attempts > 2 && new_cons[IDN] > 0.0 && new_p < 0.0) {
1308-
lnum_need_floor += 1;
1309-
return;
1310-
}
1311-
// In principle, there could be a racecondion as this loop goes over all
1312-
// k,j,i and we updating the i+1 flux here. However, the results are
1313-
// idential because u0_prim is never updated in this kernel so we don't
1314-
// worry about it.
1315-
// TODO(pgrete) as we need to keep the function signature idential for now
1316-
// (due to Cuda compiler bug) we could potentially template these function
1317-
// and get rid of the `if constexpr`
1318-
riemann.Solve(eos, k, j, i, IV1, u0_prim, u0_cons, c_h);
1319-
riemann.Solve(eos, k, j, i + 1, IV1, u0_prim, u0_cons, c_h);
1320-
1321-
if (ndim >= 2) {
1322-
riemann.Solve(eos, k, j, i, IV2, u0_prim, u0_cons, c_h);
1323-
riemann.Solve(eos, k, j + 1, i, IV2, u0_prim, u0_cons, c_h);
1324-
}
1325-
if (ndim >= 3) {
1326-
riemann.Solve(eos, k, j, i, IV3, u0_prim, u0_cons, c_h);
1327-
riemann.Solve(eos, k + 1, j, i, IV3, u0_prim, u0_cons, c_h);
1328-
}
1329-
lnum_corrected += 1;
1330-
},
1331-
Kokkos::Sum<std::int64_t>(num_corrected),
1332-
Kokkos::Sum<std::int64_t>(num_need_floor));
1333-
// TODO(pgrete) make this optional and global (potentially store values in Params)
1334-
// std::cout << "[" << parthenon::Globals::my_rank << "] Attempt: " <<
1335-
// num_attempts
1336-
// << " Corrected (center): " << num_corrected
1337-
// << " Failed (will rely on floor): " << num_need_floor << std::endl;
1338-
num_attempts += 1;
1339-
} while (num_corrected > 0 && num_attempts < 4);
1265+
Kokkos::parallel_reduce(
1266+
"FirstOrderFluxCorrect",
1267+
Kokkos::MDRangePolicy<Kokkos::Rank<4>>(
1268+
DevExecSpace(), {0, kb.s, jb.s, ib.s},
1269+
{u0_cons_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1},
1270+
{1, 1, 1, ib.e + 1 - ib.s}),
1271+
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i,
1272+
std::int64_t &lnum_corrected) {
1273+
const auto &coords = u0_cons_pack.GetCoords(b);
1274+
const auto &u0_prim = u0_prim_pack(b);
1275+
auto &u0_cons = u0_cons_pack(b);
1276+
1277+
// In principle, the u_cons.fluxes could be updated in parallel by a
1278+
// different thread resulting in a race condition here. However, if the
1279+
// fluxes of a cell have been updated (anywhere) then the entire kernel will
1280+
// be called again anyway, and, at that point the already fixed
1281+
// u0_cons.fluxes will automatically be used here.
1282+
Real new_cons[NVAR];
1283+
for (auto v = 0; v < NVAR; v++) {
1284+
new_cons[v] = gam0 * u0_cons(v, k, j, i) + gam1 * u1_cons_pack(b, v, k, j, i) +
1285+
beta_dt * parthenon::Update::FluxDivHelper(v, k, j, i, ndim,
1286+
coords, u0_cons);
1287+
}
1288+
1289+
// no need to include gamma - 1 as we only care for negative values
1290+
auto new_p = new_cons[IEN] -
1291+
0.5 *
1292+
(SQR(new_cons[IM1]) + SQR(new_cons[IM2]) + SQR(new_cons[IM3])) /
1293+
new_cons[IDN];
1294+
if constexpr (fluid == Fluid::glmmhd) {
1295+
new_p -= 0.5 * (SQR(new_cons[IB1]) + SQR(new_cons[IB2]) + SQR(new_cons[IB3]));
1296+
}
1297+
// no correction required
1298+
if (new_cons[IDN] > 0.0 && new_p > 0.0) {
1299+
return;
1300+
}
1301+
// In principle, there could be a race condition as this loop goes over all
1302+
// k,j,i and we are updating the i+1 flux here. However, the results are
1303+
// identical because u0_prim is never updated in this kernel so we don't
1304+
// worry about it.
1305+
// TODO(pgrete) as we need to keep the function signature identical for now
1306+
// (due to Cuda compiler bug) we could potentially template these functions
1307+
// and get rid of the `if constexpr`
1308+
riemann.Solve(eos, k, j, i, IV1, u0_prim, u0_cons, c_h);
1309+
riemann.Solve(eos, k, j, i + 1, IV1, u0_prim, u0_cons, c_h);
1310+
1311+
if (ndim >= 2) {
1312+
riemann.Solve(eos, k, j, i, IV2, u0_prim, u0_cons, c_h);
1313+
riemann.Solve(eos, k, j + 1, i, IV2, u0_prim, u0_cons, c_h);
1314+
}
1315+
if (ndim >= 3) {
1316+
riemann.Solve(eos, k, j, i, IV3, u0_prim, u0_cons, c_h);
1317+
riemann.Solve(eos, k + 1, j, i, IV3, u0_prim, u0_cons, c_h);
1318+
}
1319+
lnum_corrected += 1;
1320+
},
1321+
Kokkos::Sum<std::int64_t>(num_corrected));
1322+
1323+
// update central counter
1324+
const auto counter = pkg->Param<std::int64_t>("fixed_num_cells_fofc");
1325+
pkg->UpdateParam("fixed_num_cells_fofc", counter + num_corrected);
13401326

13411327
return TaskStatus::complete;
13421328
}

src/hydro/hydro_driver.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "amr_criteria/refinement_package.hpp"
1515
#include "basic_types.hpp"
1616
#include "bvals/comms/bvals_in_one.hpp"
17+
#include "globals.hpp"
1718
#include "prolong_restrict/prolong_restrict.hpp"
1819
#include "utils/error_checking.hpp"
1920
#include <parthenon/parthenon.hpp>
@@ -575,6 +576,29 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
575576
auto fill_derived =
576577
tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
577578
}
579+
// Report numerical fixes.
580+
// We do this before the STS task because if there are issues from the diffusive fluxes,
581+
// we're in real trouble.
582+
TaskRegion &report_fixes_region = tc.AddRegion(1);
583+
auto &tl = report_fixes_region[0];
584+
tl.AddTask(
585+
none,
586+
[](Mesh *pmesh, StateDescriptor *hydro_pkg, const int stage) {
587+
if (parthenon::Globals::my_rank == 0) {
588+
std::stringstream msg;
589+
msg << "Fixes employed in stage " << stage << " (with "
590+
<< pmesh->GetTotalCells() << " cells): ";
591+
if (hydro_pkg->Param<bool>("first_order_flux_correct")) {
592+
msg << hydro_pkg->Param<std::int64_t>("fixed_num_cells_fofc") << " FOFC. ";
593+
}
594+
std::cout << msg.str() << "\n";
595+
}
596+
597+
hydro_pkg->UpdateParam("fixed_num_cells_fofc", 0); // reset counter for next stage
598+
return TaskStatus::complete;
599+
},
600+
pmesh, hydro_pkg.get(), stage);
601+
578602
const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
579603
// If any tasks modify the conserved variables before this place and after FillDerived,
580604
// then the STS tasks should be updated to not assume prim and cons are in sync.

0 commit comments

Comments
 (0)