|
5 | 5 | //======================================================================================== |
6 | 6 |
|
7 | 7 | #include <algorithm> |
| 8 | +#include <cstdint> |
8 | 9 | #include <limits> |
9 | 10 | #include <memory> |
10 | 11 | #include <string> |
@@ -313,6 +314,9 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) { |
313 | 314 | pkg->AddParam<Real>("dt_hyp", std::numeric_limits<Real>::max(), |
314 | 315 | Params::Mutability::Mutable); |
315 | 316 |
|
| 317 | + // counter for first order flux correction cells |
| 318 | + pkg->AddParam<std::int64_t>("fixed_num_cells_fofc", 0, Params::Mutability::Mutable); |
| 319 | + |
316 | 320 | const auto recon_str = pin->GetString("hydro", "reconstruction"); |
317 | 321 | int recon_need_nghost = 3; // largest number for the choices below |
318 | 322 | auto recon = Reconstruction::undefined; |
@@ -1256,87 +1260,69 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat |
1256 | 1260 |
|
1257 | 1261 | auto riemann = Riemann<fluid, RiemannSolver::llf>(); |
1258 | 1262 |
|
1259 | | - std::int64_t num_corrected, num_need_floor; |
1260 | | - // Potentially need multiple attempts as flux correction corrects 6 (in 3D) fluxes |
1261 | | - // of a single cell at the same time. So the neighboring cells need to be rechecked with |
1262 | | - // the corrected fluxes as the corrected fluxes in one cell may result in the need to |
1263 | | - // correct all the fluxes of an originally "good" neighboring cell. |
1264 | | - size_t num_attempts = 0; |
1265 | | - do { |
1266 | | - num_corrected = 0; |
1267 | | - |
1268 | | - Kokkos::parallel_reduce( |
1269 | | - "FirstOrderFluxCorrect", |
1270 | | - Kokkos::MDRangePolicy<Kokkos::Rank<4>>( |
1271 | | - DevExecSpace(), {0, kb.s, jb.s, ib.s}, |
1272 | | - {u0_cons_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1}, |
1273 | | - {1, 1, 1, ib.e + 1 - ib.s}), |
1274 | | - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, |
1275 | | - std::int64_t &lnum_corrected, std::int64_t &lnum_need_floor) { |
1276 | | - const auto &coords = u0_cons_pack.GetCoords(b); |
1277 | | - const auto &u0_prim = u0_prim_pack(b); |
1278 | | - auto &u0_cons = u0_cons_pack(b); |
1279 | | - |
1280 | | - // In principle, the u_cons.fluxes could be updated in parallel by a |
1281 | | - // different thread resulting in a race conditon here. However, if the |
1282 | | - // fluxes of a cell have been updated (anywhere) then the entire kernel will |
1283 | | - // be called again anyway, and, at that point the already fixed |
1284 | | - // u0_cons.fluxes will automaticlly be used here. |
1285 | | - Real new_cons[NVAR]; |
1286 | | - for (auto v = 0; v < NVAR; v++) { |
1287 | | - new_cons[v] = |
1288 | | - gam0 * u0_cons(v, k, j, i) + gam1 * u1_cons_pack(b, v, k, j, i) + |
1289 | | - beta_dt * |
1290 | | - parthenon::Update::FluxDivHelper(v, k, j, i, ndim, coords, u0_cons); |
1291 | | - } |
| 1263 | + std::int64_t num_corrected = 0; |
1292 | 1264 |
|
1293 | | - // no need to include gamma - 1 as we only care for negative values |
1294 | | - auto new_p = |
1295 | | - new_cons[IEN] - |
1296 | | - 0.5 * (SQR(new_cons[IM1]) + SQR(new_cons[IM2]) + SQR(new_cons[IM3])) / |
1297 | | - new_cons[IDN]; |
1298 | | - if constexpr (fluid == Fluid::glmmhd) { |
1299 | | - new_p -= 0.5 * (SQR(new_cons[IB1]) + SQR(new_cons[IB2]) + SQR(new_cons[IB3])); |
1300 | | - } |
1301 | | - // no correction required |
1302 | | - if (new_cons[IDN] > 0.0 && new_p > 0.0) { |
1303 | | - return; |
1304 | | - } |
1305 | | - // if already tried 3 times and only pressure is negative, then we'll rely |
1306 | | - // on the pressure floor during ConsToPrim conversion |
1307 | | - if (num_attempts > 2 && new_cons[IDN] > 0.0 && new_p < 0.0) { |
1308 | | - lnum_need_floor += 1; |
1309 | | - return; |
1310 | | - } |
1311 | | - // In principle, there could be a racecondion as this loop goes over all |
1312 | | - // k,j,i and we updating the i+1 flux here. However, the results are |
1313 | | - // idential because u0_prim is never updated in this kernel so we don't |
1314 | | - // worry about it. |
1315 | | - // TODO(pgrete) as we need to keep the function signature idential for now |
1316 | | - // (due to Cuda compiler bug) we could potentially template these function |
1317 | | - // and get rid of the `if constexpr` |
1318 | | - riemann.Solve(eos, k, j, i, IV1, u0_prim, u0_cons, c_h); |
1319 | | - riemann.Solve(eos, k, j, i + 1, IV1, u0_prim, u0_cons, c_h); |
1320 | | - |
1321 | | - if (ndim >= 2) { |
1322 | | - riemann.Solve(eos, k, j, i, IV2, u0_prim, u0_cons, c_h); |
1323 | | - riemann.Solve(eos, k, j + 1, i, IV2, u0_prim, u0_cons, c_h); |
1324 | | - } |
1325 | | - if (ndim >= 3) { |
1326 | | - riemann.Solve(eos, k, j, i, IV3, u0_prim, u0_cons, c_h); |
1327 | | - riemann.Solve(eos, k + 1, j, i, IV3, u0_prim, u0_cons, c_h); |
1328 | | - } |
1329 | | - lnum_corrected += 1; |
1330 | | - }, |
1331 | | - Kokkos::Sum<std::int64_t>(num_corrected), |
1332 | | - Kokkos::Sum<std::int64_t>(num_need_floor)); |
1333 | | - // TODO(pgrete) make this optional and global (potentially store values in Params) |
1334 | | - // std::cout << "[" << parthenon::Globals::my_rank << "] Attempt: " << |
1335 | | - // num_attempts |
1336 | | - // << " Corrected (center): " << num_corrected |
1337 | | - // << " Failed (will rely on floor): " << num_need_floor << std::endl; |
1338 | | - num_attempts += 1; |
1339 | | - } while (num_corrected > 0 && num_attempts < 4); |
| 1265 | + Kokkos::parallel_reduce( |
| 1266 | + "FirstOrderFluxCorrect", |
| 1267 | + Kokkos::MDRangePolicy<Kokkos::Rank<4>>( |
| 1268 | + DevExecSpace(), {0, kb.s, jb.s, ib.s}, |
| 1269 | + {u0_cons_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1}, |
| 1270 | + {1, 1, 1, ib.e + 1 - ib.s}), |
| 1271 | + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, |
| 1272 | + std::int64_t &lnum_corrected) { |
| 1273 | + const auto &coords = u0_cons_pack.GetCoords(b); |
| 1274 | + const auto &u0_prim = u0_prim_pack(b); |
| 1275 | + auto &u0_cons = u0_cons_pack(b); |
| 1276 | + |
| 1277 | + // In principle, the u_cons.fluxes could be updated in parallel by a |
| 1278 | +          // different thread resulting in a race condition here. However, if the |
| 1279 | + // fluxes of a cell have been updated (anywhere) then the entire kernel will |
| 1280 | + // be called again anyway, and, at that point the already fixed |
| 1281 | +          // u0_cons.fluxes will automatically be used here. |
| 1282 | + Real new_cons[NVAR]; |
| 1283 | + for (auto v = 0; v < NVAR; v++) { |
| 1284 | + new_cons[v] = gam0 * u0_cons(v, k, j, i) + gam1 * u1_cons_pack(b, v, k, j, i) + |
| 1285 | + beta_dt * parthenon::Update::FluxDivHelper(v, k, j, i, ndim, |
| 1286 | + coords, u0_cons); |
| 1287 | + } |
| 1288 | + |
| 1289 | + // no need to include gamma - 1 as we only care for negative values |
| 1290 | + auto new_p = new_cons[IEN] - |
| 1291 | + 0.5 * |
| 1292 | + (SQR(new_cons[IM1]) + SQR(new_cons[IM2]) + SQR(new_cons[IM3])) / |
| 1293 | + new_cons[IDN]; |
| 1294 | + if constexpr (fluid == Fluid::glmmhd) { |
| 1295 | + new_p -= 0.5 * (SQR(new_cons[IB1]) + SQR(new_cons[IB2]) + SQR(new_cons[IB3])); |
| 1296 | + } |
| 1297 | + // no correction required |
| 1298 | + if (new_cons[IDN] > 0.0 && new_p > 0.0) { |
| 1299 | + return; |
| 1300 | + } |
| 1301 | +          // In principle, there could be a race condition as this loop goes over all |
| 1302 | +          // k,j,i and we are updating the i+1 flux here. However, the results are |
| 1303 | +          // identical because u0_prim is never updated in this kernel so we don't |
| 1304 | + // worry about it. |
| 1305 | +          // TODO(pgrete) as we need to keep the function signature identical for now |
| 1306 | +          // (due to Cuda compiler bug) we could potentially template these functions |
| 1307 | + // and get rid of the `if constexpr` |
| 1308 | + riemann.Solve(eos, k, j, i, IV1, u0_prim, u0_cons, c_h); |
| 1309 | + riemann.Solve(eos, k, j, i + 1, IV1, u0_prim, u0_cons, c_h); |
| 1310 | + |
| 1311 | + if (ndim >= 2) { |
| 1312 | + riemann.Solve(eos, k, j, i, IV2, u0_prim, u0_cons, c_h); |
| 1313 | + riemann.Solve(eos, k, j + 1, i, IV2, u0_prim, u0_cons, c_h); |
| 1314 | + } |
| 1315 | + if (ndim >= 3) { |
| 1316 | + riemann.Solve(eos, k, j, i, IV3, u0_prim, u0_cons, c_h); |
| 1317 | + riemann.Solve(eos, k + 1, j, i, IV3, u0_prim, u0_cons, c_h); |
| 1318 | + } |
| 1319 | + lnum_corrected += 1; |
| 1320 | + }, |
| 1321 | + Kokkos::Sum<std::int64_t>(num_corrected)); |
| 1322 | + |
| 1323 | + // update central counter |
| 1324 | + const auto counter = pkg->Param<std::int64_t>("fixed_num_cells_fofc"); |
| 1325 | + pkg->UpdateParam("fixed_num_cells_fofc", counter + num_corrected); |
1340 | 1326 |
|
1341 | 1327 | return TaskStatus::complete; |
1342 | 1328 | } |
|
0 commit comments