Skip to content

Commit f9b80f9

Browse files
committed
Merge branch 'develop' into lroberts36/multigrid-reduce-block-size-first
2 parents 6f69341 + c193aca commit f9b80f9

25 files changed

+268
-139
lines changed

.github/workflows/ci-macos.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,16 @@ jobs:
3030

3131
- name: Install dependencies
3232
run: |
33-
brew install openmpi hdf5-mpi adios2 || true
33+
brew install openmpi adios2 || true
34+
35+
# this is a workaround because homebrew's stable is hdf5 2.0.0 and cmake's
36+
# find_package(HDF5) doesn't properly determine HDF5_IS_PARALLEL
37+
# this *should* be fixed in cmake 4.3
38+
- name: Install hdf5 from old formula
39+
run: |
40+
mkdir -p $(brew --repository)/Library/Taps/user/homebrew-repo
41+
curl -o $(brew --repository)/Library/Taps/user/homebrew-repo/hdf5-mpi.rb https://raw.githubusercontent.com/Homebrew/homebrew-core/d03791212a4670e68f21c66939c5d348bc1d4bef/Formula/h/hdf5-mpi.rb
42+
brew install user/repo/hdf5-mpi
3443
3544
- name: Install parthenon_tools
3645
run: |

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44

55
### Added (new features/APIs/variables/...)
66
- [[PR 1346]](https://github.com/parthenon-hpc-lab/parthenon/pull/1346) Allow for user defined inter-level restrictions in multigrid
7+
- [[PR 1308]](https://github.com/parthenon-hpc-lab/parthenon/pull/1308) Add additional indexing options for (b, type, ...indices) meshdata fields. Helpful for lower-dimensional Metadata::None fields.
78
- [[PR 1344]](https://github.com/parthenon-hpc-lab/parthenon/pull/1344) Add option to communicate single layer of ghosts, only communicate required two-level composite boundaries
89

910

1011
### Changed (changing behavior/API/variables/...)
1112
- [[PR 1355]](https://github.com/parthenon-hpc-lab/parthenon/pull/1355) Allow disabling format and lint targets
1213

1314
### Fixed (not changing behavior/API/variables/...)
15+
- [[PR 1365]](https://github.com/parthenon-hpc-lab/parthenon/pull/1365) Fix boundary conditions being called with coarse=true on blocks that have no coarser neighbors.
1416
- [[PR 1345]](https://github.com/parthenon-hpc-lab/parthenon/pull/1345) Coalesce dot product reductions and speed up kernel
1517
- [[PR 1360]](https://github.com/parthenon-hpc-lab/parthenon/pull/1360) Fix boundary cache clearing in different MeshData partitions
1618

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ add_library(parthenon
187187
mesh/mesh-gmg.cpp
188188
mesh/mesh.cpp
189189
mesh/mesh.hpp
190+
mesh/mesh_neighbors.hpp
190191
mesh/meshblock.hpp
191192
mesh/meshblock_pack.hpp
192193
mesh/meshblock.cpp

src/bvals/boundary_conditions.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ TaskStatus ApplyBoundaryConditionsOnCoarseOrFine(std::shared_ptr<MeshBlockData<R
4141
Mesh *pmesh = pmb->pmy_mesh;
4242
const int ndim = pmesh->ndim;
4343

44+
// We only need to call the BC on the coarse buffer if one of the neighbors
45+
// is at a coarser level than us.
46+
if (coarse && !pmb->HasCoarserNeighbors()) return TaskStatus::complete;
47+
4448
auto &tree_bnd_func = pmesh->forest.GetTreePtr(pmb->loc.tree())->MeshBndryFnctn;
4549
auto &tree_bnd_func_user =
4650
pmesh->forest.GetTreePtr(pmb->loc.tree())->UserBoundaryFunctions;

src/bvals/bvals.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@ void BoundarySwarm::SetupPersistentMPI() {
7070
std::shared_ptr<MeshBlock> pmb = GetBlockPointer();
7171

7272
// Initialize neighbor communications to other ranks
73-
for (int n = 0; n < pmb->neighbors.size(); n++) {
74-
NeighborBlock &nb = pmb->neighbors[n];
73+
const auto &neighbors = pmb->GetNeighbors();
74+
for (int n = 0; n < neighbors.size(); n++) {
75+
const NeighborBlock &nb = neighbors[n];
7576
// Neighbor on different MPI process
7677
if (nb.rank != Globals::my_rank) {
7778
send_tag[nb.bufid] = pmb->pmy_mesh->tag_map.GetTag(pmb.get(), nb);
@@ -93,8 +94,9 @@ void BoundarySwarm::Send(BoundaryCommSubset phase) {
9394
std::shared_ptr<MeshBlock> pmb = GetBlockPointer();
9495
// Fence to make sure buffers are loaded before sending
9596
pmb->exec_space.fence();
96-
for (int n = 0; n < pmb->neighbors.size(); n++) {
97-
NeighborBlock &nb = pmb->neighbors[n];
97+
const auto &neighbors = pmb->GetNeighbors();
98+
for (int n = 0; n < neighbors.size(); n++) {
99+
const NeighborBlock &nb = neighbors[n];
98100
if (nb.rank != Globals::my_rank) {
99101
#ifdef MPI_PARALLEL
100102
PARTHENON_REQUIRE(bd_var_.req_send[nb.bufid] == MPI_REQUEST_NULL,
@@ -131,8 +133,9 @@ void BoundarySwarm::Receive(BoundaryCommSubset phase) {
131133
#ifdef MPI_PARALLEL
132134
std::shared_ptr<MeshBlock> pmb = GetBlockPointer();
133135
const int &mylevel = pmb->loc.level();
134-
for (int n = 0; n < pmb->neighbors.size(); n++) {
135-
NeighborBlock &nb = pmb->neighbors[n];
136+
const auto &neighbors = pmb->GetNeighbors();
137+
for (int n = 0; n < neighbors.size(); n++) {
138+
const NeighborBlock &nb = neighbors[n];
136139
if (nb.rank != Globals::my_rank) {
137140
// Check to see if we got a message
138141
int test;

src/bvals/comms/bnd_info.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -441,13 +441,7 @@ ProResInfo ProResInfo::GetSet(MeshBlock *pmb, const NeighborBlock &nb,
441441
// This will select a superset of the boundaries that actually need to be restricted,
442442
// more logic could be added to only restrict boundary regions that abut boundary
443443
// regions that were filled by coarser neighbors
444-
bool restricted = false;
445-
int mylevel = pmb->loc.level();
446-
if (mylevel > 0) {
447-
for (const auto &nb : pmb->neighbors) {
448-
restricted = restricted || (nb.origin_loc.level() == (mylevel - 1));
449-
}
450-
}
444+
bool restricted = pmb->HasCoarserNeighbors();
451445

452446
for (auto el : v->GetTopologicalElements()) {
453447
out.IncludeTopoEl(el) = true;

src/bvals/comms/boundary_communication.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ TaskStatus ReceiveBoundBufs(std::shared_ptr<MeshData<Real>> &md) {
234234
int ibound = 0;
235235
if (Globals::sparse_config.enabled && all_received) {
236236
ForEachBoundary<bound_type>(
237-
md, [&](auto pmb, sp_mbd_t rc, nb_t &nb, const sp_cv_t v) {
237+
md, [&](auto pmb, sp_mbd_t rc, const nb_t &nb, const sp_cv_t v) {
238238
const std::size_t ibuf = cache.idx_vec[ibound];
239239
auto &buf = *cache.buf_vec[ibuf];
240240

src/bvals/comms/build_boundary_buffers.cpp

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -54,20 +54,23 @@ void BuildBoundaryBufferSubset(std::shared_ptr<MeshData<Real>> &md,
5454
std::unordered_map<std::size_t, std::size_t>
5555
nbufs_allocated; // total that are actually allocated
5656

57-
ForEachBoundary<BTYPE>(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) {
58-
// Calculate the required size of the buffer for this boundary
59-
int buf_size = GetBufferSize(pmb, nb, v);
60-
// LR: Multigrid logic requires blocks sending messages to themselves (since the same
61-
// block can show up on two multigrid levels). This doesn't require any data
62-
// transfer, so the message size can be zero. It is essentially just a flag to show
63-
// that the block is done being used on one level and can be used on the next level.
64-
if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0;
65-
66-
nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry
67-
nbufs_allocated[buf_size] += v->IsAllocated();
68-
});
69-
70-
ForEachBoundary<BTYPE>(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) {
57+
ForEachBoundary<BTYPE>(
58+
md, [&](auto pmb, sp_mbd_t /*rc*/, const nb_t &nb, const sp_cv_t v) {
59+
// Calculate the required size of the buffer for this boundary
60+
int buf_size = GetBufferSize(pmb, nb, v);
61+
// LR: Multigrid logic requires blocks sending messages to themselves (since the
62+
// same block can show up on two multigrid levels). This doesn't require any data
63+
// transfer, so the message size can be zero. It is essentially just a flag to
64+
// show that the block is done being used on one level and can be used on the
65+
// next level.
66+
if (pmb->gid == nb.gid && nb.offsets.IsCell()) buf_size = 0;
67+
68+
nbufs[buf_size] += 1; // relying on value init of int to 0 for initial entry
69+
nbufs_allocated[buf_size] += v->IsAllocated();
70+
});
71+
72+
ForEachBoundary<BTYPE>(md, [&](auto pmb, sp_mbd_t /*rc*/, const nb_t &nb,
73+
const sp_cv_t v) {
7174
// Calculate the required size of the buffer for this boundary
7275
int buf_size = GetBufferSize(pmb, nb, v);
7376
// See comment above on the same logic.
@@ -143,18 +146,19 @@ void BuildBoundaryBufferSubset(std::shared_ptr<MeshData<Real>> &md,
143146
template <BoundaryType BTYPE>
144147
void RegisterCoalescedCommsSubset(std::shared_ptr<MeshData<Real>> &md) {
145148
Mesh *pmesh = md->GetMeshPointer();
146-
ForEachBoundary<BTYPE>(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) {
147-
const int receiver_rank = nb.rank;
148-
const int sender_rank = Globals::my_rank;
149-
if (receiver_rank != sender_rank) {
150-
if constexpr (IsSender(BTYPE)) {
151-
pmesh->pcoalesced_comms->AddSendBuffer(md->partition, pmb, nb, v, BTYPE);
152-
}
153-
if constexpr (IsReceiver(BTYPE)) {
154-
pmesh->pcoalesced_comms->AddRecvBuffer(pmb, nb, v, BTYPE);
155-
}
156-
}
157-
});
149+
ForEachBoundary<BTYPE>(
150+
md, [&](auto pmb, sp_mbd_t /*rc*/, const nb_t &nb, const sp_cv_t v) {
151+
const int receiver_rank = nb.rank;
152+
const int sender_rank = Globals::my_rank;
153+
if (receiver_rank != sender_rank) {
154+
if constexpr (IsSender(BTYPE)) {
155+
pmesh->pcoalesced_comms->AddSendBuffer(md->partition, pmb, nb, v, BTYPE);
156+
}
157+
if constexpr (IsReceiver(BTYPE)) {
158+
pmesh->pcoalesced_comms->AddRecvBuffer(pmb, nb, v, BTYPE);
159+
}
160+
}
161+
});
158162
}
159163

160164
} // namespace

src/bvals/comms/bvals_utils.hpp

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ void InitializeBufferCache(std::shared_ptr<MeshData<Real>> &md, COMM_MAP *comm_m
105105
std::vector<std::tuple<int, int, Mesh::channel_key_t>> key_order;
106106

107107
int boundary_idx = 0;
108-
ForEachBoundary<bound_type>(md, [&](auto pmb, sp_mbd_t rc, nb_t &nb, const sp_cv_t v) {
108+
ForEachBoundary<bound_type>(md, [&](auto pmb, sp_mbd_t rc, const nb_t &nb, const sp_cv_t v) {
109109
auto key = KeyFunc(pmb, nb, v, bound_type, md->GetBoundBufferId(bound_type));
110110
PARTHENON_DEBUG_REQUIRE(comm_map->count(key) > 0,
111111
"Boundary communicator does not exist");
@@ -159,7 +159,8 @@ inline auto CheckSendBufferCacheForRebuild(std::shared_ptr<MeshData<Real>> md) {
159159
bool rebuild = false;
160160
bool other_communication_unfinished = false;
161161
int nbound = 0;
162-
ForEachBoundary<BOUND_TYPE>(md, [&](auto pmb, sp_mbd_t rc, nb_t &nb, const sp_cv_t v) {
162+
ForEachBoundary<BOUND_TYPE>(md, [&](auto pmb, sp_mbd_t rc, const nb_t &nb,
163+
const sp_cv_t v) {
163164
const std::size_t ibuf = cache.idx_vec[nbound];
164165
auto &buf = *(cache.buf_vec[ibuf]);
165166

@@ -191,7 +192,8 @@ inline auto CheckReceiveBufferCacheForRebuild(std::shared_ptr<MeshData<Real>> md
191192
bool rebuild = false;
192193
int nbound = 0;
193194

194-
ForEachBoundary<BOUND_TYPE>(md, [&](auto pmb, sp_mbd_t rc, nb_t &nb, const sp_cv_t v) {
195+
ForEachBoundary<BOUND_TYPE>(md, [&](auto pmb, sp_mbd_t rc, const nb_t &nb,
196+
const sp_cv_t v) {
195197
const std::size_t ibuf = cache.idx_vec[nbound];
196198
auto &buf = *cache.buf_vec[ibuf];
197199
if (ibuf < cache.bnd_info_h.size()) {
@@ -241,19 +243,20 @@ inline void RebuildBufferCache(std::shared_ptr<MeshData<Real>> md, int nbound,
241243
cache.prores_cache.Initialize(nbound, pkg);
242244

243245
int ibound = 0;
244-
ForEachBoundary<BOUND_TYPE>(md, [&](auto pmb, sp_mbd_t rc, nb_t &nb, const sp_cv_t v) {
245-
// bnd_info
246-
const std::size_t ibuf = cache.idx_vec[ibound];
247-
cache.bnd_info_h(ibuf) = BndInfoCreator(pmb, nb, v, cache.buf_vec[ibuf]);
248-
249-
// subsets ordering is same as in cache.bnd_info
250-
// RefinementFunctions_t owns all relevant functionality, so
251-
// only one ParArray2D needed.
252-
cache.prores_cache.RegisterRegionHost(ibuf, ProResInfoCreator(pmb, nb, v), v.get(),
253-
pkg);
254-
255-
++ibound;
256-
});
246+
ForEachBoundary<BOUND_TYPE>(
247+
md, [&](auto pmb, sp_mbd_t rc, const nb_t &nb, const sp_cv_t v) {
248+
// bnd_info
249+
const std::size_t ibuf = cache.idx_vec[ibound];
250+
cache.bnd_info_h(ibuf) = BndInfoCreator(pmb, nb, v, cache.buf_vec[ibuf]);
251+
252+
// subsets ordering is same as in cache.bnd_info
253+
// RefinementFunctions_t owns all relevant functionality, so
254+
// only one ParArray2D needed.
255+
cache.prores_cache.RegisterRegionHost(ibuf, ProResInfoCreator(pmb, nb, v),
256+
v.get(), pkg);
257+
258+
++ibound;
259+
});
257260
Kokkos::deep_copy(cache.bnd_info, cache.bnd_info_h);
258261
cache.prores_cache.CopyToDevice();
259262
}

0 commit comments

Comments
 (0)