Skip to content

Commit fe87fc5

Browse files
anandrdbzAnandsbryngelsonAnand
authored
Resolving bug with multiple ranks using IBM (#990)
Co-authored-by: Anand <[email protected]> Co-authored-by: Spencer Bryngelson <[email protected]> Co-authored-by: Anand <[email protected]>
1 parent 88c0a11 commit fe87fc5

File tree

7 files changed

+100
-66
lines changed

7 files changed

+100
-66
lines changed

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ jobs:
5353
run: |
5454
brew update
5555
brew upgrade
56-
brew install coreutils python cmake fftw hdf5 gcc@15 boost open-mpi lapack
56+
brew install coreutils python fftw hdf5 gcc@15 boost open-mpi lapack
5757
echo "FC=gfortran-15" >> $GITHUB_ENV
5858
echo "BOOST_INCLUDE=/opt/homebrew/include/" >> $GITHUB_ENV
5959

src/common/m_helper_basic.fpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,15 @@ contains
112112

113113
pure subroutine s_configure_coordinate_bounds(recon_type, weno_polyn, muscl_polyn, &
114114
igr_order, buff_size, idwint, idwbuff, &
115-
viscous, bubbles_lagrange, m, n, p, num_dims, igr)
115+
viscous, bubbles_lagrange, m, n, p, num_dims, igr, ib)
116116

117117
integer, intent(in) :: recon_type, weno_polyn, muscl_polyn
118118
integer, intent(in) :: m, n, p, num_dims, igr_order
119119
integer, intent(inout) :: buff_size
120120
type(int_bounds_info), dimension(3), intent(inout) :: idwint, idwbuff
121121
logical, intent(in) :: viscous, bubbles_lagrange
122122
logical, intent(in) :: igr
123+
logical, intent(in) :: ib
123124

124125
! Determining the number of cells that are needed in order to store
125126
! sufficient boundary conditions data as to iterate the solution in
@@ -142,6 +143,10 @@ contains
142143
buff_size = max(buff_size, 6)
143144
end if
144145

146+
if (ib) then
147+
buff_size = max(buff_size, 10)
148+
end if
149+
145150
! Configuring Coordinate Direction Indexes
146151
idwint(1)%beg = 0; idwint(2)%beg = 0; idwint(3)%beg = 0
147152
idwint(1)%end = m; idwint(2)%end = n; idwint(3)%end = p

src/pre_process/m_global_parameters.fpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -884,7 +884,7 @@ contains
884884
igr_order, buff_size, &
885885
idwint, idwbuff, viscous, &
886886
bubbles_lagrange, m, n, p, &
887-
num_dims, igr)
887+
num_dims, igr, ib)
888888

889889
#ifdef MFC_MPI
890890

src/simulation/m_global_parameters.fpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1254,7 +1254,7 @@ contains
12541254
igr_order, buff_size, &
12551255
idwint, idwbuff, viscous, &
12561256
bubbles_lagrange, m, n, p, &
1257-
num_dims, igr)
1257+
num_dims, igr, ib)
12581258
$:GPU_UPDATE(device='[idwint, idwbuff]')
12591259
12601260
! Configuring Coordinate Direction Indexes

src/simulation/m_ibm.fpp

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -53,19 +53,19 @@ contains
5353
impure subroutine s_initialize_ibm_module()
5454

5555
if (p > 0) then
56-
@:ALLOCATE(ib_markers%sf(-gp_layers:m+gp_layers, &
57-
-gp_layers:n+gp_layers, -gp_layers:p+gp_layers))
58-
@:ALLOCATE(levelset%sf(-gp_layers:m+gp_layers, &
59-
-gp_layers:n+gp_layers, -gp_layers:p+gp_layers, 1:num_ibs))
60-
@:ALLOCATE(levelset_norm%sf(-gp_layers:m+gp_layers, &
61-
-gp_layers:n+gp_layers, -gp_layers:p+gp_layers, 1:num_ibs, 1:3))
56+
@:ALLOCATE(ib_markers%sf(-buff_size:m+buff_size, &
57+
-buff_size:n+buff_size, -buff_size:p+buff_size))
58+
@:ALLOCATE(levelset%sf(-buff_size:m+buff_size, &
59+
-buff_size:n+buff_size, -buff_size:p+buff_size, 1:num_ibs))
60+
@:ALLOCATE(levelset_norm%sf(-buff_size:m+buff_size, &
61+
-buff_size:n+buff_size, -buff_size:p+buff_size, 1:num_ibs, 1:3))
6262
else
63-
@:ALLOCATE(ib_markers%sf(-gp_layers:m+gp_layers, &
64-
-gp_layers:n+gp_layers, 0:0))
65-
@:ALLOCATE(levelset%sf(-gp_layers:m+gp_layers, &
66-
-gp_layers:n+gp_layers, 0:0, 1:num_ibs))
67-
@:ALLOCATE(levelset_norm%sf(-gp_layers:m+gp_layers, &
68-
-gp_layers:n+gp_layers, 0:0, 1:num_ibs, 1:3))
63+
@:ALLOCATE(ib_markers%sf(-buff_size:m+buff_size, &
64+
-buff_size:n+buff_size, 0:0))
65+
@:ALLOCATE(levelset%sf(-buff_size:m+buff_size, &
66+
-buff_size:n+buff_size, 0:0, 1:num_ibs))
67+
@:ALLOCATE(levelset_norm%sf(-buff_size:m+buff_size, &
68+
-buff_size:n+buff_size, 0:0, 1:num_ibs, 1:3))
6969
end if
7070

7171
@:ACC_SETUP_SFs(ib_markers)
@@ -381,13 +381,13 @@ contains
381381
! s_cc points to the dim array we need
382382
if (dim == 1) then
383383
s_cc => x_cc
384-
bound = m
384+
bound = m + buff_size - 1
385385
elseif (dim == 2) then
386386
s_cc => y_cc
387-
bound = n
387+
bound = n + buff_size - 1
388388
else
389389
s_cc => z_cc
390-
bound = p
390+
bound = p + buff_size - 1
391391
end if
392392

393393
if (f_approx_equal(norm(dim), 0._wp)) then
@@ -402,9 +402,12 @@ contains
402402
index = ghost_points_in(q)%loc(dim)
403403
temp_loc = ghost_points_in(q)%ip_loc(dim)
404404
do while ((temp_loc < s_cc(index) &
405-
.or. temp_loc > s_cc(index + 1)) &
406-
.and. (index >= 0 .and. index <= bound))
405+
.or. temp_loc > s_cc(index + 1)))
407406
index = index + dir
407+
if (index < -buff_size .or. index > bound) then
408+
print *, "Increase buff_size further in m_helper_basic (currently set to a minimum of 10)"
409+
error stop "Increase buff_size"
410+
end if
408411
end do
409412
ghost_points_in(q)%ip_grid(dim) = index
410413
if (ghost_points_in(q)%DB(dim) == -1) then

src/simulation/m_mpi_proxy.fpp

Lines changed: 68 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,17 @@ contains
5353
if (ib) then
5454
if (n > 0) then
5555
if (p > 0) then
56-
i_halo_size = -1 + gp_layers* &
57-
& (m + 2*gp_layers + 1)* &
58-
& (n + 2*gp_layers + 1)* &
59-
& (p + 2*gp_layers + 1)/ &
60-
& (cells_bounds%mnp_min + 2*gp_layers + 1)
56+
i_halo_size = -1 + buff_size* &
57+
& (m + 2*buff_size + 1)* &
58+
& (n + 2*buff_size + 1)* &
59+
& (p + 2*buff_size + 1)/ &
60+
& (cells_bounds%mnp_min + 2*buff_size + 1)
6161
else
62-
i_halo_size = -1 + gp_layers* &
63-
& (cells_bounds%mn_max + 2*gp_layers + 1)
62+
i_halo_size = -1 + buff_size* &
63+
& (cells_bounds%mn_max + 2*buff_size + 1)
6464
end if
6565
else
66-
i_halo_size = -1 + gp_layers
66+
i_halo_size = -1 + buff_size
6767
end if
6868
6969
$:GPU_UPDATE(device='[i_halo_size]')
@@ -270,9 +270,9 @@ contains
270270
call nvtxStartRange("IB-MARKER-COMM-PACKBUF")
271271
272272
buffer_counts = (/ &
273-
gp_layers*(n + 1)*(p + 1), &
274-
gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
275-
gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) &
273+
buff_size*(n + 1)*(p + 1), &
274+
buff_size*(m + 2*buff_size + 1)*(p + 1), &
275+
buff_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
276276
/)
277277
278278
buffer_count = buffer_counts(mpi_dir)
@@ -297,12 +297,12 @@ contains
297297
298298
pack_offset = 0
299299
if (f_xor(pbc_loc == 1, beg_end_geq_0)) then
300-
pack_offset = grid_dims(mpi_dir) - gp_layers + 1
300+
pack_offset = grid_dims(mpi_dir) - buff_size + 1
301301
end if
302302
303303
unpack_offset = 0
304304
if (pbc_loc == 1) then
305-
unpack_offset = grid_dims(mpi_dir) + gp_layers + 1
305+
unpack_offset = grid_dims(mpi_dir) + buff_size + 1
306306
end if
307307
308308
! Pack Buffer to Send
@@ -312,30 +312,30 @@ contains
312312
$:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
313313
do l = 0, p
314314
do k = 0, n
315-
do j = 0, gp_layers - 1
316-
r = (j + gp_layers*(k + (n + 1)*l))
315+
do j = 0, buff_size - 1
316+
r = (j + buff_size*(k + (n + 1)*l))
317317
ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l)
318318
end do
319319
end do
320320
end do
321321
#:elif mpi_dir == 2
322322
$:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
323323
do l = 0, p
324-
do k = 0, gp_layers - 1
325-
do j = -gp_layers, m + gp_layers
326-
r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
327-
(k + gp_layers*l))
324+
do k = 0, buff_size - 1
325+
do j = -buff_size, m + buff_size
326+
r = ((j + buff_size) + (m + 2*buff_size + 1)* &
327+
(k + buff_size*l))
328328
ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l)
329329
end do
330330
end do
331331
end do
332332
#:else
333333
$:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
334-
do l = 0, gp_layers - 1
335-
do k = -gp_layers, n + gp_layers
336-
do j = -gp_layers, m + gp_layers
337-
r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
338-
((k + gp_layers) + (n + 2*gp_layers + 1)*l))
334+
do l = 0, buff_size - 1
335+
do k = -buff_size, n + buff_size
336+
do j = -buff_size, m + buff_size
337+
r = ((j + buff_size) + (m + 2*buff_size + 1)* &
338+
((k + buff_size) + (n + 2*buff_size + 1)*l))
339339
ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset)
340340
end do
341341
end do
@@ -345,12 +345,38 @@ contains
345345
#:endfor
346346
call nvtxEndRange ! Packbuf
347347
348-
call nvtxStartRange("IB-MARKER-SENDRECV")
349-
call MPI_SENDRECV( &
350-
ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, &
351-
ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, &
352-
MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
353-
call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA
348+
#:for rdma_mpi in [False, True]
349+
if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then
350+
#:if rdma_mpi
351+
#:call GPU_HOST_DATA(use_device='[ib_buff_send, ib_buff_recv]')
352+
353+
call nvtxStartRange("IB-MARKER-SENDRECV-RDMA")
354+
call MPI_SENDRECV( &
355+
ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, &
356+
ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, &
357+
MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
358+
call nvtxEndRange
359+
360+
#:endcall GPU_HOST_DATA
361+
$:GPU_WAIT()
362+
#:else
363+
call nvtxStartRange("IB-MARKER-DEV2HOST")
364+
$:GPU_UPDATE(host='[ib_buff_send]')
365+
call nvtxEndRange
366+
367+
call nvtxStartRange("IB-MARKER-SENDRECV-NO-RMDA")
368+
call MPI_SENDRECV( &
369+
ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, &
370+
ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, &
371+
MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
372+
call nvtxEndRange
373+
374+
call nvtxStartRange("IB-MARKER-HOST2DEV")
375+
$:GPU_UPDATE(device='[ib_buff_recv]')
376+
call nvtxEndRange
377+
#:endif
378+
end if
379+
#:endfor
354380
355381
! Unpack Received Buffer
356382
call nvtxStartRange("IB-MARKER-COMM-UNPACKBUF")
@@ -360,32 +386,32 @@ contains
360386
$:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
361387
do l = 0, p
362388
do k = 0, n
363-
do j = -gp_layers, -1
364-
r = (j + gp_layers*((k + 1) + (n + 1)*l))
389+
do j = -buff_size, -1
390+
r = (j + buff_size*((k + 1) + (n + 1)*l))
365391
ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r)
366392
end do
367393
end do
368394
end do
369395
#:elif mpi_dir == 2
370396
$:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
371397
do l = 0, p
372-
do k = -gp_layers, -1
373-
do j = -gp_layers, m + gp_layers
374-
r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
375-
((k + gp_layers) + gp_layers*l))
398+
do k = -buff_size, -1
399+
do j = -buff_size, m + buff_size
400+
r = ((j + buff_size) + (m + 2*buff_size + 1)* &
401+
((k + buff_size) + buff_size*l))
376402
ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r)
377403
end do
378404
end do
379405
end do
380406
#:else
381407
! Unpacking buffer from bc_z%beg
382408
$:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
383-
do l = -gp_layers, -1
384-
do k = -gp_layers, n + gp_layers
385-
do j = -gp_layers, m + gp_layers
386-
r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
387-
((k + gp_layers) + (n + 2*gp_layers + 1)* &
388-
(l + gp_layers)))
409+
do l = -buff_size, -1
410+
do k = -buff_size, n + buff_size
411+
do j = -buff_size, m + buff_size
412+
r = ((j + buff_size) + (m + 2*buff_size + 1)* &
413+
((k + buff_size) + (n + 2*buff_size + 1)* &
414+
(l + buff_size)))
389415
ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r)
390416
end do
391417
end do

src/simulation/m_start_up.fpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1366,14 +1366,14 @@ contains
13661366
call s_read_data_files(q_cons_ts(1)%vf)
13671367
end if
13681368

1369+
! Populating the buffers of the grid variables using the boundary conditions
1370+
call s_populate_grid_variables_buffers()
1371+
13691372
if (model_eqns == 3) call s_initialize_internal_energy_equations(q_cons_ts(1)%vf)
13701373
if (ib) call s_ibm_setup()
13711374
if (bodyForces) call s_initialize_body_forces_module()
13721375
if (acoustic_source) call s_precalculate_acoustic_spatial_sources()
13731376

1374-
! Populating the buffers of the grid variables using the boundary conditions
1375-
call s_populate_grid_variables_buffers()
1376-
13771377
! Initialize the Temperature cache.
13781378
if (chemistry) call s_compute_q_T_sf(q_T_sf, q_cons_ts(1)%vf, idwint)
13791379

0 commit comments

Comments
 (0)