
Commit 21a5a8d

amrueda and ranocha authored
MPI implementation for non-conservative equations using ParallelP4estMesh{3} (#2126)
* Added first working version of MPI with non-conservative systems for CONFORMING meshes (analysis routines need work)
* Added a first version of calc_mpi_mortar_flux! for non-conservative equations (not working yet)
* Added non-unique mortar fluxes to be able to handle non-conservative equations
* format
* Updated p4est 3d tests
* Updated parabolic and parallel p4est 3d implementations for new naming convention
* format
* Adapted MPI mortars to new mortar fluxes fstar_primary and fstar_secondary
* Fixed problem with parabolic non-conforming discretization
* One solution for the MHD analysis problem with MPI
* Added parallel MHD test
* Selected the right type for initialization of integral in integrate_via_indices
* Added minimal test to reproduce issues with nranks > ntrees
* Fixed GLM speed callback for parallel meshes
* Fixed the computation of :linf_divb for parallel meshes and improved reduction operations for GLM speed
* Added non-conforming non-conservative parallel test with 1 tree to test nranks > ntrees
* Removed unused MHD conforming test
* Apply suggestions from code review: change the occurrences of `Float64` literals to `Float32`-compatible literals
* Format
* Apply suggestions from code review

Co-authored-by: Hendrik Ranocha <[email protected]>
1 parent db83c71 commit 21a5a8d

5 files changed: +188 −19 lines


src/callbacks_step/analysis_dg3d.jl

Lines changed: 10 additions & 0 deletions
@@ -376,6 +376,11 @@ function analyze(::Val{:linf_divb}, du, u, t,
         end
     end
 
+    if mpi_isparallel()
+        # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+        linf_divb = MPI.Allreduce!(Ref(linf_divb), Base.max, mpi_comm())[]
+    end
+
     return linf_divb
 end
 
@@ -412,6 +417,11 @@ function analyze(::Val{:linf_divb}, du, u, t,
         end
     end
 
+    if mpi_isparallel()
+        # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+        linf_divb = MPI.Allreduce!(Ref(linf_divb), Base.max, mpi_comm())[]
+    end
+
     return linf_divb
 end
 end # @muladd
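
For context, the reduction added above can be exercised in isolation. A minimal standalone sketch of the pattern (not Trixi code; assumes MPI.jl is installed and the script is launched with mpiexec): each rank holds a local value, and the in-place allreduce with Base.max (Trixi defines its own `max` in src/auxiliary/math.jl, hence the explicit `Base.`) yields the global maximum on every rank.

    using MPI

    MPI.Init()
    comm = MPI.COMM_WORLD

    # Stand-in for the rank-local L∞ norm of div(B).
    local_linf = rand()

    # In-place allreduce on a 0-dimensional buffer (a Ref); unwrap with [] afterwards.
    global_linf = MPI.Allreduce!(Ref(local_linf), Base.max, comm)[]

    println("rank ", MPI.Comm_rank(comm), ": global max = ", global_linf)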

src/callbacks_step/analysis_dg3d_parallel.jl

Lines changed: 6 additions & 4 deletions
@@ -72,10 +72,12 @@ function integrate_via_indices(func::Func, u,
     @unpack weights = dg.basis
 
     # Initialize integral with zeros of the right shape
-    # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the
-    # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096.
-    integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, 1,
-                         equations, dg, args...))
+    # Pass `zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg), 1)`
+    # to `func` since `u` might be empty, if the current rank has no elements.
+    # See also https://github.com/trixi-framework/Trixi.jl/issues/1096, and
+    # https://github.com/trixi-framework/Trixi.jl/pull/2126/files/7cbc57cfcba93e67353566e10fce1f3edac27330#r1814483243.
+    integral = zero(func(zeros(eltype(u), nvariables(equations), nnodes(dg), nnodes(dg),
+                               nnodes(dg), 1), 1, 1, 1, 1, equations, dg, args...))
     volume = zero(real(mesh))
 
     # Use quadrature to numerically integrate over entire domain
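
The initialization is rewritten because `u` can be empty on ranks that own no elements, so `func` cannot be evaluated on real data just to obtain a value of the correct type for the reduction; instead, a correctly shaped dummy array of zeros is passed. A simplified, hypothetical sketch of the idea (the integrand and sizes below are placeholders, not Trixi's actual signature):

    # On a rank without elements, `u` would have size (nvars, n, n, n, 0), so build a
    # dummy array with one element instead and take `zero` of the integrand's result.
    dummy_integrand(u_arr, i, j, k, element) = sum(abs2, view(u_arr, :, i, j, k, element))

    nvars, n = 9, 4                                   # e.g. 3D GLM-MHD, polydeg 3
    dummy_u = zeros(Float64, nvars, n, n, n, 1)
    integral = zero(dummy_integrand(dummy_u, 1, 1, 1, 1))   # 0.0, with the correct type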

src/callbacks_step/glm_speed_dg.jl

Lines changed: 14 additions & 0 deletions
@@ -13,6 +13,14 @@ function calc_dt_for_cleaning_speed(cfl::Real, mesh,
     # Cartesian meshes
     max_scaled_speed_for_c_h = maximum(cache.elements.inverse_jacobian) *
                                ndims(equations)
+
+    if mpi_isparallel()
+        # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+        max_scaled_speed_for_c_h = MPI.Allreduce!(Ref(max_scaled_speed_for_c_h),
+                                                  Base.max,
+                                                  mpi_comm())[]
+    end
+
     # OBS! This depends on the implementation details of the StepsizeCallback and needs to be adapted
     # as well if that callback changes.
     return cfl * 2 / (nnodes(dg) * max_scaled_speed_for_c_h)
@@ -29,6 +37,12 @@ function calc_dt_for_cleaning_speed(cfl::Real, mesh,
     # Copies implementation behavior of `calc_dt_for_cleaning_speed` for DGSEM discretizations.
     max_scaled_speed_for_c_h = inv(minimum(md.J)) * ndims(equations)
 
+    if mpi_isparallel()
+        # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+        max_scaled_speed_for_c_h = MPI.Allreduce!(Ref(max_scaled_speed_for_c_h),
+                                                  Base.max,
+                                                  mpi_comm())[]
+    end
     # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by
     # `polydeg+1`. This is because `nnodes(dg)` returns the total number of
     # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns
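
As a quick numeric illustration of the formula returned above (made-up values, not taken from any elixir): the rank-local maximum of the inverse Jacobian is scaled by the number of dimensions, reduced across ranks in parallel, and then enters the same time-step formula on every rank.

    cfl = 0.5
    ndims_eq = 3
    nnodes_dg = 4                      # polydeg + 1
    inv_jacobian_max = 8.0             # stand-in for maximum(cache.elements.inverse_jacobian)
    max_scaled_speed_for_c_h = inv_jacobian_max * ndims_eq
    # In parallel, this value would first be combined across ranks via MPI.Allreduce! with Base.max.
    dt_for_c_h = cfl * 2 / (nnodes_dg * max_scaled_speed_for_c_h)
    println(dt_for_c_h)                # 0.010416666666666666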

src/solvers/dgsem_p4est/dg_3d_parallel.jl

Lines changed: 84 additions & 15 deletions
@@ -267,6 +267,40 @@ end
     end
 end
 
+# Inlined version of the interface flux computation for non-conservative equations
+@inline function calc_mpi_interface_flux!(surface_flux_values,
+                                          mesh::Union{ParallelP4estMesh{3},
+                                                      ParallelT8codeMesh{3}},
+                                          nonconservative_terms::True, equations,
+                                          surface_integral, dg::DG, cache,
+                                          interface_index, normal_direction,
+                                          interface_i_node_index,
+                                          interface_j_node_index, local_side,
+                                          surface_i_node_index, surface_j_node_index,
+                                          local_direction_index, local_element_index)
+    @unpack u = cache.mpi_interfaces
+    surface_flux, nonconservative_flux = surface_integral.surface_flux
+
+    u_ll, u_rr = get_surface_node_vars(u, equations, dg,
+                                       interface_i_node_index, interface_j_node_index,
+                                       interface_index)
+
+    # Compute flux and non-conservative term for this side of the interface
+    if local_side == 1
+        flux_ = surface_flux(u_ll, u_rr, normal_direction, equations)
+        noncons_flux_ = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
+    else # local_side == 2
+        flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations)
+        noncons_flux_ = -nonconservative_flux(u_rr, u_ll, -normal_direction, equations)
+    end
+
+    for v in eachvariable(equations)
+        surface_flux_values[v, surface_i_node_index, surface_j_node_index,
+                            local_direction_index, local_element_index] = flux_[v] +
+                                                                          0.5f0 * noncons_flux_[v]
+    end
+end
+
 function prolong2mpimortars!(cache, u,
                              mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}},
                              equations,
@@ -384,12 +418,13 @@ function calc_mpi_mortar_flux!(surface_flux_values,
                                surface_integral, dg::DG, cache)
     @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars
     @unpack contravariant_vectors = cache.elements
-    @unpack fstar_primary_threaded, fstar_tmp_threaded = cache
+    @unpack fstar_primary_threaded, fstar_secondary_threaded, fstar_tmp_threaded = cache
     index_range = eachnode(dg)
 
     @threaded for mortar in eachmpimortar(dg, cache)
         # Choose thread-specific pre-allocated container
-        fstar = fstar_primary_threaded[Threads.threadid()]
+        fstar_primary = fstar_primary_threaded[Threads.threadid()]
+        fstar_secondary = fstar_secondary_threaded[Threads.threadid()]
         fstar_tmp = fstar_tmp_threaded[Threads.threadid()]
 
         # Get index information on the small elements
@@ -412,7 +447,8 @@ function calc_mpi_mortar_flux!(surface_flux_values,
                 normal_direction = get_normal_direction(cache.mpi_mortars, i, j,
                                                         position, mortar)
 
-                calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations,
+                calc_mpi_mortar_flux!(fstar_primary, fstar_secondary, mesh,
+                                      nonconservative_terms, equations,
                                       surface_integral, dg, cache,
                                       mortar, position, normal_direction,
                                       i, j)
@@ -433,14 +469,15 @@ function calc_mpi_mortar_flux!(surface_flux_values,
 
         mpi_mortar_fluxes_to_elements!(surface_flux_values,
                                        mesh, equations, mortar_l2, dg, cache,
-                                       mortar, fstar, u_buffer, fstar_tmp)
+                                       mortar, fstar_primary, fstar_secondary, u_buffer,
+                                       fstar_tmp)
     end
 
     return nothing
 end
 
 # Inlined version of the mortar flux computation on small elements for conservation laws
-@inline function calc_mpi_mortar_flux!(fstar,
+@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,
                                        mesh::Union{ParallelP4estMesh{3},
                                                    ParallelT8codeMesh{3}},
                                        nonconservative_terms::False, equations,
@@ -456,16 +493,48 @@ end
     flux = surface_flux(u_ll, u_rr, normal_direction, equations)
 
     # Copy flux to buffer
-    set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index,
+    set_node_vars!(fstar_primary, flux, equations, dg, i_node_index, j_node_index,
+                   position_index)
+    set_node_vars!(fstar_secondary, flux, equations, dg, i_node_index, j_node_index,
                    position_index)
 end
 
+# Inlined version of the mortar flux computation on small elements for non-conservative equations
+@inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary,
+                                       mesh::Union{ParallelP4estMesh{3},
+                                                   ParallelT8codeMesh{3}},
+                                       nonconservative_terms::True, equations,
+                                       surface_integral, dg::DG, cache,
+                                       mortar_index, position_index, normal_direction,
+                                       i_node_index, j_node_index)
+    @unpack u = cache.mpi_mortars
+    surface_flux, nonconservative_flux = surface_integral.surface_flux
+
+    u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index,
+                                       j_node_index, mortar_index)
+
+    flux = surface_flux(u_ll, u_rr, normal_direction, equations)
+    noncons_flux_primary = nonconservative_flux(u_ll, u_rr, normal_direction, equations)
+    noncons_flux_secondary = nonconservative_flux(u_rr, u_ll, normal_direction,
+                                                  equations)
+
+    for v in eachvariable(equations)
+        fstar_primary[v, i_node_index, j_node_index, position_index] = flux[v] +
+                                                                       0.5f0 *
+                                                                       noncons_flux_primary[v]
+        fstar_secondary[v, i_node_index, j_node_index, position_index] = flux[v] +
+                                                                         0.5f0 *
+                                                                         noncons_flux_secondary[v]
+    end
+end
+
 @inline function mpi_mortar_fluxes_to_elements!(surface_flux_values,
                                                 mesh::Union{ParallelP4estMesh{3},
                                                             ParallelT8codeMesh{3}},
                                                 equations,
                                                 mortar_l2::LobattoLegendreMortarL2,
-                                                dg::DGSEM, cache, mortar, fstar,
+                                                dg::DGSEM, cache, mortar, fstar_primary,
+                                                fstar_secondary,
                                                 u_buffer, fstar_tmp)
     @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars
     index_range = eachnode(dg)
@@ -487,22 +556,22 @@ end
             # Project small fluxes to large element.
             multiply_dimensionwise!(u_buffer,
                                     mortar_l2.reverse_lower, mortar_l2.reverse_lower,
-                                    view(fstar, .., 1),
+                                    view(fstar_secondary, .., 1),
                                     fstar_tmp)
             add_multiply_dimensionwise!(u_buffer,
                                         mortar_l2.reverse_upper,
                                         mortar_l2.reverse_lower,
-                                        view(fstar, .., 2),
+                                        view(fstar_secondary, .., 2),
                                         fstar_tmp)
             add_multiply_dimensionwise!(u_buffer,
                                         mortar_l2.reverse_lower,
                                         mortar_l2.reverse_upper,
-                                        view(fstar, .., 3),
+                                        view(fstar_secondary, .., 3),
                                         fstar_tmp)
             add_multiply_dimensionwise!(u_buffer,
                                         mortar_l2.reverse_upper,
                                         mortar_l2.reverse_upper,
-                                        view(fstar, .., 4),
+                                        view(fstar_secondary, .., 4),
                                         fstar_tmp)
             # The flux is calculated in the outward direction of the small elements,
             # so the sign must be switched to get the flux in outward direction
@@ -536,10 +605,10 @@ end
             for j in eachnode(dg)
                 for i in eachnode(dg)
                     for v in eachvariable(equations)
-                        surface_flux_values[v, i, j, small_direction, element] = fstar[v,
-                                                                                       i,
-                                                                                       j,
-                                                                                       position]
+                        surface_flux_values[v, i, j, small_direction, element] = fstar_primary[v,
+                                                                                               i,
+                                                                                               j,
+                                                                                               position]
                     end
                 end
             end
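
The key point of this file is how the conservative and non-conservative parts are combined at MPI interfaces and mortars: each side adds half of its own non-conservative contribution to the symmetric surface flux, and because the non-conservative term is not symmetric in its arguments, mortars keep two buffers (fstar_primary for the small elements, fstar_secondary for the projection to the large element). A hypothetical, self-contained sketch of the side-dependent assembly (the flux functions below are placeholders, not Trixi kernels; only the `flux + 0.5f0 * noncons` combination and the normal/argument flipping mirror the code above):

    # Placeholder fluxes: `sym_flux` is symmetric in (u_ll, u_rr), `noncons_flux` is not.
    sym_flux(u_ll, u_rr, n) = 0.5f0 * (u_ll + u_rr) * n
    noncons_flux(u_ll, u_rr, n) = u_ll * (u_rr - u_ll) * n

    function mpi_interface_contribution(u_ll, u_rr, normal, local_side)
        if local_side == 1
            f  = sym_flux(u_ll, u_rr, normal)
            nc = noncons_flux(u_ll, u_rr, normal)
        else # local_side == 2: flip the normal and swap the non-conservative arguments
            f  = -sym_flux(u_ll, u_rr, -normal)
            nc = -noncons_flux(u_rr, u_ll, -normal)
        end
        return f + 0.5f0 * nc
    end

    # Both sides see the same conservative part (in their own outward orientation),
    # but the non-conservative parts differ, which is why primary/secondary buffers exist.
    u_ll, u_rr, n = 1.0f0, 2.0f0, 1.0f0
    println(mpi_interface_contribution(u_ll, u_rr, n, 1))   # 2.0
    println(mpi_interface_contribution(u_ll, u_rr, n, 2))   # 0.5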

test/test_mpi_p4est_3d.jl

Lines changed: 74 additions & 0 deletions
@@ -235,6 +235,80 @@ const EXAMPLES_DIR = pkgdir(Trixi, "examples", "p4est_3d_dgsem")
         @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
     end
 end
+
+@trixi_testset "elixir_mhd_alfven_wave_nonconforming.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                 "elixir_mhd_alfven_wave_nonconforming.jl"),
+                        l2=[
+                            0.0001788543743594658,
+                            0.000624334205581902,
+                            0.00022892869974368887,
+                            0.0007223464581156573,
+                            0.0006651366626523314,
+                            0.0006287275014743352,
+                            0.000344484339916008,
+                            0.0007179788287557142,
+                            8.632896980651243e-7
+                        ],
+                        linf=[
+                            0.0010730565632763867,
+                            0.004596749809344033,
+                            0.0013235269262853733,
+                            0.00468874234888117,
+                            0.004719267084104306,
+                            0.004228339352211896,
+                            0.0037503625505571625,
+                            0.005104176909383168,
+                            9.738081186490818e-6
+                        ],
+                        tspan=(0.0, 0.25),
+                        coverage_override=(trees_per_dimension = (1, 1, 1),))
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    end
+end
+
+# Same test as above but with only one tree in the mesh
+# We use it to test meshes with elements of different size in each partition
+@test_trixi_include(joinpath(EXAMPLES_DIR,
+                             "elixir_mhd_alfven_wave_nonconforming.jl"),
+                    l2=[
+                        0.0019054118500017054,
+                        0.006957977226608083,
+                        0.003429930594167365,
+                        0.009051598556176287,
+                        0.0077261662742688425,
+                        0.008210851821439208,
+                        0.003763030674412298,
+                        0.009175470744760567,
+                        2.881690753923244e-5
+                    ],
+                    linf=[
+                        0.010983704624623503,
+                        0.04584128974425262,
+                        0.02022630484954286,
+                        0.04851342295826149,
+                        0.040710154751363525,
+                        0.044722299260292586,
+                        0.036591209423654236,
+                        0.05701669133068068,
+                        0.00024182906501186622
+                    ],
+                    tspan=(0.0, 0.25), trees_per_dimension=(1, 1, 1),
+                    coverage_override=(trees_per_dimension = (1, 1, 1),))
+# Ensure that we do not have excessive memory allocations
+# (e.g., from type instabilities)
+let
+    t = sol.t[end]
+    u_ode = sol.u[end]
+    du_ode = similar(u_ode)
+    @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+end
 end
 end # P4estMesh MPI
 
