From 658f8f89e82c2d8176e021b8d62bac106e145b10 Mon Sep 17 00:00:00 2001
From: bennibolm <benjamin.bolm@gmx.de>
Date: Wed, 17 Sep 2025 15:50:06 +0200
Subject: [PATCH 01/18] Add first 3d support for subcell limiting

---
 .../elixir_euler_sedov_sc_subcell.jl          | 111 +++
 src/callbacks_stage/subcell_bounds_check.jl   |   1 +
 .../subcell_bounds_check_2d.jl                |   2 +-
 .../subcell_bounds_check_3d.jl                | 109 +++
 .../subcell_limiter_idp_correction.jl         |   1 +
 .../subcell_limiter_idp_correction_3d.jl      |  55 ++
 src/equations/compressible_euler_3d.jl        |  64 +-
 src/solvers/dgsem_p4est/dg.jl                 |   1 +
 .../dgsem_p4est/subcell_limiters_3d.jl        | 308 ++++++++
 src/solvers/dgsem_structured/dg.jl            |   1 +
 .../dg_3d_subcell_limiters.jl                 | 668 ++++++++++++++++++
 src/solvers/dgsem_tree/containers_3d.jl       | 217 +++++-
 src/solvers/dgsem_tree/dg.jl                  |   2 +
 .../dgsem_tree/dg_2d_subcell_limiters.jl      |   3 +-
 .../dgsem_tree/dg_3d_subcell_limiters.jl      | 224 ++++++
 src/solvers/dgsem_tree/subcell_limiters_2d.jl |  38 +-
 src/solvers/dgsem_tree/subcell_limiters_3d.jl | 586 +++++++++++++++
 test/test_p4est_3d.jl                         |  65 ++
 18 files changed, 2437 insertions(+), 19 deletions(-)
 create mode 100644 examples/p4est_3d_dgsem/elixir_euler_sedov_sc_subcell.jl
 create mode 100644 src/callbacks_stage/subcell_bounds_check_3d.jl
 create mode 100644 src/callbacks_stage/subcell_limiter_idp_correction_3d.jl
 create mode 100644 src/solvers/dgsem_p4est/subcell_limiters_3d.jl
 create mode 100644 src/solvers/dgsem_structured/dg_3d_subcell_limiters.jl
 create mode 100644 src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
 create mode 100644 src/solvers/dgsem_tree/subcell_limiters_3d.jl

diff --git a/examples/p4est_3d_dgsem/elixir_euler_sedov_sc_subcell.jl b/examples/p4est_3d_dgsem/elixir_euler_sedov_sc_subcell.jl
new file mode 100644
index 00000000000..aba510353cd
--- /dev/null
+++ b/examples/p4est_3d_dgsem/elixir_euler_sedov_sc_subcell.jl
@@ -0,0 +1,111 @@
+using OrdinaryDiffEqLowStorageRK
+using Trixi
+
+###############################################################################
+# semidiscretization of the compressible Euler equations
+
+equations = CompressibleEulerEquations3D(1.4)
+
+"""
+    initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations3D)
+
+The Sedov blast wave setup based on Flash
+- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000
+with smaller strength of the initial discontinuity.
+"""
+function initial_condition_sedov_blast_wave(x, t,
+                                            equations::CompressibleEulerEquations3D)
+    # Radial distance `r` of the node `x` from the blast center `inicenter`
+    inicenter = SVector(0.0, 0.0, 0.0)
+    x_norm = x[1] - inicenter[1]
+    y_norm = x[2] - inicenter[2]
+    z_norm = x[3] - inicenter[3]
+    r = sqrt(x_norm^2 + y_norm^2 + z_norm^2)
+
+    # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000
+    r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6)
+    E = 1.0
+    p0_inner = 3 * (equations.gamma - 1) * E / (4 * pi * r0^2) # NOTE(review): Flash's spherical setup scales with r0^3 - confirm r0^2 is intended
+    p0_outer = 1.0e-5 # pressure used for all nodes with r > r0
+
+    # Calculate primitive variables
+    rho = 1.0
+    v1 = 0.0
+    v2 = 0.0
+    v3 = 0.0
+    p = r > r0 ? p0_outer : p0_inner # `p0_inner` only inside the ball r <= r0
+
+    return prim2cons(SVector(rho, v1, v2, v3, p), equations)
+end
+
+initial_condition = initial_condition_sedov_blast_wave
+
+# Up to version 0.13.0, `max_abs_speed_naive` was used as the default wave speed estimate of
+# `const flux_lax_friedrichs = FluxLaxFriedrichs()`, i.e., `FluxLaxFriedrichs(max_abs_speed = max_abs_speed_naive)`.
+# In the `StepsizeCallback`, though, the less diffusive `max_abs_speeds` is employed which is consistent with `max_abs_speed`.
+# Thus, we exchanged in PR#2458 the default wave speed used in the LLF flux to `max_abs_speed`.
+# To ensure that every example still runs we specify explicitly `FluxLaxFriedrichs(max_abs_speed_naive)`.
+# We remark, however, that the now default `max_abs_speed` is in general recommended due to compliance with the
+# `StepsizeCallback` (CFL-Condition) and less diffusion.
+surface_flux = FluxLaxFriedrichs(max_abs_speed_naive)
+volume_flux = flux_ranocha
+polydeg = 3
+basis = LobattoLegendreBasis(polydeg)
+limiter_idp = SubcellLimiterIDP(equations, basis;
+                                positivity_variables_cons = ["rho"],
+                                positivity_variables_nonlinear = [pressure],
+                                local_twosided_variables_cons = [],
+                                local_onesided_variables_nonlinear = [],
+                                max_iterations_newton = 40, # Default parameters are not sufficient to fulfill bounds properly.
+                                newton_tolerances = (1.0e-14, 1.0e-15))
+volume_integral = VolumeIntegralSubcellLimiting(limiter_idp;
+                                                volume_flux_dg = volume_flux,
+                                                volume_flux_fv = surface_flux)
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+coordinates_min = (-1.0, -1.0, -1.0)
+coordinates_max = (1.0, 1.0, 1.0)
+
+trees_per_dimension = (8, 8, 8)
+mesh = P4estMesh(trees_per_dimension,
+                 polydeg = 1, initial_refinement_level = 0,
+                 coordinates_min = coordinates_min, coordinates_max = coordinates_max,
+                 periodicity = true)
+
+# create the semi discretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 3.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+save_solution = SaveSolutionCallback(interval = 10,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     extra_node_variables = (:limiting_coefficient,))
+
+stepsize_callback = StepsizeCallback(cfl = 0.5)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback,
+                        save_solution,
+                        stepsize_callback)
+
+###############################################################################
+# run the simulation
+
+stage_callbacks = (SubcellLimiterIDPCorrection(), BoundsCheckCallback())
+
+sol = Trixi.solve(ode, Trixi.SimpleSSPRK33(stage_callbacks = stage_callbacks);
+                  dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  ode_default_options()..., callback = callbacks);
diff --git a/src/callbacks_stage/subcell_bounds_check.jl b/src/callbacks_stage/subcell_bounds_check.jl
index 15231b25922..8fd550cb45d 100644
--- a/src/callbacks_stage/subcell_bounds_check.jl
+++ b/src/callbacks_stage/subcell_bounds_check.jl
@@ -214,4 +214,5 @@ end
 end
 
 include("subcell_bounds_check_2d.jl")
+include("subcell_bounds_check_3d.jl")
 end # @muladd
diff --git a/src/callbacks_stage/subcell_bounds_check_2d.jl b/src/callbacks_stage/subcell_bounds_check_2d.jl
index 8ac4c74b518..6085673959d 100644
--- a/src/callbacks_stage/subcell_bounds_check_2d.jl
+++ b/src/callbacks_stage/subcell_bounds_check_2d.jl
@@ -5,7 +5,7 @@
 @muladd begin
 #! format: noindent
 
-@inline function check_bounds(u, equations::AbstractEquations{2}, # only works for 2D
+@inline function check_bounds(u, equations::AbstractEquations{2},
                               solver, cache, limiter::SubcellLimiterIDP)
     (; local_twosided, positivity, local_onesided) = solver.volume_integral.limiter
     (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
diff --git a/src/callbacks_stage/subcell_bounds_check_3d.jl b/src/callbacks_stage/subcell_bounds_check_3d.jl
new file mode 100644
index 00000000000..b842aef2df9
--- /dev/null
+++ b/src/callbacks_stage/subcell_bounds_check_3d.jl
@@ -0,0 +1,109 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+@inline function check_bounds(u, equations::AbstractEquations{3},
+                              solver, cache, limiter::SubcellLimiterIDP)
+    (; local_twosided, positivity, local_onesided) = solver.volume_integral.limiter
+    (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
+    (; idp_bounds_delta_local, idp_bounds_delta_global) = limiter.cache
+
+    # Note: In order to get the maximum deviation from the target bounds, this bounds check
+    # requires a reduction in every RK stage and for every enabled limiting option. To make
+    # this Thread-parallel we are using Polyester.jl's (at least v0.7.10) `@batch reduction`
+    # functionality.
+    # Although `@threaded` and `@batch` are currently used equivalently in Trixi.jl, we use
+    # `@batch` here to allow a possible redefinition of `@threaded` without creating errors here.
+    # See also https://github.com/trixi-framework/Trixi.jl/pull/1888#discussion_r1537785293.
+
+    if local_twosided
+        for v in limiter.local_twosided_variables_cons
+            v_string = string(v)
+            key_min = Symbol(v_string, "_min")
+            key_max = Symbol(v_string, "_max")
+            deviation_min = idp_bounds_delta_local[key_min]
+            deviation_max = idp_bounds_delta_local[key_max]
+            @batch reduction=((max, deviation_min), (max, deviation_max)) for element in eachelement(solver,
+                                                                                                     cache)
+                for k in eachnode(solver), j in eachnode(solver), i in eachnode(solver)
+                    var = u[v, i, j, k, element]
+                    # Note: We always save the absolute deviations >= 0 and therefore use the
+                    # `max` operator for the lower and upper bound. The different directions of
+                    # upper and lower bound are considered in their calculations with a
+                    # different sign.
+                    deviation_min = max(deviation_min,
+                                        variable_bounds[key_min][i, j, k, element] -
+                                        var)
+                    deviation_max = max(deviation_max,
+                                        var -
+                                        variable_bounds[key_max][i, j, k, element])
+                end
+            end
+            idp_bounds_delta_local[key_min] = deviation_min
+            idp_bounds_delta_local[key_max] = deviation_max
+        end
+    end
+    if local_onesided
+        for (variable, min_or_max) in limiter.local_onesided_variables_nonlinear
+            key = Symbol(string(variable), "_", string(min_or_max))
+            deviation = idp_bounds_delta_local[key]
+            sign_ = min_or_max(1.0, -1.0) # `max` gives +1.0 (upper bound), `min` gives -1.0 (lower bound)
+            @batch reduction=(max, deviation) for element in eachelement(solver, cache)
+                for k in eachnode(solver), j in eachnode(solver), i in eachnode(solver)
+                    v = variable(get_node_vars(u, equations, solver, i, j, k, element),
+                                 equations)
+                    # Note: We always save the absolute deviations >= 0 and therefore use the
+                    # `max` operator for lower and upper bounds. The different directions of
+                    # upper and lower bounds are considered with `sign_`.
+                    deviation = max(deviation,
+                                    sign_ *
+                                    (v - variable_bounds[key][i, j, k, element]))
+                end
+            end
+            idp_bounds_delta_local[key] = deviation
+        end
+    end
+    if positivity
+        for v in limiter.positivity_variables_cons
+            if v in limiter.local_twosided_variables_cons
+                continue # the `_min` key of this variable belongs to the two-sided branch above
+            end
+            key = Symbol(string(v), "_min")
+            deviation = idp_bounds_delta_local[key]
+            @batch reduction=(max, deviation) for element in eachelement(solver, cache)
+                for k in eachnode(solver), j in eachnode(solver), i in eachnode(solver)
+                    var = u[v, i, j, k, element]
+                    deviation = max(deviation,
+                                    variable_bounds[key][i, j, k, element] - var)
+                end
+            end
+            idp_bounds_delta_local[key] = deviation
+        end
+        for variable in limiter.positivity_variables_nonlinear
+            key = Symbol(string(variable), "_min")
+            deviation = idp_bounds_delta_local[key]
+            @batch reduction=(max, deviation) for element in eachelement(solver, cache)
+                for k in eachnode(solver), j in eachnode(solver), i in eachnode(solver)
+                    var = variable(get_node_vars(u, equations, solver, i, j, k,
+                                                 element),
+                                   equations)
+                    deviation = max(deviation,
+                                    variable_bounds[key][i, j, k, element] - var)
+                end
+            end
+            idp_bounds_delta_local[key] = deviation
+        end
+    end
+
+    for (key, _) in idp_bounds_delta_local
+        # Fold the deviations of this call into the running global maxima
+        idp_bounds_delta_global[key] = max(idp_bounds_delta_global[key],
+                                           idp_bounds_delta_local[key])
+    end
+
+    return nothing
+end
+end # @muladd
diff --git a/src/callbacks_stage/subcell_limiter_idp_correction.jl b/src/callbacks_stage/subcell_limiter_idp_correction.jl
index e1cb42035d1..69e4fc62d4e 100644
--- a/src/callbacks_stage/subcell_limiter_idp_correction.jl
+++ b/src/callbacks_stage/subcell_limiter_idp_correction.jl
@@ -64,4 +64,5 @@ init_callback(limiter!::SubcellLimiterIDPCorrection, semi) = nothing
 finalize_callback(limiter!::SubcellLimiterIDPCorrection, semi) = nothing
 
 include("subcell_limiter_idp_correction_2d.jl")
+include("subcell_limiter_idp_correction_3d.jl")
 end # @muladd
diff --git a/src/callbacks_stage/subcell_limiter_idp_correction_3d.jl b/src/callbacks_stage/subcell_limiter_idp_correction_3d.jl
new file mode 100644
index 00000000000..c74c7f5dccf
--- /dev/null
+++ b/src/callbacks_stage/subcell_limiter_idp_correction_3d.jl
@@ -0,0 +1,55 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+# Correct `u` in place with the `(1 - alpha)`-weighted antidiffusive flux differences (3D, P4estMesh)
+function perform_idp_correction!(u, dt, mesh::P4estMesh{3},
+                                 equations, dg, cache)
+    @unpack inverse_weights = dg.basis
+    @unpack antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes
+    @unpack alpha1, alpha2, alpha3 = dg.volume_integral.limiter.cache.subcell_limiter_coefficients
+
+    @threaded for element in eachelement(dg, cache)
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            # Sign switch as in apply_jacobian!
+            inverse_jacobian = -get_inverse_jacobian(cache.elements.inverse_jacobian,
+                                                     mesh, i, j, k, element)
+
+            # Note: antidiffusive_flux1[v, xi, j, k, element] = antidiffusive_flux2[v, i, xi, k, element] = antidiffusive_flux3[v, i, j, xi, element] = 0 for all i, j, k in 1:nnodes and xi in {1, nnodes+1}
+            alpha_flux1 = (1 - alpha1[i, j, k, element]) *
+                          get_node_vars(antidiffusive_flux1_R, equations, dg,
+                                        i, j, k, element)
+            alpha_flux1_ip1 = (1 - alpha1[i + 1, j, k, element]) *
+                              get_node_vars(antidiffusive_flux1_L, equations, dg,
+                                            i + 1, j, k, element)
+            alpha_flux2 = (1 - alpha2[i, j, k, element]) *
+                          get_node_vars(antidiffusive_flux2_R, equations, dg,
+                                        i, j, k, element)
+            alpha_flux2_jp1 = (1 - alpha2[i, j + 1, k, element]) *
+                              get_node_vars(antidiffusive_flux2_L, equations, dg,
+                                            i, j + 1, k, element)
+            alpha_flux3 = (1 - alpha3[i, j, k, element]) *
+                          get_node_vars(antidiffusive_flux3_R, equations, dg,
+                                        i, j, k, element)
+            alpha_flux3_kp1 = (1 - alpha3[i, j, k + 1, element]) *
+                              get_node_vars(antidiffusive_flux3_L, equations, dg,
+                                            i, j, k + 1, element)
+
+            for v in eachvariable(equations)
+                u[v, i, j, k, element] += dt * inverse_jacobian *
+                                          (inverse_weights[i] *
+                                           (alpha_flux1_ip1[v] - alpha_flux1[v]) +
+                                           inverse_weights[j] *
+                                           (alpha_flux2_jp1[v] - alpha_flux2[v]) +
+                                           inverse_weights[k] *
+                                           (alpha_flux3_kp1[v] - alpha_flux3[v]))
+            end
+        end
+    end
+
+    return nothing
+end
+end # @muladd
diff --git a/src/equations/compressible_euler_3d.jl b/src/equations/compressible_euler_3d.jl
index 573c94194e9..758e0f640f2 100644
--- a/src/equations/compressible_euler_3d.jl
+++ b/src/equations/compressible_euler_3d.jl
@@ -1319,8 +1319,8 @@ end
     c_bar = sqrt(gamma * p_avg / rho_log)
 
     # Step 3:
-    # Build the dissipation term as given in Appendix A of the paper 
-    # - A. R. Winters, D. Derigs, G. Gassner, S. Walch, A uniquely defined entropy stable matrix dissipation operator 
+    # Build the dissipation term as given in Appendix A of the paper
+    # - A. R. Winters, D. Derigs, G. Gassner, S. Walch, A uniquely defined entropy stable matrix dissipation operator
     # for high Mach number ideal MHD and compressible Euler simulations (2017). Journal of Computational Physics.
     # [DOI: 10.1016/j.jcp.2016.12.006](https://doi.org/10.1016/j.jcp.2016.12.006).
 
@@ -1783,6 +1783,29 @@ end
     return SVector(w1, w2, w3, w4, w5)
 end
 
+# Transformation from conservative variables u to entropy vector ds_0/du,
+# using the modified specific entropy of Guermond et al. (2019): s_0 = p * rho^(-gamma) / (gamma-1).
+# Note: This is *not* the "conventional" specific entropy s = ln(p / rho^(gamma)).
+@inline function cons2entropy_guermond_etal(u, equations::CompressibleEulerEquations3D)
+    rho, rho_v1, rho_v2, rho_v3, rho_e = u
+
+    v1 = rho_v1 / rho
+    v2 = rho_v2 / rho
+    v3 = rho_v3 / rho
+    v_square = v1^2 + v2^2 + v3^2
+    inv_rho_gammap1 = (1 / rho)^(equations.gamma + 1) # = rho^(-(gamma + 1))
+
+    # Gradient components, ordered like the conservative variables (rho, rho_v1, rho_v2, rho_v3, rho_e)
+    w1 = inv_rho_gammap1 *
+         (0.5f0 * rho * (equations.gamma + 1) * v_square - equations.gamma * rho_e)
+    w2 = -rho_v1 * inv_rho_gammap1
+    w3 = -rho_v2 * inv_rho_gammap1
+    w4 = -rho_v3 * inv_rho_gammap1
+    w5 = (1 / rho)^equations.gamma # = rho^(-gamma)
+
+    return SVector(w1, w2, w3, w4, w5)
+end
+
 @inline function entropy2cons(w, equations::CompressibleEulerEquations3D)
     # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD
     # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)
@@ -1872,6 +1895,34 @@ end
     return S
 end
 
+@doc raw"""
+    entropy_guermond_etal(u, equations::CompressibleEulerEquations3D)
+
+Calculate the modified specific entropy of Guermond et al. (2019):
+```math
+s_0 = p * \rho^{-\gamma} / (\gamma-1).
+```
+Note: This is *not* the "conventional" specific entropy ``s = \ln(p / \rho^\gamma)``.
+- Guermond et al. (2019)
+  Invariant domain preserving discretization-independent schemes and convex limiting for hyperbolic systems.
+  [DOI: 10.1016/j.cma.2018.11.036](https://doi.org/10.1016/j.cma.2018.11.036)
+"""
+@inline function entropy_guermond_etal(u, equations::CompressibleEulerEquations3D)
+    rho, rho_v1, rho_v2, rho_v3, rho_e = u
+
+    # Modified specific entropy: (rho_e - 0.5 * |rho_v|^2 / rho) * rho^(-gamma), see Guermond et al. (2019)
+    s = (rho_e - 0.5f0 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) *
+        (1 / rho)^equations.gamma
+
+    return s
+end
+
+# Gradient d(s)/d(u) of `entropy_guermond_etal` w.r.t. the conservative variables u; forwards to `cons2entropy_guermond_etal`
+@inline function gradient_conservative(::typeof(entropy_guermond_etal),
+                                       u, equations::CompressibleEulerEquations3D)
+    return cons2entropy_guermond_etal(u, equations)
+end
+
 # Default entropy is the mathematical entropy
 @inline function entropy(cons, equations::CompressibleEulerEquations3D)
     entropy_math(cons, equations)
@@ -1890,4 +1941,13 @@ end
 @inline function energy_internal(cons, equations::CompressibleEulerEquations3D)
     return energy_total(cons, equations) - energy_kinetic(cons, equations)
 end
+
+# State validation for Newton-bisection method of subcell IDP limiting
+@inline function Base.isvalid(u, equations::CompressibleEulerEquations3D)
+    # Invalid iff density or pressure is <= 0 (a NaN compares false and is thus not rejected)
+    rho = u[1]
+    p = pressure(u, equations)
+    is_nonpositive = rho <= 0 || p <= 0
+    return !is_nonpositive
+end
 end # @muladd
diff --git a/src/solvers/dgsem_p4est/dg.jl b/src/solvers/dgsem_p4est/dg.jl
index b0a8f0e07b0..66c16325e58 100644
--- a/src/solvers/dgsem_p4est/dg.jl
+++ b/src/solvers/dgsem_p4est/dg.jl
@@ -91,4 +91,5 @@ include("dg_3d_parabolic.jl")
 include("dg_parallel.jl")
 
 include("subcell_limiters_2d.jl")
+include("subcell_limiters_3d.jl")
 end # @muladd
diff --git a/src/solvers/dgsem_p4est/subcell_limiters_3d.jl b/src/solvers/dgsem_p4est/subcell_limiters_3d.jl
new file mode 100644
index 00000000000..657efb02f5b
--- /dev/null
+++ b/src/solvers/dgsem_p4est/subcell_limiters_3d.jl
@@ -0,0 +1,308 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+function calc_bounds_twosided_interface!(var_min, var_max, variable,
+                                         u, t, semi, mesh::P4estMesh{3}, equations)
+    _, _, dg, cache = mesh_equations_solver_cache(semi)
+    (; boundary_conditions) = semi # only referenced by the disabled boundary call at the end
+
+    (; neighbor_ids, node_indices) = cache.interfaces
+    index_range = eachnode(dg)
+
+    # Calc bounds at interfaces and periodic boundaries
+    for interface in eachinterface(dg, cache)
+        # Get element and side index information on the primary element
+        primary_element = neighbor_ids[1, interface]
+        primary_indices = node_indices[1, interface]
+
+        # Get element and side index information on the secondary element
+        secondary_element = neighbor_ids[2, interface]
+        secondary_indices = node_indices[2, interface]
+
+        # Start index plus the increments applied per inner (`i`) and outer (`j`) loop step, for each side
+        i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1],
+                                                                                     index_range)
+        j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2],
+                                                                                     index_range)
+        k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3],
+                                                                                     index_range)
+
+        i_primary = i_primary_start
+        j_primary = j_primary_start
+        k_primary = k_primary_start
+
+        i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1],
+                                                                                           index_range)
+        j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2],
+                                                                                           index_range)
+        k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3],
+                                                                                           index_range)
+
+        i_secondary = i_secondary_start
+        j_secondary = j_secondary_start
+        k_secondary = k_secondary_start
+
+        for j in eachnode(dg)
+            for i in eachnode(dg)
+                var_primary = u[variable, i_primary, j_primary, k_primary,
+                                primary_element]
+                var_secondary = u[variable, i_secondary, j_secondary, k_secondary,
+                                  secondary_element]
+
+                var_min[i_primary, j_primary, k_primary, primary_element] = min(var_min[i_primary,
+                                                                                        j_primary,
+                                                                                        k_primary,
+                                                                                        primary_element],
+                                                                                var_secondary)
+                var_max[i_primary, j_primary, k_primary, primary_element] = max(var_max[i_primary,
+                                                                                        j_primary,
+                                                                                        k_primary,
+                                                                                        primary_element],
+                                                                                var_secondary)
+
+                var_min[i_secondary, j_secondary, k_secondary, secondary_element] = min(var_min[i_secondary,
+                                                                                                j_secondary,
+                                                                                                k_secondary,
+                                                                                                secondary_element],
+                                                                                        var_primary)
+                var_max[i_secondary, j_secondary, k_secondary, secondary_element] = max(var_max[i_secondary,
+                                                                                                j_secondary,
+                                                                                                k_secondary,
+                                                                                                secondary_element],
+                                                                                        var_primary)
+
+                # Increment the primary element indices
+                i_primary += i_primary_step_i
+                j_primary += j_primary_step_i
+                k_primary += k_primary_step_i
+                # Increment the secondary element surface indices
+                i_secondary += i_secondary_step_i
+                j_secondary += j_secondary_step_i
+                k_secondary += k_secondary_step_i
+            end
+            # Increment the primary element indices
+            i_primary += i_primary_step_j
+            j_primary += j_primary_step_j
+            k_primary += k_primary_step_j
+            # Increment the secondary element surface indices
+            i_secondary += i_secondary_step_j
+            j_secondary += j_secondary_step_j
+            k_secondary += k_secondary_step_j
+        end
+    end
+
+    # TODO: Bounds at physical (non-periodic) boundaries are not computed here yet; the call below is disabled
+    # calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
+    #                                boundary_conditions,
+    #                                mesh, equations, dg, cache)
+
+    return nothing
+end
+
+# @inline function calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
+#                                                 boundary_conditions::BoundaryConditionPeriodic,
+#                                                 mesh::P4estMesh{2},
+#                                                 equations, dg, cache)
+#     return nothing
+# end
+
+# @inline function calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
+#                                                 boundary_conditions,
+#                                                 mesh::P4estMesh{2},
+#                                                 equations, dg, cache)
+#     (; boundary_condition_types, boundary_indices) = boundary_conditions
+#     (; contravariant_vectors) = cache.elements
+
+#     (; boundaries) = cache
+#     index_range = eachnode(dg)
+
+#     foreach_enumerate(boundary_condition_types) do (i, boundary_condition)
+#         for boundary in boundary_indices[i]
+#             element = boundaries.neighbor_ids[boundary]
+#             node_indices = boundaries.node_indices[boundary]
+#             direction = indices2direction(node_indices)
+
+#             i_node_start, i_node_step = index_to_start_step_2d(node_indices[1],
+#                                                                index_range)
+#             j_node_start, j_node_step = index_to_start_step_2d(node_indices[2],
+#                                                                index_range)
+
+#             i_node = i_node_start
+#             j_node = j_node_start
+#             for i in eachnode(dg)
+#                 normal_direction = get_normal_direction(direction,
+#                                                         contravariant_vectors,
+#                                                         i_node, j_node, element)
+
+#                 u_inner = get_node_vars(u, equations, dg, i_node, j_node, element)
+
+#                 u_outer = get_boundary_outer_state(u_inner, t, boundary_condition,
+#                                                    normal_direction,
+#                                                    mesh, equations, dg, cache,
+#                                                    i_node, j_node, element)
+#                 var_outer = u_outer[variable]
+
+#                 var_min[i_node, j_node, element] = min(var_min[i_node, j_node, element],
+#                                                        var_outer)
+#                 var_max[i_node, j_node, element] = max(var_max[i_node, j_node, element],
+#                                                        var_outer)
+
+#                 i_node += i_node_step
+#                 j_node += j_node_step
+#             end
+#         end
+#     end
+
+#     return nothing
+# end
+
+function calc_bounds_onesided_interface!(var_minmax, minmax, variable, u, t, semi,
+                                         mesh::P4estMesh{3})
+    _, equations, dg, cache = mesh_equations_solver_cache(semi)
+    (; boundary_conditions) = semi # only used by the disabled boundary call at the end
+
+    (; neighbor_ids, node_indices) = cache.interfaces
+    index_range = eachnode(dg)
+
+    # Interfaces and periodic boundaries: update each side's bound with the opposite value
+    for interface in eachinterface(dg, cache)
+        # Get element and side index information on the primary element
+        primary_element = neighbor_ids[1, interface]
+        primary_indices = node_indices[1, interface]
+
+        # Get element and side index information on the secondary element
+        secondary_element = neighbor_ids[2, interface]
+        secondary_indices = node_indices[2, interface]
+
+        # Start value and inner/outer loop steps (`_step_i`/`_step_j`) of the i,j,k indices
+        i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1],
+                                                                                     index_range)
+        j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2],
+                                                                                     index_range)
+        k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3],
+                                                                                     index_range)
+
+        i_primary = i_primary_start
+        j_primary = j_primary_start
+        k_primary = k_primary_start
+
+        i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1],
+                                                                                           index_range)
+        j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2],
+                                                                                           index_range)
+        k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3],
+                                                                                           index_range)
+
+        i_secondary = i_secondary_start
+        j_secondary = j_secondary_start
+        k_secondary = k_secondary_start
+
+        for j in eachnode(dg) # `i`/`j` are pure counters; node indices are stepped manually
+            for i in eachnode(dg)
+                var_primary = variable(get_node_vars(u, equations, dg, i_primary,
+                                                     j_primary, k_primary,
+                                                     primary_element), equations)
+                var_secondary = variable(get_node_vars(u, equations, dg, i_secondary,
+                                                       j_secondary, k_secondary,
+                                                       secondary_element),
+                                         equations)
+
+                var_minmax[i_primary, j_primary, k_primary, primary_element] = minmax(var_minmax[i_primary,
+                                                                                                 j_primary,
+                                                                                                 k_primary,
+                                                                                                 primary_element],
+                                                                                      var_secondary)
+                var_minmax[i_secondary, j_secondary, k_secondary, secondary_element] = minmax(var_minmax[i_secondary,
+                                                                                                         j_secondary,
+                                                                                                         k_secondary,
+                                                                                                         secondary_element],
+                                                                                              var_primary)
+
+                # Inner-loop step of the primary element indices
+                i_primary += i_primary_step_i
+                j_primary += j_primary_step_i
+                k_primary += k_primary_step_i
+                # Inner-loop step of the secondary element indices
+                i_secondary += i_secondary_step_i
+                j_secondary += j_secondary_step_i
+                k_secondary += k_secondary_step_i
+            end
+            # Outer-loop step of the primary element indices
+            i_primary += i_primary_step_j
+            j_primary += j_primary_step_j
+            k_primary += k_primary_step_j
+            # Outer-loop step of the secondary element indices
+            i_secondary += i_secondary_step_j
+            j_secondary += j_secondary_step_j
+            k_secondary += k_secondary_step_j
+        end
+    end
+
+    # TODO: Calc bounds at physical boundaries (disabled: no 3d implementation yet)
+    # calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
+    #                                boundary_conditions,
+    #                                mesh, equations, dg, cache)
+
+    return nothing
+end
+
+# @inline function calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
+#                                                 boundary_conditions::BoundaryConditionPeriodic,
+#                                                 mesh::P4estMesh{3},
+#                                                 equations, dg, cache)
+#     return nothing
+# end
+
+# @inline function calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
+#                                                 boundary_conditions,
+#                                                 mesh::P4estMesh{3},
+#                                                 equations, dg, cache)
+#     (; boundary_condition_types, boundary_indices) = boundary_conditions
+#     (; contravariant_vectors) = cache.elements
+
+#     (; boundaries) = cache
+#     index_range = eachnode(dg)
+
+#     foreach_enumerate(boundary_condition_types) do (i, boundary_condition)
+#         for boundary in boundary_indices[i]
+#             element = boundaries.neighbor_ids[boundary]
+#             node_indices = boundaries.node_indices[boundary]
+#             direction = indices2direction(node_indices)
+
+#             i_node_start, i_node_step = index_to_start_step_2d(node_indices[1],
+#                                                                index_range)
+#             j_node_start, j_node_step = index_to_start_step_2d(node_indices[2],
+#                                                                index_range)
+
+#             i_node = i_node_start
+#             j_node = j_node_start
+#             for node in eachnode(dg)
+#                 normal_direction = get_normal_direction(direction,
+#                                                         contravariant_vectors,
+#                                                         i_node, j_node, element)
+
+#                 u_inner = get_node_vars(u, equations, dg, i_node, j_node, element)
+
+#                 u_outer = get_boundary_outer_state(u_inner, t, boundary_condition,
+#                                                    normal_direction,
+#                                                    mesh, equations, dg, cache,
+#                                                    i_node, j_node, element)
+#                 var_outer = variable(u_outer, equations)
+
+#                 var_minmax[i_node, j_node, element] = minmax(var_minmax[i_node, j_node,
+#                                                                         element],
+#                                                              var_outer)
+
+#                 i_node += i_node_step
+#                 j_node += j_node_step
+#             end
+#         end
+#     end
+
+#     return nothing
+# end
+end # @muladd
diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl
index b54d4092f0c..3823eab33cb 100644
--- a/src/solvers/dgsem_structured/dg.jl
+++ b/src/solvers/dgsem_structured/dg.jl
@@ -156,6 +156,7 @@ include("indicators_3d.jl")
 
 include("subcell_limiters_2d.jl")
 include("dg_2d_subcell_limiters.jl")
+include("dg_3d_subcell_limiters.jl")
 
 # Specialized implementations used to improve performance
 include("dg_2d_compressible_euler.jl")
diff --git a/src/solvers/dgsem_structured/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_structured/dg_3d_subcell_limiters.jl
new file mode 100644
index 00000000000..55ab7bfc62d
--- /dev/null
+++ b/src/solvers/dgsem_structured/dg_3d_subcell_limiters.jl
@@ -0,0 +1,668 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+# Calculate the DG staggered volume fluxes `fhat` in subcell FV-form inside the element
+# (**without non-conservative terms**) in 3d. The results are written in place to
+# `fhat1_L/R`, `fhat2_L/R` and `fhat3_L/R` (x, y and z direction, respectively).
+# See also `flux_differencing_kernel!`.
+@inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, u,
+                                mesh::P4estMesh{3},
+                                nonconservative_terms::False, equations,
+                                volume_flux, dg::DGSEM, element, cache)
+    (; contravariant_vectors) = cache.elements
+    (; weights, derivative_split) = dg.basis
+    (; flux_temp_threaded) = cache
+
+    flux_temp = flux_temp_threaded[Threads.threadid()] # scratch array of this thread
+
+    # The FV-form fluxes are calculated in a recursive manner, i.e.:
+    # fhat_(0,1)   = w_0 * FVol_0,
+    # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j,   for j=1,...,N-1,
+    # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
+
+    # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
+    # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
+    # and saved in `flux_temp`.
+
+    # Split form volume flux in orientation 1: x direction
+    flux_temp .= zero(eltype(flux_temp))
+
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, k, element)
+
+        # pull the first contravariant vector at the local node
+        Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) # x direction
+
+        # All diagonal entries of `derivative_split` are zero. Thus, we can skip
+        # the computation of the diagonal terms. In addition, we use the symmetry
+        # of the `volume_flux` to save half of the possible two-point flux
+        # computations.
+
+        # x direction
+        for ii in (i + 1):nnodes(dg)
+            u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element)
+            # pull the contravariant vectors and compute the average
+            Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, k,
+                                                   element)
+            Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
+
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], fluxtilde1,
+                                       equations, dg, i, j, k)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], fluxtilde1,
+                                       equations, dg, ii, j, k)
+        end
+    end
+
+    # FV-form flux `fhat` in x direction (first and last staggered entries are zero)
+    fhat1_L[:, 1, :, :] .= zero(eltype(fhat1_L))
+    fhat1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fhat1_L))
+    fhat1_R[:, 1, :, :] .= zero(eltype(fhat1_R))
+    fhat1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fhat1_R))
+
+    for k in eachnode(dg), j in eachnode(dg), i in 1:(nnodes(dg) - 1),
+        v in eachvariable(equations)
+
+        fhat1_L[v, i + 1, j, k] = fhat1_L[v, i, j, k] +
+                                  weights[i] * flux_temp[v, i, j, k]
+        fhat1_R[v, i + 1, j, k] = fhat1_L[v, i + 1, j, k] # left and right values coincide
+    end
+
+    # Split form volume flux in orientation 2: y direction
+    flux_temp .= zero(eltype(flux_temp))
+
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, k, element)
+
+        # pull the second contravariant vector at the local node
+        Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element)
+
+        # y direction
+        for jj in (j + 1):nnodes(dg)
+            u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element)
+            # pull the contravariant vectors and compute the average
+            Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, k,
+                                                   element)
+            Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], fluxtilde2,
+                                       equations, dg, i, j, k)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], fluxtilde2,
+                                       equations, dg, i, jj, k)
+        end
+    end
+
+    # FV-form flux `fhat` in y direction (first and last staggered entries are zero)
+    fhat2_L[:, :, 1, :] .= zero(eltype(fhat2_L))
+    fhat2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fhat2_L))
+    fhat2_R[:, :, 1, :] .= zero(eltype(fhat2_R))
+    fhat2_R[:, :, nnodes(dg) + 1, :] .= zero(eltype(fhat2_R))
+
+    for k in eachnode(dg), j in 1:(nnodes(dg) - 1), i in eachnode(dg),
+        v in eachvariable(equations)
+
+        fhat2_L[v, i, j + 1, k] = fhat2_L[v, i, j, k] +
+                                  weights[j] * flux_temp[v, i, j, k]
+        fhat2_R[v, i, j + 1, k] = fhat2_L[v, i, j + 1, k] # left and right values coincide
+    end
+
+    # Split form volume flux in orientation 3: z direction
+    flux_temp .= zero(eltype(flux_temp))
+
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, k, element)
+
+        # pull the third contravariant vector at the local node
+        Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element)
+
+        # z direction
+        for kk in (k + 1):nnodes(dg)
+            u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element)
+            # pull the contravariant vectors and compute the average
+            Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, i, j, kk,
+                                                   element)
+            Ja3_avg = 0.5f0 * (Ja3_node + Ja3_node_kk)
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde3 = volume_flux(u_node, u_node_kk, Ja3_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[k, kk], fluxtilde3,
+                                       equations, dg, i, j, k)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[kk, k], fluxtilde3,
+                                       equations, dg, i, j, kk)
+        end
+    end
+
+    # FV-form flux `fhat` in z direction (first and last staggered entries are zero)
+    fhat3_L[:, :, :, 1] .= zero(eltype(fhat3_L))
+    fhat3_L[:, :, :, nnodes(dg) + 1] .= zero(eltype(fhat3_L))
+    fhat3_R[:, :, :, 1] .= zero(eltype(fhat3_R))
+    fhat3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fhat3_R))
+
+    for k in 1:(nnodes(dg) - 1), j in eachnode(dg), i in eachnode(dg),
+        v in eachvariable(equations)
+
+        fhat3_L[v, i, j, k + 1] = fhat3_L[v, i, j, k] +
+                                  weights[k] * flux_temp[v, i, j, k]
+        fhat3_R[v, i, j, k + 1] = fhat3_L[v, i, j, k + 1] # left and right values coincide
+    end
+
+    return nothing
+end
+
+# TODO: This is the 2d version for now.
+# Calculate the DG staggered volume fluxes `fhat` in subcell FV-form inside the element
+# (**with non-conservative terms in "local * symmetric" form**).
+#
+# See also `flux_differencing_kernel!`.
+#
+# The calculation of the non-conservative staggered "fluxes" requires non-conservative
+# terms that can be written as a product of local and symmetric contributions. See, e.g.,
+#
+# - Rueda-Ramírez, Gassner (2023). A Flux-Differencing Formula for Split-Form Summation By Parts
+#   Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf.
+#
+# @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u,
+#                                 mesh::P4estMesh{3},
+#                                 nonconservative_terms::True, equations,
+#                                 volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM,
+#                                 element,
+#                                 cache) where {
+#                                               F_CONS <: Function,
+#                                               F_NONCONS <:
+#                                               FluxNonConservative{NonConservativeSymmetric()}
+#                                               }
+#     (; contravariant_vectors) = cache.elements
+#     (; weights, derivative_split) = dg.basis
+#     (; flux_temp_threaded, flux_nonconservative_temp_threaded) = cache
+#     (; fhat_temp_threaded, fhat_nonconservative_temp_threaded, phi_threaded) = cache
+
+#     volume_flux_cons, volume_flux_noncons = volume_flux
+
+#     flux_temp = flux_temp_threaded[Threads.threadid()]
+#     flux_noncons_temp = flux_nonconservative_temp_threaded[Threads.threadid()]
+
+#     fhat_temp = fhat_temp_threaded[Threads.threadid()]
+#     fhat_noncons_temp = fhat_nonconservative_temp_threaded[Threads.threadid()]
+#     phi = phi_threaded[Threads.threadid()]
+
+#     # The FV-form fluxes are calculated in a recursive manner, i.e.:
+#     # fhat_(0,1)   = w_0 * FVol_0,
+#     # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j,   for j=1,...,N-1,
+#     # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
+
+#     # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
+#     # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
+#     # and saved in `flux_temp`.
+
+#     # Split form volume flux in orientation 1: x direction
+#     flux_temp .= zero(eltype(flux_temp))
+#     flux_noncons_temp .= zero(eltype(flux_noncons_temp))
+
+#     for j in eachnode(dg), i in eachnode(dg)
+#         u_node = get_node_vars(u, equations, dg, i, j, element)
+
+#         # pull the contravariant vectors in each coordinate direction
+#         Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) # x direction
+
+#         # All diagonal entries of `derivative_split` are zero. Thus, we can skip
+#         # the computation of the diagonal terms. In addition, we use the symmetry
+#         # of `volume_flux_cons` and `volume_flux_noncons` to save half of the possible two-point flux
+#         # computations.
+#         for ii in (i + 1):nnodes(dg)
+#             u_node_ii = get_node_vars(u, equations, dg, ii, j, element)
+#             # pull the contravariant vectors and compute the average
+#             Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j,
+#                                                    element)
+#             Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
+
+#             # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+#             fluxtilde1 = volume_flux_cons(u_node, u_node_ii, Ja1_avg, equations)
+#             multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], fluxtilde1,
+#                                        equations, dg, i, j)
+#             multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], fluxtilde1,
+#                                        equations, dg, ii, j)
+#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#                 # We multiply by 0.5 because that is done in other parts of Trixi
+#                 flux1_noncons = volume_flux_noncons(u_node, u_node_ii, Ja1_avg,
+#                                                     equations,
+#                                                     NonConservativeSymmetric(), noncons)
+#                 multiply_add_to_node_vars!(flux_noncons_temp,
+#                                            0.5f0 * derivative_split[i, ii],
+#                                            flux1_noncons,
+#                                            equations, dg, noncons, i, j)
+#                 multiply_add_to_node_vars!(flux_noncons_temp,
+#                                            0.5f0 * derivative_split[ii, i],
+#                                            flux1_noncons,
+#                                            equations, dg, noncons, ii, j)
+#             end
+#         end
+#     end
+
+#     # FV-form flux `fhat` in x direction
+#     fhat1_L[:, 1, :] .= zero(eltype(fhat1_L))
+#     fhat1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_L))
+#     fhat1_R[:, 1, :] .= zero(eltype(fhat1_R))
+#     fhat1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_R))
+
+#     fhat_temp[:, 1, :] .= zero(eltype(fhat1_L))
+#     fhat_noncons_temp[:, :, 1, :] .= zero(eltype(fhat1_L))
+
+#     # Compute local contribution to non-conservative flux
+#     for j in eachnode(dg), i in eachnode(dg)
+#         u_local = get_node_vars(u, equations, dg, i, j, element)
+#         # pull the local contravariant vector
+#         Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element)
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#             set_node_vars!(phi,
+#                            volume_flux_noncons(u_local, Ja1_node, equations,
+#                                                NonConservativeLocal(), noncons),
+#                            equations, dg, noncons, i, j)
+#         end
+#     end
+
+#     for j in eachnode(dg), i in 1:(nnodes(dg) - 1)
+#         # Conservative part
+#         for v in eachvariable(equations)
+#             value = fhat_temp[v, i, j] + weights[i] * flux_temp[v, i, j]
+#             fhat_temp[v, i + 1, j] = value
+#             fhat1_L[v, i + 1, j] = value
+#             fhat1_R[v, i + 1, j] = value
+#         end
+#         # Nonconservative part
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+#             v in eachvariable(equations)
+
+#             value = fhat_noncons_temp[v, noncons, i, j] +
+#                     weights[i] * flux_noncons_temp[v, noncons, i, j]
+#             fhat_noncons_temp[v, noncons, i + 1, j] = value
+
+#             fhat1_L[v, i + 1, j] = fhat1_L[v, i + 1, j] + phi[v, noncons, i, j] * value
+#             fhat1_R[v, i + 1, j] = fhat1_R[v, i + 1, j] +
+#                                    phi[v, noncons, i + 1, j] * value
+#         end
+#     end
+
+#     # Split form volume flux in orientation 2: y direction
+#     flux_temp .= zero(eltype(flux_temp))
+#     flux_noncons_temp .= zero(eltype(flux_noncons_temp))
+
+#     for j in eachnode(dg), i in eachnode(dg)
+#         u_node = get_node_vars(u, equations, dg, i, j, element)
+
+#         # pull the contravariant vectors in each coordinate direction
+#         Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element)
+
+#         for jj in (j + 1):nnodes(dg)
+#             u_node_jj = get_node_vars(u, equations, dg, i, jj, element)
+#             # pull the contravariant vectors and compute the average
+#             Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj,
+#                                                    element)
+#             Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
+#             # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+#             fluxtilde2 = volume_flux_cons(u_node, u_node_jj, Ja2_avg, equations)
+#             multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], fluxtilde2,
+#                                        equations, dg, i, j)
+#             multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], fluxtilde2,
+#                                        equations, dg, i, jj)
+#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#                 # We multiply by 0.5 because that is done in other parts of Trixi
+#                 flux2_noncons = volume_flux_noncons(u_node, u_node_jj, Ja2_avg,
+#                                                     equations,
+#                                                     NonConservativeSymmetric(), noncons)
+#                 multiply_add_to_node_vars!(flux_noncons_temp,
+#                                            0.5f0 * derivative_split[j, jj],
+#                                            flux2_noncons,
+#                                            equations, dg, noncons, i, j)
+#                 multiply_add_to_node_vars!(flux_noncons_temp,
+#                                            0.5f0 * derivative_split[jj, j],
+#                                            flux2_noncons,
+#                                            equations, dg, noncons, i, jj)
+#             end
+#         end
+#     end
+
+#     # FV-form flux `fhat` in y direction
+#     fhat2_L[:, :, 1] .= zero(eltype(fhat2_L))
+#     fhat2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_L))
+#     fhat2_R[:, :, 1] .= zero(eltype(fhat2_R))
+#     fhat2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_R))
+
+#     fhat_temp[:, :, 1] .= zero(eltype(fhat1_L))
+#     fhat_noncons_temp[:, :, :, 1] .= zero(eltype(fhat1_L))
+
+#     # Compute local contribution to non-conservative flux
+#     for j in eachnode(dg), i in eachnode(dg)
+#         u_local = get_node_vars(u, equations, dg, i, j, element)
+#         # pull the local contravariant vector
+#         Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element)
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#             set_node_vars!(phi,
+#                            volume_flux_noncons(u_local, Ja2_node, equations,
+#                                                NonConservativeLocal(), noncons),
+#                            equations, dg, noncons, i, j)
+#         end
+#     end
+
+#     for j in 1:(nnodes(dg) - 1), i in eachnode(dg)
+#         # Conservative part
+#         for v in eachvariable(equations)
+#             value = fhat_temp[v, i, j] + weights[j] * flux_temp[v, i, j]
+#             fhat_temp[v, i, j + 1] = value
+#             fhat2_L[v, i, j + 1] = value
+#             fhat2_R[v, i, j + 1] = value
+#         end
+#         # Nonconservative part
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+#             v in eachvariable(equations)
+
+#             value = fhat_noncons_temp[v, noncons, i, j] +
+#                     weights[j] * flux_noncons_temp[v, noncons, i, j]
+#             fhat_noncons_temp[v, noncons, i, j + 1] = value
+
+#             fhat2_L[v, i, j + 1] = fhat2_L[v, i, j + 1] + phi[v, noncons, i, j] * value
+#             fhat2_R[v, i, j + 1] = fhat2_R[v, i, j + 1] +
+#                                    phi[v, noncons, i, j + 1] * value
+#         end
+#     end
+
+#     return nothing
+# end
+
+# TODO: This is the 2d version for now.
+# Calculate the DG staggered volume fluxes `fhat` in subcell FV-form inside the element
+# (**with non-conservative terms in "local * jump" form**).
+#
+# See also `flux_differencing_kernel!`.
+#
+# The calculation of the non-conservative staggered "fluxes" requires non-conservative
+# terms that can be written as a product of local and jump contributions.
+# @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u,
+#                                 mesh::P4estMesh{3},
+#                                 nonconservative_terms::True, equations,
+#                                 volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM,
+#                                 element,
+#                                 cache) where {
+#                                               F_CONS <: Function,
+#                                               F_NONCONS <:
+#                                               FluxNonConservative{NonConservativeJump()}
+#                                               }
+#     (; contravariant_vectors) = cache.elements
+#     (; weights, derivative_split) = dg.basis
+#     (; flux_temp_threaded, flux_nonconservative_temp_threaded) = cache
+#     (; fhat_temp_threaded, fhat_nonconservative_temp_threaded, phi_threaded) = cache
+
+#     volume_flux_cons, volume_flux_noncons = volume_flux
+
+#     flux_temp = flux_temp_threaded[Threads.threadid()]
+#     flux_noncons_temp = flux_nonconservative_temp_threaded[Threads.threadid()]
+
+#     fhat_temp = fhat_temp_threaded[Threads.threadid()]
+#     fhat_noncons_temp = fhat_nonconservative_temp_threaded[Threads.threadid()]
+#     phi = phi_threaded[Threads.threadid()]
+
+#     # The FV-form fluxes are calculated in a recursive manner, i.e.:
+#     # fhat_(0,1)   = w_0 * FVol_0,
+#     # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j,   for j=1,...,N-1,
+#     # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
+
+#     # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
+#     # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
+#     # and saved in `flux_temp`.
+
+#     # Split form volume flux in orientation 1: x direction
+#     flux_temp .= zero(eltype(flux_temp))
+#     flux_noncons_temp .= zero(eltype(flux_noncons_temp))
+
+#     for j in eachnode(dg), i in eachnode(dg)
+#         u_node = get_node_vars(u, equations, dg, i, j, element)
+
+#         # pull the contravariant vectors in each coordinate direction
+#         Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) # x direction
+
+#         # All diagonal entries of `derivative_split` are zero. Thus, we can skip
+#         # the computation of the diagonal terms. In addition, we use the symmetry
+#         # of `volume_flux_cons` and `volume_flux_noncons` to save half of the possible two-point flux
+#         # computations.
+#         for ii in (i + 1):nnodes(dg)
+#             u_node_ii = get_node_vars(u, equations, dg, ii, j, element)
+#             # pull the contravariant vectors and compute the average
+#             Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j,
+#                                                    element)
+#             Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
+
+#             # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+#             fluxtilde1 = volume_flux_cons(u_node, u_node_ii, Ja1_avg, equations)
+#             multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], fluxtilde1,
+#                                        equations, dg, i, j)
+#             multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], fluxtilde1,
+#                                        equations, dg, ii, j)
+#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#                 # We multiply by 0.5 because that is done in other parts of Trixi
+#                 flux1_noncons = volume_flux_noncons(u_node, u_node_ii, Ja1_avg,
+#                                                     equations,
+#                                                     NonConservativeJump(), noncons)
+#                 multiply_add_to_node_vars!(flux_noncons_temp,
+#                                            0.5f0 * derivative_split[i, ii],
+#                                            flux1_noncons,
+#                                            equations, dg, noncons, i, j)
+#                 multiply_add_to_node_vars!(flux_noncons_temp,
+#                                            -0.5f0 * derivative_split[ii, i],
+#                                            flux1_noncons,
+#                                            equations, dg, noncons, ii, j)
+#             end
+#         end
+#     end
+
+#     # FV-form flux `fhat` in x direction
+#     fhat1_L[:, 1, :] .= zero(eltype(fhat1_L))
+#     fhat1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_L))
+#     fhat1_R[:, 1, :] .= zero(eltype(fhat1_R))
+#     fhat1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_R))
+
+#     fhat_temp[:, 1, :] .= zero(eltype(fhat1_L))
+#     fhat_noncons_temp[:, :, 1, :] .= zero(eltype(fhat1_L))
+
+#     # Compute local contribution to non-conservative flux
+#     for j in eachnode(dg), i in eachnode(dg)
+#         u_local = get_node_vars(u, equations, dg, i, j, element)
+#         # pull the local contravariant vector
+#         Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element)
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#             set_node_vars!(phi,
+#                            volume_flux_noncons(u_local, Ja1_node, equations,
+#                                                NonConservativeLocal(), noncons),
+#                            equations, dg, noncons, i, j)
+#         end
+#     end
+
+#     for j in eachnode(dg), i in 1:(nnodes(dg) - 1)
+#         # Conservative part
+#         for v in eachvariable(equations)
+#             value = fhat_temp[v, i, j] + weights[i] * flux_temp[v, i, j]
+#             fhat_temp[v, i + 1, j] = value
+#             fhat1_L[v, i + 1, j] = value
+#             fhat1_R[v, i + 1, j] = value
+#         end
+#         # Nonconservative part
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+#             v in eachvariable(equations)
+
+#             value = fhat_noncons_temp[v, noncons, i, j] +
+#                     weights[i] * flux_noncons_temp[v, noncons, i, j]
+#             fhat_noncons_temp[v, noncons, i + 1, j] = value
+
+#             fhat1_L[v, i + 1, j] = fhat1_L[v, i + 1, j] + phi[v, noncons, i, j] * value
+#             fhat1_R[v, i + 1, j] = fhat1_R[v, i + 1, j] +
+#                                    phi[v, noncons, i + 1, j] * value
+#         end
+#     end
+
+#     # Apply correction term to the flux-differencing formula for nonconservative local * jump fluxes.
+#     for j in eachnode(dg)
+#         u_0 = get_node_vars(u, equations, dg, 1, j, element)
+#         Ja1_node_0 = get_contravariant_vector(1, contravariant_vectors, 1, j, element)
+
+#         for i in 2:(nnodes(dg) - 1)
+#             u_i = get_node_vars(u, equations, dg, i, j, element)
+#             Ja1_node_i = get_contravariant_vector(1, contravariant_vectors, i, j,
+#                                                   element)
+#             Ja1_avg = 0.5f0 * (Ja1_node_0 + Ja1_node_i)
+
+#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#                 phi_jump = volume_flux_noncons(u_0, u_i, Ja1_avg, equations,
+#                                                NonConservativeJump(), noncons)
+
+#                 for v in eachvariable(equations)
+#                     # The factor of 2 is missing on each term because Trixi multiplies all the non-cons terms with 0.5
+#                     fhat1_R[v, i, j] -= phi[v, noncons, i, j] * phi_jump[v]
+#                     fhat1_L[v, i + 1, j] -= phi[v, noncons, i, j] * phi_jump[v]
+#                 end
+#             end
+#         end
+#         u_N = get_node_vars(u, equations, dg, nnodes(dg), j, element)
+#         Ja1_node_N = get_contravariant_vector(1, contravariant_vectors, nnodes(dg), j,
+#                                               element)
+#         Ja1_avg = 0.5f0 * (Ja1_node_0 + Ja1_node_N)
+
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#             phi_jump = volume_flux_noncons(u_0, u_N, Ja1_avg, equations,
+#                                            NonConservativeJump(), noncons)
+
+#             for v in eachvariable(equations)
+#                 # The factor of 2 is missing because Trixi multiplies all the non-cons terms with 0.5
+#                 fhat1_R[v, nnodes(dg), j] -= phi[v, noncons, nnodes(dg), j] *
+#                                              phi_jump[v]
+#             end
+#         end
+#     end
+
+#     # Split form volume flux in orientation 2: y direction
+#     flux_temp .= zero(eltype(flux_temp))
+#     flux_noncons_temp .= zero(eltype(flux_noncons_temp))
+
+#     for j in eachnode(dg), i in eachnode(dg)
+#         u_node = get_node_vars(u, equations, dg, i, j, element)
+
+#         # pull the contravariant vectors in each coordinate direction
+#         Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element)
+
+#         for jj in (j + 1):nnodes(dg)
+#             u_node_jj = get_node_vars(u, equations, dg, i, jj, element)
+#             # pull the contravariant vectors and compute the average
+#             Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj,
+#                                                    element)
+#             Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
+#             # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+#             fluxtilde2 = volume_flux_cons(u_node, u_node_jj, Ja2_avg, equations)
+#             multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], fluxtilde2,
+#                                        equations, dg, i, j)
+#             multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], fluxtilde2,
+#                                        equations, dg, i, jj)
+#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#                 # We multiply by 0.5 because that is done in other parts of Trixi
+#                 flux2_noncons = volume_flux_noncons(u_node, u_node_jj, Ja2_avg,
+#                                                     equations,
+#                                                     NonConservativeJump(), noncons)
+#                 multiply_add_to_node_vars!(flux_noncons_temp,
+#                                            0.5f0 * derivative_split[j, jj],
+#                                            flux2_noncons,
+#                                            equations, dg, noncons, i, j)
+#                 multiply_add_to_node_vars!(flux_noncons_temp,
+#                                            -0.5f0 * derivative_split[jj, j],
+#                                            flux2_noncons,
+#                                            equations, dg, noncons, i, jj)
+#             end
+#         end
+#     end
+
+#     # FV-form flux `fhat` in y direction
+#     fhat2_L[:, :, 1] .= zero(eltype(fhat2_L))
+#     fhat2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_L))
+#     fhat2_R[:, :, 1] .= zero(eltype(fhat2_R))
+#     fhat2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_R))
+
+#     fhat_temp[:, :, 1] .= zero(eltype(fhat1_L))
+#     fhat_noncons_temp[:, :, :, 1] .= zero(eltype(fhat1_L))
+
+#     # Compute local contribution to non-conservative flux
+#     for j in eachnode(dg), i in eachnode(dg)
+#         u_local = get_node_vars(u, equations, dg, i, j, element)
+#         # pull the local contravariant vector
+#         Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element)
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#             set_node_vars!(phi,
+#                            volume_flux_noncons(u_local, Ja2_node, equations,
+#                                                NonConservativeLocal(), noncons),
+#                            equations, dg, noncons, i, j)
+#         end
+#     end
+
+#     for j in 1:(nnodes(dg) - 1), i in eachnode(dg)
+#         # Conservative part
+#         for v in eachvariable(equations)
+#             value = fhat_temp[v, i, j] + weights[j] * flux_temp[v, i, j]
+#             fhat_temp[v, i, j + 1] = value
+#             fhat2_L[v, i, j + 1] = value
+#             fhat2_R[v, i, j + 1] = value
+#         end
+#         # Nonconservative part
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+#             v in eachvariable(equations)
+
+#             value = fhat_noncons_temp[v, noncons, i, j] +
+#                     weights[j] * flux_noncons_temp[v, noncons, i, j]
+#             fhat_noncons_temp[v, noncons, i, j + 1] = value
+
+#             fhat2_L[v, i, j + 1] = fhat2_L[v, i, j + 1] + phi[v, noncons, i, j] * value
+#             fhat2_R[v, i, j + 1] = fhat2_R[v, i, j + 1] +
+#                                    phi[v, noncons, i, j + 1] * value
+#         end
+#     end
+
+#     # Apply correction term to the flux-differencing formula for nonconservative local * jump fluxes.
+#     for i in eachnode(dg)
+#         u_0 = get_node_vars(u, equations, dg, i, 1, element)
+#         Ja2_node_0 = get_contravariant_vector(2, contravariant_vectors, i, 1, element)
+
+#         for j in 2:(nnodes(dg) - 1)
+#             u_j = get_node_vars(u, equations, dg, i, j, element)
+#             Ja2_node_j = get_contravariant_vector(2, contravariant_vectors, i, j,
+#                                                   element)
+#             Ja2_avg = 0.5f0 * (Ja2_node_0 + Ja2_node_j)
+
+#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#                 phi_jump = volume_flux_noncons(u_0, u_j, Ja2_avg, equations,
+#                                                NonConservativeJump(), noncons)
+
+#                 for v in eachvariable(equations)
+#                     # The factor of 2 is missing on each term because Trixi multiplies all the non-cons terms with 0.5
+#                     fhat2_R[v, i, j] -= phi[v, noncons, i, j] * phi_jump[v]
+#                     fhat2_L[v, i, j + 1] -= phi[v, noncons, i, j] * phi_jump[v]
+#                 end
+#             end
+#         end
+#         u_N = get_node_vars(u, equations, dg, i, nnodes(dg), element)
+#         Ja2_node_N = get_contravariant_vector(2, contravariant_vectors, i, nnodes(dg),
+#                                               element)
+#         Ja2_avg = 0.5f0 * (Ja2_node_0 + Ja2_node_N)
+
+#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+#             phi_jump = volume_flux_noncons(u_0, u_N, Ja2_avg, equations,
+#                                            NonConservativeJump(), noncons)
+
+#             for v in eachvariable(equations)
+#                 # The factor of 2 is missing because Trixi multiplies all the non-cons terms with 0.5
+#                 fhat2_R[v, i, nnodes(dg)] -= phi[v, noncons, i, nnodes(dg)] *
+#                                              phi_jump[v]
+#             end
+#         end
+#     end
+
+#     return nothing
+# end
+end # @muladd
diff --git a/src/solvers/dgsem_tree/containers_3d.jl b/src/solvers/dgsem_tree/containers_3d.jl
index 5fc027ad001..bf639b0d976 100644
--- a/src/solvers/dgsem_tree/containers_3d.jl
+++ b/src/solvers/dgsem_tree/containers_3d.jl
@@ -79,7 +79,7 @@ nelements(elements::ElementContainer3D) = length(elements.cell_ids)
     eachelement(elements::ElementContainer3D)
 
 Return an iterator over the indices that specify the location in relevant data structures
-for the elements in `elements`. 
+for the elements in `elements`.
 In particular, not the elements themselves are returned.
 """
 @inline eachelement(elements::ElementContainer3D) = Base.OneTo(nelements(elements))
@@ -812,4 +812,219 @@ function init_mortars!(mortars, elements, mesh::TreeMesh3D)
     @assert count==nmortars(mortars) ("Actual mortar count ($count) does not match "*
                                       "expectations $(nmortars(mortars))")
 end
+
+# TODO: Does it make sense to implement these containers dimension-independently,
+# with indices like [variables, **direction**, i, j, k, elements]?
+mutable struct ContainerAntidiffusiveFlux3D{uEltype <: Real} # `_L`/`_R` flux arrays for directions 1, 2, 3
+    antidiffusive_flux1_L::Array{uEltype, 5} # [variables, i, j, k, elements]; n_nodes + 1 entries in i
+    antidiffusive_flux1_R::Array{uEltype, 5} # [variables, i, j, k, elements]; n_nodes + 1 entries in i
+    antidiffusive_flux2_L::Array{uEltype, 5} # [variables, i, j, k, elements]; n_nodes + 1 entries in j
+    antidiffusive_flux2_R::Array{uEltype, 5} # [variables, i, j, k, elements]; n_nodes + 1 entries in j
+    antidiffusive_flux3_L::Array{uEltype, 5} # [variables, i, j, k, elements]; n_nodes + 1 entries in k
+    antidiffusive_flux3_R::Array{uEltype, 5} # [variables, i, j, k, elements]; n_nodes + 1 entries in k
+    # internal `resize!`able storage; the arrays above are `unsafe_wrap`ped around these vectors
+    _antidiffusive_flux1_L::Vector{uEltype}
+    _antidiffusive_flux1_R::Vector{uEltype}
+    _antidiffusive_flux2_L::Vector{uEltype}
+    _antidiffusive_flux2_R::Vector{uEltype}
+    _antidiffusive_flux3_L::Vector{uEltype}
+    _antidiffusive_flux3_R::Vector{uEltype}
+end
+
+function ContainerAntidiffusiveFlux3D{uEltype}(capacity::Integer, n_variables, # `capacity`: size of the last (element) dimension
+                                               n_nodes) where {uEltype <: Real}
+    nan_uEltype = convert(uEltype, NaN) # fill value for all storage vectors
+
+    # Initialize fields with defaults: NaN-filled vectors, each wrapped as a 5D array
+    _antidiffusive_flux1_L = fill(nan_uEltype, # direction 1: n_nodes + 1 entries in i
+                                  n_variables * (n_nodes + 1) * n_nodes * n_nodes *
+                                  capacity)
+    antidiffusive_flux1_L = unsafe_wrap(Array, pointer(_antidiffusive_flux1_L),
+                                        (n_variables, n_nodes + 1, n_nodes, n_nodes,
+                                         capacity))
+    _antidiffusive_flux1_R = fill(nan_uEltype,
+                                  n_variables * (n_nodes + 1) * n_nodes * n_nodes *
+                                  capacity)
+    antidiffusive_flux1_R = unsafe_wrap(Array, pointer(_antidiffusive_flux1_R),
+                                        (n_variables, n_nodes + 1, n_nodes, n_nodes,
+                                         capacity))
+
+    _antidiffusive_flux2_L = fill(nan_uEltype, # direction 2: n_nodes + 1 entries in j
+                                  n_variables * n_nodes * (n_nodes + 1) * n_nodes *
+                                  capacity)
+    antidiffusive_flux2_L = unsafe_wrap(Array, pointer(_antidiffusive_flux2_L),
+                                        (n_variables, n_nodes, n_nodes + 1, n_nodes,
+                                         capacity))
+    _antidiffusive_flux2_R = fill(nan_uEltype,
+                                  n_variables * n_nodes * (n_nodes + 1) * n_nodes *
+                                  capacity)
+    antidiffusive_flux2_R = unsafe_wrap(Array, pointer(_antidiffusive_flux2_R),
+                                        (n_variables, n_nodes, n_nodes + 1, n_nodes,
+                                         capacity))
+
+    _antidiffusive_flux3_L = fill(nan_uEltype, # direction 3: n_nodes + 1 entries in k
+                                  n_variables * n_nodes * n_nodes * (n_nodes + 1) *
+                                  capacity)
+    antidiffusive_flux3_L = unsafe_wrap(Array, pointer(_antidiffusive_flux3_L),
+                                        (n_variables, n_nodes, n_nodes, n_nodes + 1,
+                                         capacity))
+    _antidiffusive_flux3_R = fill(nan_uEltype,
+                                  n_variables * n_nodes * n_nodes * (n_nodes + 1) *
+                                  capacity)
+    antidiffusive_flux3_R = unsafe_wrap(Array, pointer(_antidiffusive_flux3_R),
+                                        (n_variables, n_nodes, n_nodes, n_nodes + 1,
+                                         capacity))
+    return ContainerAntidiffusiveFlux3D{uEltype}(antidiffusive_flux1_L, # wrapped arrays first, then their storage vectors
+                                                 antidiffusive_flux1_R,
+                                                 antidiffusive_flux2_L,
+                                                 antidiffusive_flux2_R,
+                                                 antidiffusive_flux3_L,
+                                                 antidiffusive_flux3_R,
+                                                 _antidiffusive_flux1_L,
+                                                 _antidiffusive_flux1_R,
+                                                 _antidiffusive_flux2_L,
+                                                 _antidiffusive_flux2_R,
+                                                 _antidiffusive_flux3_L,
+                                                 _antidiffusive_flux3_R)
+end
+
+nvariables(fluxes::ContainerAntidiffusiveFlux3D) = size(fluxes.antidiffusive_flux1_L, 1) # first dimension holds the variables
+nnodes(fluxes::ContainerAntidiffusiveFlux3D) = size(fluxes.antidiffusive_flux1_L, 3) # dim 3, because dim 2 of `flux1` has n_nodes + 1 entries
+
+# Resize all antidiffusive flux arrays of `fluxes` so that their last dimension
+# has `capacity` entries. Only one-dimensional `Array`s are `resize!`able in
+# Julia. Hence, we use `Vector`s as internal storage, `resize!` them whenever
+# needed, and then reuse the same memory by `unsafe_wrap`ping multi-dimensional
+# `Array`s around the internal storage. Entries are not re-initialized here.
+function Base.resize!(fluxes::ContainerAntidiffusiveFlux3D, capacity)
+    n_nodes = nnodes(fluxes) # read the sizes from the current arrays before re-wrapping
+    n_variables = nvariables(fluxes)
+
+    @unpack _antidiffusive_flux1_L, _antidiffusive_flux1_R, _antidiffusive_flux2_L, _antidiffusive_flux2_R, _antidiffusive_flux3_L, _antidiffusive_flux3_R = fluxes
+
+    resize!(_antidiffusive_flux1_L, # direction 1: n_nodes + 1 entries in i
+            n_variables * (n_nodes + 1) * n_nodes * n_nodes * capacity)
+    fluxes.antidiffusive_flux1_L = unsafe_wrap(Array, pointer(_antidiffusive_flux1_L), # wrap again with the current pointer and new shape
+                                               (n_variables, n_nodes + 1, n_nodes,
+                                                n_nodes,
+                                                capacity))
+    resize!(_antidiffusive_flux1_R,
+            n_variables * (n_nodes + 1) * n_nodes * n_nodes * capacity)
+    fluxes.antidiffusive_flux1_R = unsafe_wrap(Array, pointer(_antidiffusive_flux1_R),
+                                               (n_variables, n_nodes + 1, n_nodes,
+                                                n_nodes,
+                                                capacity))
+    resize!(_antidiffusive_flux2_L, # direction 2: n_nodes + 1 entries in j
+            n_variables * n_nodes * (n_nodes + 1) * n_nodes * capacity)
+    fluxes.antidiffusive_flux2_L = unsafe_wrap(Array, pointer(_antidiffusive_flux2_L),
+                                               (n_variables, n_nodes, n_nodes + 1,
+                                                n_nodes,
+                                                capacity))
+    resize!(_antidiffusive_flux2_R,
+            n_variables * n_nodes * (n_nodes + 1) * n_nodes * capacity)
+    fluxes.antidiffusive_flux2_R = unsafe_wrap(Array, pointer(_antidiffusive_flux2_R),
+                                               (n_variables, n_nodes, n_nodes + 1,
+                                                n_nodes,
+                                                capacity))
+
+    resize!(_antidiffusive_flux3_L, # direction 3: n_nodes + 1 entries in k
+            n_variables * n_nodes * n_nodes * (n_nodes + 1) * capacity)
+    fluxes.antidiffusive_flux3_L = unsafe_wrap(Array, pointer(_antidiffusive_flux3_L),
+                                               (n_variables, n_nodes, n_nodes,
+                                                n_nodes + 1,
+                                                capacity))
+    resize!(_antidiffusive_flux3_R,
+            n_variables * n_nodes * n_nodes * (n_nodes + 1) * capacity)
+    fluxes.antidiffusive_flux3_R = unsafe_wrap(Array, pointer(_antidiffusive_flux3_R),
+                                               (n_variables, n_nodes, n_nodes,
+                                                n_nodes + 1,
+                                                capacity))
+    return nothing
+end
+
+# TODO: Does it make sense to implement alpha1/2/3 dimension-independently,
+# with indices like [**direction**, i, j, k, elements]?
+# Container data structure (structure-of-arrays style) for variables used for IDP limiting
+mutable struct ContainerSubcellLimiterIDP3D{uEltype <: Real}
+    alpha::Array{uEltype, 4} # [i, j, k, element]
+    alpha1::Array{uEltype, 4} # [i, j, k, element]; n_nodes + 1 entries in i
+    alpha2::Array{uEltype, 4} # [i, j, k, element]; n_nodes + 1 entries in j
+    alpha3::Array{uEltype, 4} # [i, j, k, element]; n_nodes + 1 entries in k
+    variable_bounds::Dict{Symbol, Array{uEltype, 4}} # one [i, j, k, element] array per bound key
+    # internal `resize!`able storage; the arrays above are `unsafe_wrap`ped around these vectors
+    _alpha::Vector{uEltype}
+    _alpha1::Vector{uEltype}
+    _alpha2::Vector{uEltype}
+    _alpha3::Vector{uEltype}
+    _variable_bounds::Dict{Symbol, Vector{uEltype}} # same keys as `variable_bounds`
+end
+
+function ContainerSubcellLimiterIDP3D{uEltype}(capacity::Integer, n_nodes, # `capacity`: size of the last (element) dimension
+                                               bound_keys) where {uEltype <: Real} # `bound_keys`: keys for `variable_bounds`
+    nan_uEltype = convert(uEltype, NaN) # fill value for all storage vectors
+
+    # Initialize fields with defaults: NaN-filled vectors, each wrapped as a 4D array
+    _alpha = fill(nan_uEltype, n_nodes * n_nodes * n_nodes * capacity)
+    alpha = unsafe_wrap(Array, pointer(_alpha), (n_nodes, n_nodes, n_nodes, capacity))
+    _alpha1 = fill(nan_uEltype, (n_nodes + 1) * n_nodes * n_nodes * capacity) # n_nodes + 1 entries in i
+    alpha1 = unsafe_wrap(Array, pointer(_alpha1),
+                         (n_nodes + 1, n_nodes, n_nodes, capacity))
+    _alpha2 = fill(nan_uEltype, n_nodes * (n_nodes + 1) * n_nodes * capacity) # n_nodes + 1 entries in j
+    alpha2 = unsafe_wrap(Array, pointer(_alpha2),
+                         (n_nodes, n_nodes + 1, n_nodes, capacity))
+    _alpha3 = fill(nan_uEltype, n_nodes * n_nodes * (n_nodes + 1) * capacity) # n_nodes + 1 entries in k
+    alpha3 = unsafe_wrap(Array, pointer(_alpha3),
+                         (n_nodes, n_nodes, n_nodes + 1, capacity))
+
+    _variable_bounds = Dict{Symbol, Vector{uEltype}}() # storage vectors, one per key
+    variable_bounds = Dict{Symbol, Array{uEltype, 4}}() # wrapped arrays, same keys
+    for key in bound_keys
+        _variable_bounds[key] = fill(nan_uEltype,
+                                     n_nodes * n_nodes * n_nodes * capacity)
+        variable_bounds[key] = unsafe_wrap(Array, pointer(_variable_bounds[key]),
+                                           (n_nodes, n_nodes, n_nodes, capacity))
+    end
+
+    return ContainerSubcellLimiterIDP3D{uEltype}(alpha, alpha1, alpha2, alpha3, # wrapped arrays first, then their storage
+                                                 variable_bounds,
+                                                 _alpha, _alpha1, _alpha2, _alpha3,
+                                                 _variable_bounds)
+end
+
+nnodes(container::ContainerSubcellLimiterIDP3D) = size(container.alpha, 1) # `alpha` has n_nodes entries in each node dimension
+
+# Resize all arrays of `container` so that their last dimension has `capacity`
+# entries. Only one-dimensional `Array`s are `resize!`able in Julia. Hence, we
+# use `Vector`s as internal storage, `resize!` them whenever needed, and then
+# reuse the same memory by `unsafe_wrap`ping multi-dimensional `Array`s
+# around the internal storage.
+function Base.resize!(container::ContainerSubcellLimiterIDP3D, capacity)
+    n_nodes = nnodes(container) # read from the current `alpha` before re-wrapping
+
+    (; _alpha, _alpha1, _alpha2, _alpha3) = container
+    resize!(_alpha, n_nodes * n_nodes * n_nodes * capacity)
+    container.alpha = unsafe_wrap(Array, pointer(_alpha),
+                                  (n_nodes, n_nodes, n_nodes, capacity))
+    container.alpha .= convert(eltype(container.alpha), NaN) # reset all of `alpha` to NaN; the other arrays are not reset here
+    resize!(_alpha1, (n_nodes + 1) * n_nodes * n_nodes * capacity) # n_nodes + 1 entries in i
+    container.alpha1 = unsafe_wrap(Array, pointer(_alpha1),
+                                   (n_nodes + 1, n_nodes, n_nodes, capacity))
+    resize!(_alpha2, n_nodes * (n_nodes + 1) * n_nodes * capacity) # n_nodes + 1 entries in j
+    container.alpha2 = unsafe_wrap(Array, pointer(_alpha2),
+                                   (n_nodes, n_nodes + 1, n_nodes, capacity))
+    resize!(_alpha3, n_nodes * n_nodes * (n_nodes + 1) * capacity) # n_nodes + 1 entries in k
+    container.alpha3 = unsafe_wrap(Array, pointer(_alpha3),
+                                   (n_nodes, n_nodes, n_nodes + 1, capacity))
+
+    (; _variable_bounds) = container
+    for (key, _) in _variable_bounds # resize and re-wrap every bound array under its key
+        resize!(_variable_bounds[key], n_nodes * n_nodes * n_nodes * capacity)
+        container.variable_bounds[key] = unsafe_wrap(Array,
+                                                     pointer(_variable_bounds[key]),
+                                                     (n_nodes, n_nodes, n_nodes,
+                                                      capacity))
+    end
+
+    return nothing
+end
 end # @muladd
diff --git a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl
index fb7f7f9dbf1..a72e14a434b 100644
--- a/src/solvers/dgsem_tree/dg.jl
+++ b/src/solvers/dgsem_tree/dg.jl
@@ -61,5 +61,7 @@ include("dg_3d_compressible_euler.jl")
 # Subcell limiters
 include("subcell_limiters.jl")
 include("subcell_limiters_2d.jl")
+include("subcell_limiters_3d.jl")
 include("dg_2d_subcell_limiters.jl")
+include("dg_3d_subcell_limiters.jl")
 end # @muladd
diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl
index 03d0cfd33a1..db78036a2bb 100644
--- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl
+++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl
@@ -61,7 +61,7 @@ end
 
 function calc_volume_integral!(du, u,
                                mesh::Union{TreeMesh{2}, StructuredMesh{2},
-                                           P4estMesh{2}},
+                                           P4estMesh{2}, P4estMesh{3}},
                                nonconservative_terms, equations,
                                volume_integral::VolumeIntegralSubcellLimiting,
                                dg::DGSEM, cache)
@@ -735,6 +735,7 @@ end
     return nothing
 end
 
+# TODO: dimension independent implementation
 """
     get_boundary_outer_state(u_inner, t,
                              boundary_condition::BoundaryConditionDirichlet,
diff --git a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
new file mode 100644
index 00000000000..f9b671568d0
--- /dev/null
+++ b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
@@ -0,0 +1,224 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+function create_cache(mesh::P4estMesh{3}, # cache for subcell limiting on a 3D `P4estMesh`
+                      equations, volume_integral::VolumeIntegralSubcellLimiting,
+                      dg::DG, uEltype)
+    cache = create_cache(mesh, equations, # start from the cache of the pure LGL finite volume integral
+                         VolumeIntegralPureLGLFiniteVolume(volume_integral.volume_flux_fv),
+                         dg, uEltype)
+
+    A4dp1_x = Array{uEltype, 4} # used for buffers with nnodes(dg) + 1 entries in i
+    A4dp1_y = Array{uEltype, 4} # used for buffers with nnodes(dg) + 1 entries in j
+    A4dp1_z = Array{uEltype, 4} # used for buffers with nnodes(dg) + 1 entries in k
+    A4d = Array{uEltype, 4} # used for buffers with nnodes(dg) entries in every direction
+
+    fhat1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1, # uninitialized, one buffer per thread
+                                       nnodes(dg), nnodes(dg))
+                               for _ in 1:Threads.nthreads()]
+    fhat1_R_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1,
+                                       nnodes(dg), nnodes(dg))
+                               for _ in 1:Threads.nthreads()]
+    fhat2_L_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg),
+                                       nnodes(dg) + 1, nnodes(dg))
+                               for _ in 1:Threads.nthreads()]
+    fhat2_R_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg),
+                                       nnodes(dg) + 1, nnodes(dg))
+                               for _ in 1:Threads.nthreads()]
+    fhat3_L_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg),
+                                       nnodes(dg), nnodes(dg) + 1)
+                               for _ in 1:Threads.nthreads()]
+    fhat3_R_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg),
+                                       nnodes(dg), nnodes(dg) + 1)
+                               for _ in 1:Threads.nthreads()]
+    flux_temp_threaded = A4d[A4d(undef, nvariables(equations), nnodes(dg), nnodes(dg),
+                                 nnodes(dg))
+                             for _ in 1:Threads.nthreads()]
+    fhat_temp_threaded = A4d[A4d(undef, nvariables(equations), nnodes(dg),
+                                 nnodes(dg), nnodes(dg)) for _ in 1:Threads.nthreads()]
+    antidiffusive_fluxes = ContainerAntidiffusiveFlux3D{uEltype}(0, # capacity 0; presumably resized elsewhere (TODO confirm)
+                                                                 nvariables(equations),
+                                                                 nnodes(dg))
+
+    # TODO: nonconservative terms (the draft below still uses the 2D shapes [v, noncons, i, j])
+    # if have_nonconservative_terms(equations) == true
+    #     # Extract the nonconservative flux as a dispatch argument for `n_nonconservative_terms`
+    #     _, volume_flux_noncons = volume_integral.volume_flux_dg
+
+    #     flux_nonconservative_temp_threaded = A4d[A4d(undef, nvariables(equations),
+    #                                                  n_nonconservative_terms(volume_flux_noncons),
+    #                                                  nnodes(dg), nnodes(dg))
+    #                                              for _ in 1:Threads.nthreads()]
+    #     fhat_nonconservative_temp_threaded = A4d[A4d(undef, nvariables(equations),
+    #                                                  n_nonconservative_terms(volume_flux_noncons),
+    #                                                  nnodes(dg), nnodes(dg))
+    #                                              for _ in 1:Threads.nthreads()]
+    #     phi_threaded = A4d[A4d(undef, nvariables(equations),
+    #                            n_nonconservative_terms(volume_flux_noncons),
+    #                            nnodes(dg), nnodes(dg))
+    #                        for _ in 1:Threads.nthreads()]
+    #     cache = (; cache..., flux_nonconservative_temp_threaded,
+    #              fhat_nonconservative_temp_threaded, phi_threaded)
+    # end
+
+    return (; cache..., antidiffusive_fluxes, # extend the base cache with the new container and buffers
+            fhat1_L_threaded, fhat1_R_threaded, fhat2_L_threaded, fhat2_R_threaded,
+            fhat3_L_threaded, fhat3_R_threaded, flux_temp_threaded, fhat_temp_threaded)
+end
+
+@inline function subcell_limiting_kernel!(du, u, element, # adds to `du[:, :, :, :, element]` only
+                                          mesh::P4estMesh{3},
+                                          nonconservative_terms, equations,
+                                          volume_integral, limiter::SubcellLimiterIDP,
+                                          dg::DGSEM, cache)
+    @unpack inverse_weights = dg.basis
+    @unpack volume_flux_dg, volume_flux_fv = volume_integral
+
+    # high-order DG fluxes `fhat`: take this thread's buffers and pass them to `calcflux_fhat!`
+    @unpack fhat1_L_threaded, fhat1_R_threaded, fhat2_L_threaded, fhat2_R_threaded, fhat3_L_threaded, fhat3_R_threaded = cache
+
+    fhat1_L = fhat1_L_threaded[Threads.threadid()] # buffers are indexed by thread id
+    fhat1_R = fhat1_R_threaded[Threads.threadid()]
+    fhat2_L = fhat2_L_threaded[Threads.threadid()]
+    fhat2_R = fhat2_R_threaded[Threads.threadid()]
+    fhat3_L = fhat3_L_threaded[Threads.threadid()]
+    fhat3_R = fhat3_R_threaded[Threads.threadid()]
+    calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, u, mesh,
+                   nonconservative_terms, equations, volume_flux_dg, dg, element,
+                   cache)
+
+    # low-order FV fluxes `fstar`: take this thread's buffers and pass them to `calcflux_fv!`
+    @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache
+
+    fstar1_L = fstar1_L_threaded[Threads.threadid()]
+    fstar1_R = fstar1_R_threaded[Threads.threadid()]
+    fstar2_L = fstar2_L_threaded[Threads.threadid()]
+    fstar2_R = fstar2_R_threaded[Threads.threadid()]
+    fstar3_L = fstar3_L_threaded[Threads.threadid()]
+    fstar3_R = fstar3_R_threaded[Threads.threadid()]
+    calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, mesh,
+                 nonconservative_terms, equations, volume_flux_fv, dg, element,
+                 cache)
+
+    # antidiffusive flux: computed from `fhat` and `fstar`; it is not added to `du` in this function
+    calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R,
+                            fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R,
+                            u, mesh, nonconservative_terms, equations, limiter, dg,
+                            element, cache)
+
+    # Calculate volume integral contribution of low-order FV flux (`fstar` differences in all three directions)
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        for v in eachvariable(equations)
+            du[v, i, j, k, element] += inverse_weights[i] *
+                                       (fstar1_L[v, i + 1, j, k] - fstar1_R[v, i, j, k]) +
+                                       inverse_weights[j] *
+                                       (fstar2_L[v, i, j + 1, k] - fstar2_R[v, i, j, k]) +
+                                       inverse_weights[k] *
+                                       (fstar3_L[v, i, j, k + 1] - fstar3_R[v, i, j, k])
+        end
+    end
+
+    return nothing
+end
+
+# Calculate the antidiffusive fluxes `antidiffusive_flux*` of one element as the difference `fhat - fstar` (3D, conservative systems).
+@inline function calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R,
+                                         fhat3_L, fhat3_R,
+                                         fstar1_L, fstar1_R, fstar2_L, fstar2_R,
+                                         fstar3_L, fstar3_R,
+                                         u, mesh::P4estMesh{3},
+                                         nonconservative_terms::False, equations,
+                                         limiter::SubcellLimiterIDP, dg, element, cache)
+    @unpack antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes
+    # Interior subcell interfaces: only the `_L` fluxes are used; `_R` is a copy of `_L`.
+    for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg)
+        for v in eachvariable(equations)
+            antidiffusive_flux1_L[v, i, j, k, element] = fhat1_L[v, i, j, k] -
+                                                         fstar1_L[v, i, j, k]
+            antidiffusive_flux1_R[v, i, j, k, element] = antidiffusive_flux1_L[v, i, j,
+                                                                               k,
+                                                                               element]
+        end
+    end
+    for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg)
+        for v in eachvariable(equations)
+            antidiffusive_flux2_L[v, i, j, k, element] = fhat2_L[v, i, j, k] -
+                                                         fstar2_L[v, i, j, k]
+            antidiffusive_flux2_R[v, i, j, k, element] = antidiffusive_flux2_L[v, i, j,
+                                                                               k,
+                                                                               element]
+        end
+    end
+    for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)
+        for v in eachvariable(equations)
+            antidiffusive_flux3_L[v, i, j, k, element] = fhat3_L[v, i, j, k] -
+                                                         fstar3_L[v, i, j, k]
+            antidiffusive_flux3_R[v, i, j, k, element] = antidiffusive_flux3_L[v, i, j,
+                                                                               k,
+                                                                               element]
+        end
+    end
+    # Entries at the first and last interface index (1 and nnodes(dg) + 1) are set to zero.
+    antidiffusive_flux1_L[:, 1, :, :, element] .= zero(eltype(antidiffusive_flux1_L))
+    antidiffusive_flux1_L[:, nnodes(dg) + 1, :, :, element] .= zero(eltype(antidiffusive_flux1_L))
+    antidiffusive_flux1_R[:, 1, :, :, element] .= zero(eltype(antidiffusive_flux1_R))
+    antidiffusive_flux1_R[:, nnodes(dg) + 1, :, :, element] .= zero(eltype(antidiffusive_flux1_R))
+
+    antidiffusive_flux2_L[:, :, 1, :, element] .= zero(eltype(antidiffusive_flux2_L))
+    antidiffusive_flux2_L[:, :, nnodes(dg) + 1, :, element] .= zero(eltype(antidiffusive_flux2_L))
+    antidiffusive_flux2_R[:, :, 1, :, element] .= zero(eltype(antidiffusive_flux2_R))
+    antidiffusive_flux2_R[:, :, nnodes(dg) + 1, :, element] .= zero(eltype(antidiffusive_flux2_R))
+
+    antidiffusive_flux3_L[:, :, :, 1, element] .= zero(eltype(antidiffusive_flux3_L))
+    antidiffusive_flux3_L[:, :, :, nnodes(dg) + 1, element] .= zero(eltype(antidiffusive_flux3_L))
+    antidiffusive_flux3_R[:, :, :, 1, element] .= zero(eltype(antidiffusive_flux3_R))
+    antidiffusive_flux3_R[:, :, :, nnodes(dg) + 1, element] .= zero(eltype(antidiffusive_flux3_R))
+
+    return nothing
+end
+
+# TODO: The commented-out method below is still the 2D version and must be adapted to 3D.
+# # Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems.
+# @inline function calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R,
+#                                          fstar1_L, fstar1_R, fstar2_L, fstar2_R,
+#                                          u,
+#                                          mesh::Union{TreeMesh{2}, StructuredMesh{2},
+#                                                      P4estMesh{2}},
+#                                          nonconservative_terms::True, equations,
+#                                          limiter::SubcellLimiterIDP, dg, element, cache)
+#     @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R = cache.antidiffusive_fluxes
+
+#     for j in eachnode(dg), i in 2:nnodes(dg)
+#         for v in eachvariable(equations)
+#             antidiffusive_flux1_L[v, i, j, element] = fhat1_L[v, i, j] -
+#                                                       fstar1_L[v, i, j]
+#             antidiffusive_flux1_R[v, i, j, element] = fhat1_R[v, i, j] -
+#                                                       fstar1_R[v, i, j]
+#         end
+#     end
+#     for j in 2:nnodes(dg), i in eachnode(dg)
+#         for v in eachvariable(equations)
+#             antidiffusive_flux2_L[v, i, j, element] = fhat2_L[v, i, j] -
+#                                                       fstar2_L[v, i, j]
+#             antidiffusive_flux2_R[v, i, j, element] = fhat2_R[v, i, j] -
+#                                                       fstar2_R[v, i, j]
+#         end
+#     end
+
+#     antidiffusive_flux1_L[:, 1, :, element] .= zero(eltype(antidiffusive_flux1_L))
+#     antidiffusive_flux1_L[:, nnodes(dg) + 1, :, element] .= zero(eltype(antidiffusive_flux1_L))
+#     antidiffusive_flux1_R[:, 1, :, element] .= zero(eltype(antidiffusive_flux1_R))
+#     antidiffusive_flux1_R[:, nnodes(dg) + 1, :, element] .= zero(eltype(antidiffusive_flux1_R))
+
+#     antidiffusive_flux2_L[:, :, 1, element] .= zero(eltype(antidiffusive_flux2_L))
+#     antidiffusive_flux2_L[:, :, nnodes(dg) + 1, element] .= zero(eltype(antidiffusive_flux2_L))
+#     antidiffusive_flux2_R[:, :, 1, element] .= zero(eltype(antidiffusive_flux2_R))
+#     antidiffusive_flux2_R[:, :, nnodes(dg) + 1, element] .= zero(eltype(antidiffusive_flux2_R))
+
+#     return nothing
+# end
+end # @muladd
diff --git a/src/solvers/dgsem_tree/subcell_limiters_2d.jl b/src/solvers/dgsem_tree/subcell_limiters_2d.jl
index 539405f0a1e..40022f171e5 100644
--- a/src/solvers/dgsem_tree/subcell_limiters_2d.jl
+++ b/src/solvers/dgsem_tree/subcell_limiters_2d.jl
@@ -72,7 +72,7 @@ end
 # Calculation of local bounds using low-order FV solution
 
 @inline function calc_bounds_twosided!(var_min, var_max, variable,
-                                       u, t, semi, equations)
+                                       u::AbstractArray{<:Any, 4}, t, semi, equations)
     mesh, _, dg, cache = mesh_equations_solver_cache(semi)
     # Calc bounds inside elements
     @threaded for element in eachelement(dg, cache)
@@ -176,7 +176,8 @@ end
     return nothing
 end
 
-@inline function calc_bounds_onesided!(var_minmax, min_or_max, variable, u, t, semi)
+@inline function calc_bounds_onesided!(var_minmax, min_or_max, variable,
+                                       u::AbstractArray{<:Any, 4}, t, semi)
     mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
     # Calc bounds inside elements
     @threaded for element in eachelement(dg, cache)
@@ -287,6 +288,7 @@ end
 ###############################################################################
 # Local two-sided limiting of conservative variables
 
+# TODO: dimension independent implementation
 @inline function idp_local_twosided!(alpha, limiter, u, t, dt, semi)
     for variable in limiter.local_twosided_variables_cons
         idp_local_twosided!(alpha, limiter, u, t, dt, semi, variable)
@@ -295,7 +297,8 @@ end
     return nothing
 end
 
-@inline function idp_local_twosided!(alpha, limiter, u, t, dt, semi, variable)
+@inline function idp_local_twosided!(alpha, limiter, u::AbstractArray{<:Any, 4}, t, dt,
+                                     semi, variable)
     mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
     (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes
     (; inverse_weights) = dg.basis
@@ -357,6 +360,7 @@ end
 ##############################################################################
 # Local one-sided limiting of nonlinear variables
 
+# TODO: dimension independent implementation
 @inline function idp_local_onesided!(alpha, limiter, u, t, dt, semi)
     for (variable, min_or_max) in limiter.local_onesided_variables_nonlinear
         idp_local_onesided!(alpha, limiter, u, t, dt, semi, variable, min_or_max)
@@ -365,8 +369,8 @@ end
     return nothing
 end
 
-@inline function idp_local_onesided!(alpha, limiter, u, t, dt, semi,
-                                     variable, min_or_max)
+@inline function idp_local_onesided!(alpha, limiter, u::AbstractArray{<:Real, 4}, t, dt,
+                                     semi, variable, min_or_max)
     mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
     (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
     var_minmax = variable_bounds[Symbol(string(variable), "_", string(min_or_max))]
@@ -392,6 +396,7 @@ end
 ###############################################################################
 # Global positivity limiting
 
+# TODO: dimension independent implementation
 @inline function idp_positivity!(alpha, limiter, u, dt, semi)
     # Conservative variables
     for variable in limiter.positivity_variables_cons
@@ -418,7 +423,9 @@ end
 ###############################################################################
 # Global positivity limiting of conservative variables
 
-@inline function idp_positivity_conservative!(alpha, limiter, u, dt, semi, variable)
+@inline function idp_positivity_conservative!(alpha, limiter,
+                                              u::AbstractArray{<:Real, 4}, dt, semi,
+                                              variable)
     mesh, _, dg, cache = mesh_equations_solver_cache(semi)
     (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes
     (; inverse_weights) = dg.basis
@@ -483,7 +490,8 @@ end
 ###############################################################################
 # Global positivity limiting of nonlinear variables
 
-@inline function idp_positivity_nonlinear!(alpha, limiter, u, dt, semi, variable)
+@inline function idp_positivity_nonlinear!(alpha, limiter, u::AbstractArray{<:Real, 4},
+                                           dt, semi, variable)
     mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
     (; positivity_correction_factor) = limiter
 
@@ -530,7 +538,7 @@ end
     antidiffusive_flux = gamma_constant_newton * inverse_jacobian * inverse_weights[i] *
                          get_node_vars(antidiffusive_flux1_R, equations, dg, i, j,
                                        element)
-    newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max, initial_check,
+    newton_loop!(alpha, bound, u, (i, j, element), variable, min_or_max, initial_check,
                  final_check, equations, dt, limiter, antidiffusive_flux)
 
     # positive xi direction
@@ -538,14 +546,14 @@ end
                          inverse_weights[i] *
                          get_node_vars(antidiffusive_flux1_L, equations, dg, i + 1, j,
                                        element)
-    newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max, initial_check,
+    newton_loop!(alpha, bound, u, (i, j, element), variable, min_or_max, initial_check,
                  final_check, equations, dt, limiter, antidiffusive_flux)
 
     # negative eta direction
     antidiffusive_flux = gamma_constant_newton * inverse_jacobian * inverse_weights[j] *
                          get_node_vars(antidiffusive_flux2_R, equations, dg, i, j,
                                        element)
-    newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max, initial_check,
+    newton_loop!(alpha, bound, u, (i, j, element), variable, min_or_max, initial_check,
                  final_check, equations, dt, limiter, antidiffusive_flux)
 
     # positive eta direction
@@ -553,18 +561,19 @@ end
                          inverse_weights[j] *
                          get_node_vars(antidiffusive_flux2_L, equations, dg, i, j + 1,
                                        element)
-    newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max, initial_check,
+    newton_loop!(alpha, bound, u, (i, j, element), variable, min_or_max, initial_check,
                  final_check, equations, dt, limiter, antidiffusive_flux)
 
     return nothing
 end
 
-@inline function newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max,
+# TODO: dimension independent implementation
+@inline function newton_loop!(alpha, bound, u, indices, variable, min_or_max,
                               initial_check, final_check, equations, dt, limiter,
                               antidiffusive_flux)
     newton_reltol, newton_abstol = limiter.newton_tolerances
 
-    beta = 1 - alpha[i, j, element]
+    beta = 1 - alpha[indices...]
 
     beta_L = 0 # alpha = 1
     beta_R = beta # No higher beta (lower alpha) than the current one
@@ -643,13 +652,14 @@ end
     end
 
     new_alpha = 1 - beta
-    alpha[i, j, element] = new_alpha
+    alpha[indices...] = new_alpha
 
     return nothing
 end
 
 ### Auxiliary routines for Newton's bisection method ###
 # Initial checks
+# TODO: This is all dimension independent. So, move it to a subcell_limiter.jl.
 @inline function initial_check_local_onesided_newton_idp(::typeof(min), bound,
                                                          goal, newton_abstol)
     goal <= max(newton_abstol, abs(bound) * newton_abstol)
diff --git a/src/solvers/dgsem_tree/subcell_limiters_3d.jl b/src/solvers/dgsem_tree/subcell_limiters_3d.jl
new file mode 100644
index 00000000000..e246513564c
--- /dev/null
+++ b/src/solvers/dgsem_tree/subcell_limiters_3d.jl
@@ -0,0 +1,586 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
+###############################################################################
+# IDP Limiting
+###############################################################################
+
+# This method is used when the limiter is constructed as for shock-capturing volume integrals.
+function create_cache(limiter::Type{SubcellLimiterIDP}, equations::AbstractEquations{3},
+                      basis::LobattoLegendreBasis, bound_keys)
+    subcell_limiter_coefficients = Trixi.ContainerSubcellLimiterIDP3D{real(basis)}(0,
+                                                                                   nnodes(basis),
+                                                                                   bound_keys)
+    # NOTE(review): the leading `0` is presumably the initial element capacity - confirm.
+    # Memory for bounds checking routine with `BoundsCheckCallback`.
+    # Local variable contains the maximum deviation since the last export.
+    idp_bounds_delta_local = Dict{Symbol, real(basis)}()
+    # Global variable contains the total maximum deviation.
+    idp_bounds_delta_global = Dict{Symbol, real(basis)}()
+    for key in bound_keys
+        idp_bounds_delta_local[key] = zero(real(basis))
+        idp_bounds_delta_global[key] = zero(real(basis))
+    end
+
+    return (; subcell_limiter_coefficients, idp_bounds_delta_local,
+            idp_bounds_delta_global)
+end
+
+function (limiter::SubcellLimiterIDP)(u::AbstractArray{<:Any, 5},
+                                      semi, equations, dg::DGSEM,
+                                      t, dt;
+                                      kwargs...)
+    @unpack alpha = limiter.cache.subcell_limiter_coefficients
+    # TODO: Do not abuse `reset_du!` but maybe implement a generic `set_zero!`
+    @trixi_timeit timer() "reset alpha" reset_du!(alpha, dg, semi.cache)
+    # The enabled limiting stages below are each handed `alpha` to update it in place.
+    if limiter.local_twosided
+        @trixi_timeit timer() "local twosided" idp_local_twosided!(alpha, limiter,
+                                                                   u, t, dt, semi)
+    end
+    if limiter.positivity
+        @trixi_timeit timer() "positivity" idp_positivity!(alpha, limiter, u, dt, semi)
+    end
+    if limiter.local_onesided
+        @trixi_timeit timer() "local onesided" idp_local_onesided!(alpha, limiter,
+                                                                   u, t, dt, semi)
+    end
+
+    # Calculate alpha1, alpha2 and alpha3: max of the two `alpha` values adjacent in that direction
+    @unpack alpha1, alpha2, alpha3 = limiter.cache.subcell_limiter_coefficients
+    @threaded for element in eachelement(dg, semi.cache)
+        for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg)
+            alpha1[i, j, k, element] = max(alpha[i - 1, j, k, element],
+                                           alpha[i, j, k, element])
+        end
+        for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg)
+            alpha2[i, j, k, element] = max(alpha[i, j - 1, k, element],
+                                           alpha[i, j, k, element])
+        end
+        for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)
+            alpha3[i, j, k, element] = max(alpha[i, j, k - 1, element],
+                                           alpha[i, j, k, element])
+        end
+        alpha1[1, :, :, element] .= zero(eltype(alpha1))
+        alpha1[nnodes(dg) + 1, :, :, element] .= zero(eltype(alpha1))
+        alpha2[:, 1, :, element] .= zero(eltype(alpha2))
+        alpha2[:, nnodes(dg) + 1, :, element] .= zero(eltype(alpha2))
+        alpha3[:, :, 1, element] .= zero(eltype(alpha3))
+        alpha3[:, :, nnodes(dg) + 1, element] .= zero(eltype(alpha3))
+    end
+
+    return nothing
+end
+
+###############################################################################
+# Calculation of local bounds using low-order FV solution
+
+@inline function calc_bounds_twosided!(var_min, var_max, variable,
+                                       u::AbstractArray{<:Any, 5}, t, semi, equations)
+    mesh, _, dg, cache = mesh_equations_solver_cache(semi)
+    # Calc bounds inside elements (bounds are reset to typemax/typemin first)
+    @threaded for element in eachelement(dg, cache)
+        var_min[:, :, :, element] .= typemax(eltype(var_min))
+        var_max[:, :, :, element] .= typemin(eltype(var_max))
+        # Each nodal value of `u` updates its own bounds and those of its in-element i/j/k neighbors
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            var = u[variable, i, j, k, element]
+            var_min[i, j, k, element] = min(var_min[i, j, k, element], var)
+            var_max[i, j, k, element] = max(var_max[i, j, k, element], var)
+
+            if i > 1
+                var_min[i - 1, j, k, element] = min(var_min[i - 1, j, k, element], var)
+                var_max[i - 1, j, k, element] = max(var_max[i - 1, j, k, element], var)
+            end
+            if i < nnodes(dg)
+                var_min[i + 1, j, k, element] = min(var_min[i + 1, j, k, element], var)
+                var_max[i + 1, j, k, element] = max(var_max[i + 1, j, k, element], var)
+            end
+            if j > 1
+                var_min[i, j - 1, k, element] = min(var_min[i, j - 1, k, element], var)
+                var_max[i, j - 1, k, element] = max(var_max[i, j - 1, k, element], var)
+            end
+            if j < nnodes(dg)
+                var_min[i, j + 1, k, element] = min(var_min[i, j + 1, k, element], var)
+                var_max[i, j + 1, k, element] = max(var_max[i, j + 1, k, element], var)
+            end
+            if k > 1
+                var_min[i, j, k - 1, element] = min(var_min[i, j, k - 1, element], var)
+                var_max[i, j, k - 1, element] = max(var_max[i, j, k - 1, element], var)
+            end
+            if k < nnodes(dg)
+                var_min[i, j, k + 1, element] = min(var_min[i, j, k + 1, element], var)
+                var_max[i, j, k + 1, element] = max(var_max[i, j, k + 1, element], var)
+            end
+        end
+    end
+
+    # Values at element boundary (`mesh` is passed on so the callee can dispatch on it)
+    calc_bounds_twosided_interface!(var_min, var_max, variable,
+                                    u, t, semi, mesh, equations)
+    return nothing
+end
+
+# @inline function calc_bounds_twosided_interface!(var_min, var_max, variable,
+#                                                  u, t, semi, mesh::TreeMesh3D,
+#                                                  equations)
+#     _, _, dg, cache = mesh_equations_solver_cache(semi)
+#     (; boundary_conditions) = semi
+#     # Calc bounds at interfaces and periodic boundaries
+#     for interface in eachinterface(dg, cache)
+#         # Get neighboring element ids
+#         left = cache.interfaces.neighbor_ids[1, interface]
+#         right = cache.interfaces.neighbor_ids[2, interface]
+
+#         orientation = cache.interfaces.orientations[interface]
+
+#         for i in eachnode(dg)
+#             index_left = (nnodes(dg), i)
+#             index_right = (1, i)
+#             if orientation == 2
+#                 index_left = reverse(index_left)
+#                 index_right = reverse(index_right)
+#             end
+#             var_left = u[variable, index_left..., left]
+#             var_right = u[variable, index_right..., right]
+
+#             var_min[index_right..., right] = min(var_min[index_right..., right],
+#                                                  var_left)
+#             var_max[index_right..., right] = max(var_max[index_right..., right],
+#                                                  var_left)
+
+#             var_min[index_left..., left] = min(var_min[index_left..., left], var_right)
+#             var_max[index_left..., left] = max(var_max[index_left..., left], var_right)
+#         end
+#     end
+
+#     # Calc bounds at physical boundaries
+#     for boundary in eachboundary(dg, cache)
+#         element = cache.boundaries.neighbor_ids[boundary]
+#         orientation = cache.boundaries.orientations[boundary]
+#         neighbor_side = cache.boundaries.neighbor_sides[boundary]
+
+#         for i in eachnode(dg)
+#             if neighbor_side == 2 # Element is on the right, boundary on the left
+#                 index = (1, i)
+#                 boundary_index = 1
+#             else # Element is on the left, boundary on the right
+#                 index = (nnodes(dg), i)
+#                 boundary_index = 2
+#             end
+#             if orientation == 2
+#                 index = reverse(index)
+#                 boundary_index += 2
+#             end
+#             u_inner = get_node_vars(u, equations, dg, index..., element)
+#             u_outer = get_boundary_outer_state(u_inner, t,
+#                                                boundary_conditions[boundary_index],
+#                                                orientation, boundary_index,
+#                                                mesh, equations, dg, cache,
+#                                                index..., element)
+#             var_outer = u_outer[variable]
+
+#             var_min[index..., element] = min(var_min[index..., element], var_outer)
+#             var_max[index..., element] = max(var_max[index..., element], var_outer)
+#         end
+#     end
+
+#     return nothing
+# end
+
+@inline function calc_bounds_onesided!(var_minmax, min_or_max, variable,
+                                       u::AbstractArray{<:Any, 5}, t, semi)
+    mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
+    # Calc bounds inside elements
+
+    @threaded for element in eachelement(dg, cache)
+        # Reset bounds to the neutral value of `min_or_max` (typemin for max, typemax otherwise)
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            if min_or_max === max
+                var_minmax[i, j, k, element] = typemin(eltype(var_minmax))
+            else
+                var_minmax[i, j, k, element] = typemax(eltype(var_minmax))
+            end
+        end
+
+        # Evaluate `variable` at each node; fold it into the node's and its i/j/k neighbors' bounds
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            var = variable(get_node_vars(u, equations, dg, i, j, k, element), equations)
+            var_minmax[i, j, k, element] = min_or_max(var_minmax[i, j, k, element], var)
+
+            if i > 1
+                var_minmax[i - 1, j, k, element] = min_or_max(var_minmax[i - 1, j, k,
+                                                                         element], var)
+            end
+            if i < nnodes(dg)
+                var_minmax[i + 1, j, k, element] = min_or_max(var_minmax[i + 1, j, k,
+                                                                         element], var)
+            end
+            if j > 1
+                var_minmax[i, j - 1, k, element] = min_or_max(var_minmax[i, j - 1, k,
+                                                                         element], var)
+            end
+            if j < nnodes(dg)
+                var_minmax[i, j + 1, k, element] = min_or_max(var_minmax[i, j + 1, k,
+                                                                         element], var)
+            end
+            if k > 1
+                var_minmax[i, j, k - 1, element] = min_or_max(var_minmax[i, j, k - 1,
+                                                                         element], var)
+            end
+            if k < nnodes(dg)
+                var_minmax[i, j, k + 1, element] = min_or_max(var_minmax[i, j, k + 1,
+                                                                         element], var)
+            end
+        end
+    end
+
+    # Values at element boundary (`mesh` is passed on so the callee can dispatch on it)
+    calc_bounds_onesided_interface!(var_minmax, min_or_max, variable, u, t, semi, mesh)
+
+    return nothing
+end
+
+@inline function calc_bounds_onesided_interface!(var_minmax, min_or_max, variable, u, t,
+                                                 semi, mesh::TreeMesh3D)
+    _, equations, dg, cache = mesh_equations_solver_cache(semi)
+    (; boundary_conditions) = semi
+    error("One-sided subcell bounds at interfaces are not implemented for TreeMesh3D yet")
+    # Calc bounds at interfaces and periodic boundaries (unreachable; still 2D index tuples)
+    for interface in eachinterface(dg, cache)
+        # Get neighboring element ids
+        left = cache.interfaces.neighbor_ids[1, interface]
+        right = cache.interfaces.neighbor_ids[2, interface]
+
+        orientation = cache.interfaces.orientations[interface]
+
+        for i in eachnode(dg)
+            index_left = (nnodes(dg), i)
+            index_right = (1, i)
+            if orientation == 2
+                index_left = reverse(index_left)
+                index_right = reverse(index_right)
+            end
+            var_left = variable(get_node_vars(u, equations, dg, index_left..., left),
+                                equations)
+            var_right = variable(get_node_vars(u, equations, dg, index_right..., right),
+                                 equations)
+
+            var_minmax[index_right..., right] = min_or_max(var_minmax[index_right...,
+                                                                      right], var_left)
+            var_minmax[index_left..., left] = min_or_max(var_minmax[index_left...,
+                                                                    left], var_right)
+        end
+    end
+
+    # Calc bounds at physical boundaries (unreachable; still 2D index tuples)
+    for boundary in eachboundary(dg, cache)
+        element = cache.boundaries.neighbor_ids[boundary]
+        orientation = cache.boundaries.orientations[boundary]
+        neighbor_side = cache.boundaries.neighbor_sides[boundary]
+
+        for i in eachnode(dg)
+            if neighbor_side == 2 # Element is on the right, boundary on the left
+                index = (1, i)
+                boundary_index = 1
+            else # Element is on the left, boundary on the right
+                index = (nnodes(dg), i)
+                boundary_index = 2
+            end
+            if orientation == 2
+                index = reverse(index)
+                boundary_index += 2
+            end
+            u_inner = get_node_vars(u, equations, dg, index..., element)
+            u_outer = get_boundary_outer_state(u_inner, t,
+                                               boundary_conditions[boundary_index],
+                                               orientation, boundary_index,
+                                               mesh, equations, dg, cache,
+                                               index..., element)
+            var_outer = variable(u_outer, equations)
+
+            var_minmax[index..., element] = min_or_max(var_minmax[index..., element],
+                                                       var_outer)
+        end
+    end
+
+    return nothing
+end
+
+###############################################################################
+# Local two-sided limiting of conservative variables
+
+@inline function idp_local_twosided!(alpha, limiter, u::AbstractArray{<:Any, 5}, t, dt,
+                                     semi, variable)
+    mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
+    (; antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R) = cache.antidiffusive_fluxes
+    (; inverse_weights) = dg.basis
+    # Look up the bound arrays `<variable>_min` / `<variable>_max` and fill them from `u`.
+    (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
+    variable_string = string(variable)
+    var_min = variable_bounds[Symbol(variable_string, "_min")]
+    var_max = variable_bounds[Symbol(variable_string, "_max")]
+    calc_bounds_twosided!(var_min, var_max, variable, u, t, semi, equations)
+
+    @threaded for element in eachelement(dg, semi.cache)
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            inverse_jacobian = get_inverse_jacobian(cache.elements.inverse_jacobian,
+                                                    mesh, i, j, k, element)
+            var = u[variable, i, j, k, element]
+            # Real Zalesak type limiter
+            #   * Zalesak (1979). "Fully multidimensional flux-corrected transport algorithms for fluids"
+            #   * Kuzmin et al. (2010). "Failsafe flux limiting and constrained data projections for equations of gas dynamics"
+            #   Note: The Zalesak limiter has to be computed, even if the state is valid, because the correction is
+            #         for each interface, not each node
+
+            Qp = max(0, (var_max[i, j, k, element] - var) / dt)
+            Qm = min(0, (var_min[i, j, k, element] - var) / dt)
+
+            # Calculate Pp and Pm
+            # Note: Boundaries of antidiffusive_flux1/2/3 are constant 0, so they make no difference here.
+            val_flux1_local = inverse_weights[i] *
+                              antidiffusive_flux1_R[variable, i, j, k, element]
+            val_flux1_local_ip1 = -inverse_weights[i] *
+                                  antidiffusive_flux1_L[variable, i + 1, j, k, element]
+            val_flux2_local = inverse_weights[j] *
+                              antidiffusive_flux2_R[variable, i, j, k, element]
+            val_flux2_local_jp1 = -inverse_weights[j] *
+                                  antidiffusive_flux2_L[variable, i, j + 1, k, element]
+            val_flux3_local = inverse_weights[k] *
+                              antidiffusive_flux3_R[variable, i, j, k, element]
+            val_flux3_local_kp1 = -inverse_weights[k] *
+                                  antidiffusive_flux3_L[variable, i, j, k + 1, element]
+
+            Pp = max(0, val_flux1_local) + max(0, val_flux1_local_ip1) +
+                 max(0, val_flux2_local) + max(0, val_flux2_local_jp1) +
+                 max(0, val_flux3_local) + max(0, val_flux3_local_kp1)
+            Pm = min(0, val_flux1_local) + min(0, val_flux1_local_ip1) +
+                 min(0, val_flux2_local) + min(0, val_flux2_local_jp1) +
+                 min(0, val_flux3_local) + min(0, val_flux3_local_kp1)
+
+            Pp = inverse_jacobian * Pp
+            Pm = inverse_jacobian * Pm
+
+            # Compute blending coefficient avoiding division by zero
+            # (as in paper of [Guermond, Nazarov, Popov, Thomas] (4.8))
+            Qp = abs(Qp) /
+                 (abs(Pp) + eps(typeof(Qp)) * 100 * abs(var_max[i, j, k, element]))
+            Qm = abs(Qm) /
+                 (abs(Pm) + eps(typeof(Qm)) * 100 * abs(var_max[i, j, k, element]))
+
+            # Calculate alpha at nodes (only ever increases the value already stored)
+            alpha[i, j, k, element] = max(alpha[i, j, k, element], 1 - min(1, Qp, Qm))
+        end
+    end
+
+    return nothing
+end
+
+##############################################################################
+# Local one-sided limiting of nonlinear variables
+
+@inline function idp_local_onesided!(alpha, limiter, u::AbstractArray{<:Real, 5}, t, dt,
+                                     semi,
+                                     variable, min_or_max)
+    mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
+    (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
+    bounds = variable_bounds[Symbol(string(variable), "_", string(min_or_max))]
+    calc_bounds_onesided!(bounds, min_or_max, variable, u, t, semi)
+
+    # Update alpha at every node with the Newton-bisection search of `newton_loops_alpha!`
+    @threaded for element in eachelement(dg, cache)
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            u_node = get_node_vars(u, equations, dg, i, j, k, element)
+            inv_jacobian = get_inverse_jacobian(cache.elements.inverse_jacobian,
+                                                mesh, i, j, k, element)
+            newton_loops_alpha!(alpha, bounds[i, j, k, element], u_node,
+                                i, j, k, element, variable, min_or_max,
+                                initial_check_local_onesided_newton_idp,
+                                final_check_local_onesided_newton_idp,
+                                inv_jacobian, dt, equations, dg, cache, limiter)
+        end
+    end
+
+    return nothing
+end
+
+###############################################################################
+# Global positivity limiting of conservative variables
+
+@inline function idp_positivity_conservative!(alpha, limiter,
+                                              u::AbstractArray{<:Real, 5}, dt, semi,
+                                              variable)
+    mesh, _, dg, cache = mesh_equations_solver_cache(semi)
+    (; antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R) = cache.antidiffusive_fluxes
+    (; inverse_weights) = dg.basis
+    (; positivity_correction_factor) = limiter
+
+    (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
+    var_min = variable_bounds[Symbol(string(variable), "_min")]
+
+    @threaded for element in eachelement(dg, semi.cache)
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            inverse_jacobian = get_inverse_jacobian(cache.elements.inverse_jacobian,
+                                                    mesh, i, j, k, element)
+            var = u[variable, i, j, k, element]
+            if var < 0
+                error("Safe low-order method produces negative value for conservative variable $variable. Try a smaller time step.")
+            end
+
+            # Compute bound
+            if limiter.local_twosided &&
+               variable in limiter.local_twosided_variables_cons &&
+               var_min[i, j, k, element] >= positivity_correction_factor * var
+                # Local limiting is more restrictive than positivity limiting
+                # => Skip positivity limiting for this node
+                continue
+            end
+            var_min[i, j, k, element] = positivity_correction_factor * var
+
+            # Real one-sided Zalesak-type limiter
+            # * Zalesak (1979). "Fully multidimensional flux-corrected transport algorithms for fluids"
+            # * Kuzmin et al. (2010). "Failsafe flux limiting and constrained data projections for equations of gas dynamics"
+            # Note: The Zalesak limiter has to be computed, even if the state is valid, because the correction is
+            #       for each interface, not each node
+            Qm = min(0, (var_min[i, j, k, element] - var) / dt)
+
+            # Calculate Pm
+            # Note: Boundaries of antidiffusive_flux1/2/3 are constant 0, so they make no difference here.
+            val_flux1_local = inverse_weights[i] *
+                              antidiffusive_flux1_R[variable, i, j, k, element]
+            val_flux1_local_ip1 = -inverse_weights[i] *
+                                  antidiffusive_flux1_L[variable, i + 1, j, k, element]
+            val_flux2_local = inverse_weights[j] *
+                              antidiffusive_flux2_R[variable, i, j, k, element]
+            val_flux2_local_jp1 = -inverse_weights[j] *
+                                  antidiffusive_flux2_L[variable, i, j + 1, k, element]
+            val_flux3_local = inverse_weights[k] *
+                              antidiffusive_flux3_R[variable, i, j, k, element]
+            val_flux3_local_kp1 = -inverse_weights[k] *
+                                  antidiffusive_flux3_L[variable, i, j, k + 1, element]
+
+            Pm = min(0, val_flux1_local) + min(0, val_flux1_local_ip1) +
+                 min(0, val_flux2_local) + min(0, val_flux2_local_jp1) +
+                 min(0, val_flux3_local) + min(0, val_flux3_local_kp1)
+            Pm = inverse_jacobian * Pm
+
+            # Compute blending coefficient avoiding division by zero
+            # (as in paper of [Guermond, Nazarov, Popov, Thomas] (4.8))
+            Qm = abs(Qm) / (abs(Pm) + eps(typeof(Qm)) * 100)
+
+            # Calculate alpha
+            alpha[i, j, k, element] = max(alpha[i, j, k, element], 1 - Qm)
+        end
+    end
+
+    return nothing
+end
+
+###############################################################################
+# Global positivity limiting of nonlinear variables
+
+@inline function idp_positivity_nonlinear!(alpha, limiter, u::AbstractArray{<:Real, 5},
+                                           dt, semi, variable)
+    mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
+    (; positivity_correction_factor) = limiter
+
+    (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
+    lower_bound = variable_bounds[Symbol(string(variable), "_min")]
+
+    @threaded for element in eachelement(dg, semi.cache)
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            # Evaluate the nonlinear variable at this node; a negative value is an error
+            u_node = get_node_vars(u, equations, dg, i, j, k, element)
+            value = variable(u_node, equations)
+            value < 0 &&
+                error("Safe low-order method produces negative value for variable $variable. Try a smaller time step.")
+
+            # Store the bound: the current value scaled by `positivity_correction_factor`
+            lower_bound[i, j, k, element] = positivity_correction_factor * value
+
+            # Newton-bisection search for an alpha that respects the stored bound
+            inv_jacobian = get_inverse_jacobian(cache.elements.inverse_jacobian,
+                                                mesh, i, j, k, element)
+            newton_loops_alpha!(alpha, lower_bound[i, j, k, element], u_node,
+                                i, j, k, element, variable, min,
+                                initial_check_nonnegative_newton_idp,
+                                final_check_nonnegative_newton_idp,
+                                inv_jacobian, dt, equations, dg, cache, limiter)
+        end
+    end
+
+    return nothing
+end
+
+###############################################################################
+# Newton-bisection method
+
+# Run `newton_loop!` at node `(i, j, k)` of `element` once for each of the six
+# antidiffusive subcell fluxes adjacent to that node: the `_R` flux stored at the
+# node index (negative direction) and the `_L` flux stored at the next index
+# (positive direction, entering with a minus sign) in xi, eta, and zeta.
+# Every flux is scaled by `gamma_constant_newton`, `inverse_jacobian`, and the
+# entry of `dg.basis.inverse_weights` that belongs to the node index of the flux
+# direction, i.e., `inverse_weights[i]` for xi, `inverse_weights[j]` for eta,
+# and `inverse_weights[k]` for zeta.
+@inline function newton_loops_alpha!(alpha, bound, u, i, j, k, element, variable,
+                                     min_or_max, initial_check, final_check,
+                                     inverse_jacobian, dt, equations, dg, cache,
+                                     limiter)
+    (; inverse_weights) = dg.basis
+    (; antidiffusive_flux1_L, antidiffusive_flux1_R, antidiffusive_flux2_L, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R) = cache.antidiffusive_fluxes
+
+    (; gamma_constant_newton) = limiter
+
+    # negative xi direction
+    antidiffusive_flux = gamma_constant_newton * inverse_jacobian * inverse_weights[i] *
+                         get_node_vars(antidiffusive_flux1_R, equations, dg, i, j, k,
+                                       element)
+    newton_loop!(alpha, bound, u, (i, j, k, element), variable, min_or_max,
+                 initial_check, final_check, equations, dt, limiter, antidiffusive_flux)
+
+    # positive xi direction
+    antidiffusive_flux = -gamma_constant_newton * inverse_jacobian *
+                         inverse_weights[i] *
+                         get_node_vars(antidiffusive_flux1_L, equations, dg, i + 1, j,
+                                       k, element)
+    newton_loop!(alpha, bound, u, (i, j, k, element), variable, min_or_max,
+                 initial_check, final_check, equations, dt, limiter, antidiffusive_flux)
+
+    # negative eta direction
+    antidiffusive_flux = gamma_constant_newton * inverse_jacobian * inverse_weights[j] *
+                         get_node_vars(antidiffusive_flux2_R, equations, dg, i, j, k,
+                                       element)
+    newton_loop!(alpha, bound, u, (i, j, k, element), variable, min_or_max,
+                 initial_check, final_check, equations, dt, limiter, antidiffusive_flux)
+
+    # positive eta direction
+    antidiffusive_flux = -gamma_constant_newton * inverse_jacobian *
+                         inverse_weights[j] *
+                         get_node_vars(antidiffusive_flux2_L, equations, dg, i, j + 1,
+                                       k, element)
+    newton_loop!(alpha, bound, u, (i, j, k, element), variable, min_or_max,
+                 initial_check, final_check, equations, dt, limiter, antidiffusive_flux)
+
+    # negative zeta direction
+    antidiffusive_flux = gamma_constant_newton * inverse_jacobian * inverse_weights[k] *
+                         get_node_vars(antidiffusive_flux3_R, equations, dg, i, j, k,
+                                       element)
+    newton_loop!(alpha, bound, u, (i, j, k, element), variable, min_or_max,
+                 initial_check, final_check, equations, dt, limiter, antidiffusive_flux)
+
+    # positive zeta direction
+    antidiffusive_flux = -gamma_constant_newton * inverse_jacobian *
+                         inverse_weights[k] *
+                         get_node_vars(antidiffusive_flux3_L, equations, dg, i, j,
+                                       k + 1, element)
+    newton_loop!(alpha, bound, u, (i, j, k, element), variable, min_or_max,
+                 initial_check, final_check, equations, dt, limiter, antidiffusive_flux)
+
+    return nothing
+end
+end # @muladd
diff --git a/test/test_p4est_3d.jl b/test/test_p4est_3d.jl
index fbca9fc9091..732bca13230 100644
--- a/test/test_p4est_3d.jl
+++ b/test/test_p4est_3d.jl
@@ -409,6 +409,71 @@ end
     end
 end
 
+@trixi_testset "elixir_euler_sedov_sc_subcell.jl (positivity bounds)" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov_sc_subcell.jl"),
+                        l2=[
+                            0.20192400360203736,
+                            0.07695890787669382,
+                            0.07710756354791536,
+                            0.07941551494813669,
+                            0.37221345244744125
+                        ],
+                        linf=[
+                            1.5810218765306052,
+                            1.1429216064031185,
+                            1.225663052060263,
+                            1.3174846939209266,
+                            4.941059740343397
+                        ],
+                        tspan=(0.0, 0.3),)
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        # Larger values for allowed allocations due to usage of custom
+        # integrator which are not *recorded* for the methods from
+        # OrdinaryDiffEq.jl
+        # Corresponding issue: https://github.com/trixi-framework/Trixi.jl/issues/1877
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000
+    end
+end
+
+@trixi_testset "elixir_euler_sedov_sc_subcell.jl (local bounds)" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov_sc_subcell.jl"),
+                        local_twosided_variables_cons=["rho"],
+                        local_onesided_variables_nonlinear=[(Trixi.entropy_guermond_etal,
+                                                             min)],
+                        l2=[
+                            0.16348251452644771,
+                            0.06401049853623296,
+                            0.06395702968703486,
+                            0.06474601071653036,
+                            0.3619869209395113
+                        ],
+                        linf=[
+                            0.9035169696663888,
+                            0.5570207276440609,
+                            0.5709859402279465,
+                            0.5827205521982685,
+                            4.778582414871622
+                        ],
+                        tspan=(0.0, 0.3),)
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        # Larger values for allowed allocations due to the usage of a custom
+        # integrator; such allocations are not *recorded* for the methods
+        # from OrdinaryDiffEq.jl
+        # Corresponding issue: https://github.com/trixi-framework/Trixi.jl/issues/1877
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000
+    end
+end
+
 @trixi_testset "elixir_euler_sedov.jl (HLLE)" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"),
                         l2=[

From 9d75c6b891733aaf83cae82b744f7a9b51b39674 Mon Sep 17 00:00:00 2001
From: bennibolm <benjamin.bolm@gmx.de>
Date: Wed, 17 Sep 2025 17:21:14 +0200
Subject: [PATCH 02/18] Add support for non-periodic meshes

---
 ...e_terms_nonperiodic_hohqmesh_sc_subcell.jl |  75 ++++++
 .../dgsem_p4est/subcell_limiters_2d.jl        |   4 +-
 .../dgsem_p4est/subcell_limiters_3d.jl        | 250 ++++++++++--------
 .../dgsem_tree/dg_2d_subcell_limiters.jl      |  13 +
 test/test_p4est_3d.jl                         |  65 +++++
 5 files changed, 288 insertions(+), 119 deletions(-)
 create mode 100644 examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic_hohqmesh_sc_subcell.jl

diff --git a/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic_hohqmesh_sc_subcell.jl b/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic_hohqmesh_sc_subcell.jl
new file mode 100644
index 00000000000..b37c533840b
--- /dev/null
+++ b/examples/p4est_3d_dgsem/elixir_euler_source_terms_nonperiodic_hohqmesh_sc_subcell.jl
@@ -0,0 +1,75 @@
+using OrdinaryDiffEqLowStorageRK
+using Trixi
+
+###############################################################################
+# Semidiscretization of the 3D compressible Euler equations with IDP subcell limiting
+
+equations = CompressibleEulerEquations3D(1.4)
+
+initial_condition = initial_condition_convergence_test
+
+boundary_condition = BoundaryConditionDirichlet(initial_condition)
+boundary_conditions = Dict(:Bottom => boundary_condition,
+                           :Top => boundary_condition,
+                           :Circle => boundary_condition,
+                           :Cut => boundary_condition)
+
+surface_flux = flux_lax_friedrichs
+volume_flux = flux_ranocha
+polydeg = 4
+basis = LobattoLegendreBasis(polydeg)
+limiter_idp = SubcellLimiterIDP(equations, basis;
+                                positivity_variables_cons = ["rho"],
+                                positivity_variables_nonlinear = [pressure],
+                                local_twosided_variables_cons = [],
+                                local_onesided_variables_nonlinear = [])
+volume_integral = VolumeIntegralSubcellLimiting(limiter_idp;
+                                                volume_flux_dg = volume_flux,
+                                                volume_flux_fv = surface_flux)
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+# Unstructured 3D half circle mesh from HOHQMesh (Abaqus `.inp` file fetched via `Trixi.download`)
+mesh_file = Trixi.download("https://gist.githubusercontent.com/andrewwinters5000/11461efbfb02c42e06aca338b3d0b645/raw/81deeb1ebc4945952c30af5bb75fe222a18d975c/abaqus_half_circle_3d.inp",
+                           joinpath(@__DIR__, "abaqus_half_circle_3d.inp"))
+
+mesh = P4estMesh{3}(mesh_file, initial_refinement_level = 0)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    source_terms = source_terms_convergence_test,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 1.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
+                                     extra_analysis_integrals = (entropy,))
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+save_solution = SaveSolutionCallback(interval = 1, # NOTE(review): presumably saves every step; confirm intended
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim,
+                                     extra_node_variables = (:limiting_coefficient,))
+
+stepsize_callback = StepsizeCallback(cfl = 0.5)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback, alive_callback,
+                        save_solution,
+                        stepsize_callback)
+
+###############################################################################
+# Run the simulation; the subcell limiter correction and the bounds check are stage callbacks
+
+stage_callbacks = (SubcellLimiterIDPCorrection(), BoundsCheckCallback())
+
+sol = Trixi.solve(ode, Trixi.SimpleSSPRK33(stage_callbacks = stage_callbacks);
+                  dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  ode_default_options()..., callback = callbacks);
diff --git a/src/solvers/dgsem_p4est/subcell_limiters_2d.jl b/src/solvers/dgsem_p4est/subcell_limiters_2d.jl
index d42b63789b1..3cbfcc143a4 100644
--- a/src/solvers/dgsem_p4est/subcell_limiters_2d.jl
+++ b/src/solvers/dgsem_p4est/subcell_limiters_2d.jl
@@ -78,7 +78,7 @@ end
 
 @inline function calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
                                                 boundary_conditions::BoundaryConditionPeriodic,
-                                                mesh::P4estMesh{2},
+                                                mesh::Union{P4estMesh{2}, P4estMesh{3}},
                                                 equations, dg, cache)
     return nothing
 end
@@ -201,7 +201,7 @@ end
 
 @inline function calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
                                                 boundary_conditions::BoundaryConditionPeriodic,
-                                                mesh::P4estMesh{2},
+                                                mesh::Union{P4estMesh{2}, P4estMesh{3}},
                                                 equations, dg, cache)
     return nothing
 end
diff --git a/src/solvers/dgsem_p4est/subcell_limiters_3d.jl b/src/solvers/dgsem_p4est/subcell_limiters_3d.jl
index 657efb02f5b..1f2d4fdd853 100644
--- a/src/solvers/dgsem_p4est/subcell_limiters_3d.jl
+++ b/src/solvers/dgsem_p4est/subcell_limiters_3d.jl
@@ -96,69 +96,79 @@ function calc_bounds_twosided_interface!(var_min, var_max, variable,
     end
 
     # Calc bounds at physical boundaries
-    # calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
-    #                                boundary_conditions,
-    #                                mesh, equations, dg, cache)
+    calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
+                                   boundary_conditions,
+                                   mesh, equations, dg, cache)
 
     return nothing
 end
 
-# @inline function calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
-#                                                 boundary_conditions::BoundaryConditionPeriodic,
-#                                                 mesh::P4estMesh{2},
-#                                                 equations, dg, cache)
-#     return nothing
-# end
-
-# @inline function calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
-#                                                 boundary_conditions,
-#                                                 mesh::P4estMesh{2},
-#                                                 equations, dg, cache)
-#     (; boundary_condition_types, boundary_indices) = boundary_conditions
-#     (; contravariant_vectors) = cache.elements
-
-#     (; boundaries) = cache
-#     index_range = eachnode(dg)
-
-#     foreach_enumerate(boundary_condition_types) do (i, boundary_condition)
-#         for boundary in boundary_indices[i]
-#             element = boundaries.neighbor_ids[boundary]
-#             node_indices = boundaries.node_indices[boundary]
-#             direction = indices2direction(node_indices)
-
-#             i_node_start, i_node_step = index_to_start_step_2d(node_indices[1],
-#                                                                index_range)
-#             j_node_start, j_node_step = index_to_start_step_2d(node_indices[2],
-#                                                                index_range)
-
-#             i_node = i_node_start
-#             j_node = j_node_start
-#             for i in eachnode(dg)
-#                 normal_direction = get_normal_direction(direction,
-#                                                         contravariant_vectors,
-#                                                         i_node, j_node, element)
-
-#                 u_inner = get_node_vars(u, equations, dg, i_node, j_node, element)
-
-#                 u_outer = get_boundary_outer_state(u_inner, t, boundary_condition,
-#                                                    normal_direction,
-#                                                    mesh, equations, dg, cache,
-#                                                    i_node, j_node, element)
-#                 var_outer = u_outer[variable]
-
-#                 var_min[i_node, j_node, element] = min(var_min[i_node, j_node, element],
-#                                                        var_outer)
-#                 var_max[i_node, j_node, element] = max(var_max[i_node, j_node, element],
-#                                                        var_outer)
-
-#                 i_node += i_node_step
-#                 j_node += j_node_step
-#             end
-#         end
-#     end
-
-#     return nothing
-# end
+# Extend the two-sided bounds `var_min`/`var_max` at the nodes of all physical
+# boundary faces of a 3D `P4estMesh`. For every boundary node, the outer state
+# is obtained from `get_boundary_outer_state` for the respective boundary
+# condition, and its component `variable` is merged into both bounds via
+# `min`/`max`.
+@inline function calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
+                                                boundary_conditions,
+                                                mesh::P4estMesh{3},
+                                                equations, dg, cache)
+    (; boundary_condition_types, boundary_indices) = boundary_conditions
+    (; boundaries) = cache
+    (; contravariant_vectors) = cache.elements
+    nodes = eachnode(dg)
+
+    foreach_enumerate(boundary_condition_types) do (bc_id, boundary_condition)
+        for boundary in boundary_indices[bc_id]
+            element = boundaries.neighbor_ids[boundary]
+            node_indices = boundaries.node_indices[boundary]
+            direction = indices2direction(node_indices)
+
+            # Start value and step sizes (inner and outer face loop) of each volume
+            # index on this boundary face
+            i_start, i_step_i, i_step_j = index_to_start_step_3d(node_indices[1], nodes)
+            j_start, j_step_i, j_step_j = index_to_start_step_3d(node_indices[2], nodes)
+            k_start, k_step_i, k_step_j = index_to_start_step_3d(node_indices[3], nodes)
+
+            # Volume indices of the current boundary node
+            i_node, j_node, k_node = i_start, j_start, k_start
+            for _ in nodes
+                for _ in nodes
+                    normal_direction = get_normal_direction(direction,
+                                                            contravariant_vectors,
+                                                            i_node, j_node, k_node,
+                                                            element)
+
+                    u_inner = get_node_vars(u, equations, dg, i_node, j_node, k_node,
+                                            element)
+
+                    # Outer state prescribed by the boundary condition at this node
+                    u_outer = get_boundary_outer_state(u_inner, t, boundary_condition,
+                                                       normal_direction,
+                                                       mesh, equations, dg, cache,
+                                                       i_node, j_node, k_node, element)
+                    var_outer = u_outer[variable]
+
+                    # Widen both bounds so that they include the outer value
+                    lower = var_min[i_node, j_node, k_node, element]
+                    var_min[i_node, j_node, k_node, element] = min(lower, var_outer)
+                    upper = var_max[i_node, j_node, k_node, element]
+                    var_max[i_node, j_node, k_node, element] = max(upper, var_outer)
+
+                    # Advance within the inner face loop
+                    i_node += i_step_i
+                    j_node += j_step_i
+                    k_node += k_step_i
+                end
+                # Advance within the outer face loop
+                i_node += i_step_j
+                j_node += j_step_j
+                k_node += k_step_j
+            end
+        end
+    end
+
+    return nothing
+end
 
 function calc_bounds_onesided_interface!(var_minmax, minmax, variable, u, t, semi,
                                          mesh::P4estMesh{3})
@@ -243,66 +253,72 @@ function calc_bounds_onesided_interface!(var_minmax, minmax, variable, u, t, sem
     end
 
     # Calc bounds at physical boundaries
-    # calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
-    #                                boundary_conditions,
-    #                                mesh, equations, dg, cache)
+    calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
+                                   boundary_conditions,
+                                   mesh, equations, dg, cache)
 
     return nothing
 end
 
-# @inline function calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
-#                                                 boundary_conditions::BoundaryConditionPeriodic,
-#                                                 mesh::P4estMesh{3},
-#                                                 equations, dg, cache)
-#     return nothing
-# end
-
-# @inline function calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
-#                                                 boundary_conditions,
-#                                                 mesh::P4estMesh{3},
-#                                                 equations, dg, cache)
-#     (; boundary_condition_types, boundary_indices) = boundary_conditions
-#     (; contravariant_vectors) = cache.elements
-
-#     (; boundaries) = cache
-#     index_range = eachnode(dg)
-
-#     foreach_enumerate(boundary_condition_types) do (i, boundary_condition)
-#         for boundary in boundary_indices[i]
-#             element = boundaries.neighbor_ids[boundary]
-#             node_indices = boundaries.node_indices[boundary]
-#             direction = indices2direction(node_indices)
-
-#             i_node_start, i_node_step = index_to_start_step_2d(node_indices[1],
-#                                                                index_range)
-#             j_node_start, j_node_step = index_to_start_step_2d(node_indices[2],
-#                                                                index_range)
-
-#             i_node = i_node_start
-#             j_node = j_node_start
-#             for node in eachnode(dg)
-#                 normal_direction = get_normal_direction(direction,
-#                                                         contravariant_vectors,
-#                                                         i_node, j_node, element)
-
-#                 u_inner = get_node_vars(u, equations, dg, i_node, j_node, element)
-
-#                 u_outer = get_boundary_outer_state(u_inner, t, boundary_condition,
-#                                                    normal_direction,
-#                                                    mesh, equations, dg, cache,
-#                                                    i_node, j_node, element)
-#                 var_outer = variable(u_outer, equations)
-
-#                 var_minmax[i_node, j_node, element] = minmax(var_minmax[i_node, j_node,
-#                                                                         element],
-#                                                              var_outer)
-
-#                 i_node += i_node_step
-#                 j_node += j_node_step
-#             end
-#         end
-#     end
-
-#     return nothing
-# end
+# Extend the one-sided bound `var_minmax` at the nodes of all physical boundary
+# faces of a 3D `P4estMesh`. For every boundary node, `variable` is evaluated at
+# the outer state returned by `get_boundary_outer_state` and merged into the
+# bound with the function `minmax`.
+@inline function calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
+                                                boundary_conditions,
+                                                mesh::P4estMesh{3},
+                                                equations, dg, cache)
+    (; boundary_condition_types, boundary_indices) = boundary_conditions
+    (; boundaries) = cache
+    (; contravariant_vectors) = cache.elements
+    nodes = eachnode(dg)
+
+    foreach_enumerate(boundary_condition_types) do (bc_id, boundary_condition)
+        for boundary in boundary_indices[bc_id]
+            element = boundaries.neighbor_ids[boundary]
+            node_indices = boundaries.node_indices[boundary]
+            direction = indices2direction(node_indices)
+
+            # Start value and step sizes (inner and outer face loop) of each volume
+            # index on this boundary face
+            i_start, i_step_i, i_step_j = index_to_start_step_3d(node_indices[1], nodes)
+            j_start, j_step_i, j_step_j = index_to_start_step_3d(node_indices[2], nodes)
+            k_start, k_step_i, k_step_j = index_to_start_step_3d(node_indices[3], nodes)
+
+            i_node, j_node, k_node = i_start, j_start, k_start
+            for _ in nodes
+                for _ in nodes
+                    normal_direction = get_normal_direction(direction,
+                                                            contravariant_vectors,
+                                                            i_node, j_node, k_node,
+                                                            element)
+
+                    u_inner = get_node_vars(u, equations, dg, i_node, j_node, k_node,
+                                            element)
+
+                    # Outer state prescribed by the boundary condition at this node
+                    u_outer = get_boundary_outer_state(u_inner, t, boundary_condition,
+                                                       normal_direction,
+                                                       mesh, equations, dg, cache,
+                                                       i_node, j_node, k_node, element)
+                    var_outer = variable(u_outer, equations)
+
+                    bound = var_minmax[i_node, j_node, k_node, element]
+                    var_minmax[i_node, j_node, k_node, element] = minmax(bound, var_outer)
+
+                    # Advance within the inner face loop
+                    i_node += i_step_i
+                    j_node += j_step_i
+                    k_node += k_step_i
+                end
+                # Advance within the outer face loop
+                i_node += i_step_j
+                j_node += j_step_j
+                k_node += k_step_j
+            end
+        end
+    end
+
+    return nothing
+end
 end # @muladd
diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl
index db78036a2bb..396497a930a 100644
--- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl
+++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl
@@ -763,4 +763,17 @@ Should be used together with [`TreeMesh`](@ref) or [`StructuredMesh`](@ref).
 
     return u_outer
 end
+
+# Outer state at a Dirichlet boundary of a `P4estMesh`: the boundary value function is
+# evaluated at the node coordinates selected by `indices` and at time `t`.
+@inline function get_boundary_outer_state(u_inner, t,
+                                          boundary_condition::BoundaryConditionDirichlet,
+                                          normal_direction,
+                                          mesh::P4estMesh,
+                                          equations, dg, cache, indices...)
+    coords = get_node_coords(cache.elements.node_coordinates, equations, dg,
+                             indices...)
+
+    return boundary_condition.boundary_value_function(coords, t, equations)
+end
 end # @muladd
diff --git a/test/test_p4est_3d.jl b/test/test_p4est_3d.jl
index 732bca13230..cdce79d0d7c 100644
--- a/test/test_p4est_3d.jl
+++ b/test/test_p4est_3d.jl
@@ -616,6 +616,71 @@ end
     end
 end
 
+@trixi_testset "elixir_euler_source_terms_nonperiodic_hohqmesh_sc_subcell.jl (positivity bounds)" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                 "elixir_euler_source_terms_nonperiodic_hohqmesh_sc_subcell.jl"),
+                        l2=[
+                            0.003968297633693987,
+                            0.004219722654211142,
+                            0.004313961192612337,
+                            0.003994315173438687,
+                            0.008093257684168107
+                        ],
+                        linf=[
+                            0.03906896684353356,
+                            0.032089927158354126,
+                            0.04744970237203505,
+                            0.047720935760972694,
+                            0.10020886734372869
+                        ])
+    # Guard against excessive memory allocations in `Trixi.rhs!`
+    # (e.g., caused by type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        # The allowed allocation limit is larger than usual because the custom
+        # integrator is used; the resulting allocations are not *recorded* for the
+        # methods from OrdinaryDiffEq.jl.
+        # Corresponding issue: https://github.com/trixi-framework/Trixi.jl/issues/1877
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000
+    end
+end
+
+@trixi_testset "elixir_euler_source_terms_nonperiodic_hohqmesh_sc_subcell.jl (local bounds)" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                 "elixir_euler_source_terms_nonperiodic_hohqmesh_sc_subcell.jl"),
+                        local_twosided_variables_cons=["rho"],
+                        local_onesided_variables_nonlinear=[(Trixi.entropy_guermond_etal,
+                                                             min)],
+                        l2=[
+                            0.03408584424980049,
+                            0.02779678380021509,
+                            0.027798412637516465,
+                            0.028828887822172678,
+                            0.08583486245614604
+                        ],
+                        linf=[
+                            0.14985741416461962,
+                            0.14670921773754952,
+                            0.1682308073619827,
+                            0.15212001588109558,
+                            0.33596974695378323
+                        ])
+    # Guard against excessive memory allocations in `Trixi.rhs!`
+    # (e.g., caused by type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        # The allowed allocation limit is larger than usual because the custom
+        # integrator is used; the resulting allocations are not *recorded* for the
+        # methods from OrdinaryDiffEq.jl.
+        # Corresponding issue: https://github.com/trixi-framework/Trixi.jl/issues/1877
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000
+    end
+end
+
 @trixi_testset "elixir_mhd_alfven_wave_er.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR,
                                  "elixir_mhd_alfven_wave_er.jl"),

From ae914d0834de4cec62f92158149fb7ceb13435d0 Mon Sep 17 00:00:00 2001
From: bennibolm <benjamin.bolm@gmx.de>
Date: Wed, 17 Sep 2025 17:51:08 +0200
Subject: [PATCH 03/18] Fix 2d p4est

---
 src/solvers/dgsem_p4est/subcell_limiters_2d.jl |  4 ++--
 src/solvers/dgsem_p4est/subcell_limiters_3d.jl | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/solvers/dgsem_p4est/subcell_limiters_2d.jl b/src/solvers/dgsem_p4est/subcell_limiters_2d.jl
index 3cbfcc143a4..d42b63789b1 100644
--- a/src/solvers/dgsem_p4est/subcell_limiters_2d.jl
+++ b/src/solvers/dgsem_p4est/subcell_limiters_2d.jl
@@ -78,7 +78,7 @@ end
 
 @inline function calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
                                                 boundary_conditions::BoundaryConditionPeriodic,
-                                                mesh::Union{P4estMesh{2}, P4estMesh{3}},
+                                                mesh::P4estMesh{2},
                                                 equations, dg, cache)
     return nothing
 end
@@ -201,7 +201,7 @@ end
 
 @inline function calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
                                                 boundary_conditions::BoundaryConditionPeriodic,
-                                                mesh::Union{P4estMesh{2}, P4estMesh{3}},
+                                                mesh::P4estMesh{2},
                                                 equations, dg, cache)
     return nothing
 end
diff --git a/src/solvers/dgsem_p4est/subcell_limiters_3d.jl b/src/solvers/dgsem_p4est/subcell_limiters_3d.jl
index 1f2d4fdd853..4b5d3f1a0ce 100644
--- a/src/solvers/dgsem_p4est/subcell_limiters_3d.jl
+++ b/src/solvers/dgsem_p4est/subcell_limiters_3d.jl
@@ -103,6 +103,13 @@ function calc_bounds_twosided_interface!(var_min, var_max, variable,
     return nothing
 end
 
+# No-op for `BoundaryConditionPeriodic` on a `P4estMesh{3}`: the bounds stay untouched.
+@inline function calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
+                                                boundary_conditions::BoundaryConditionPeriodic,
+                                                mesh::P4estMesh{3}, equations, dg, cache)
+    return nothing
+end
+
 @inline function calc_bounds_twosided_boundary!(var_min, var_max, variable, u, t,
                                                 boundary_conditions,
                                                 mesh::P4estMesh{3},
@@ -260,6 +267,13 @@ function calc_bounds_onesided_interface!(var_minmax, minmax, variable, u, t, sem
     return nothing
 end
 
+# No-op for `BoundaryConditionPeriodic` on a `P4estMesh{3}`: `var_minmax` stays untouched.
+@inline function calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
+                                                boundary_conditions::BoundaryConditionPeriodic,
+                                                mesh::P4estMesh{3}, equations, dg, cache)
+    return nothing
+end
+
 @inline function calc_bounds_onesided_boundary!(var_minmax, minmax, variable, u, t,
                                                 boundary_conditions,
                                                 mesh::P4estMesh{3},

From 91f1b83bdccf544d3af41e694ee160ff20f477a8 Mon Sep 17 00:00:00 2001
From: bennibolm <benjamin.bolm@gmx.de>
Date: Thu, 18 Sep 2025 13:43:32 +0200
Subject: [PATCH 04/18] Support nonconservative terms

---
 .../elixir_mhd_shockcapturing_subcell.jl      |  118 ++
 .../dg_3d_subcell_limiters.jl                 | 1156 ++++++++++-------
 .../dgsem_tree/dg_3d_subcell_limiters.jl      |   42 +-
 3 files changed, 805 insertions(+), 511 deletions(-)
 create mode 100644 examples/p4est_3d_dgsem/elixir_mhd_shockcapturing_subcell.jl

diff --git a/examples/p4est_3d_dgsem/elixir_mhd_shockcapturing_subcell.jl b/examples/p4est_3d_dgsem/elixir_mhd_shockcapturing_subcell.jl
new file mode 100644
index 00000000000..7bc6c775232
--- /dev/null
+++ b/examples/p4est_3d_dgsem/elixir_mhd_shockcapturing_subcell.jl
@@ -0,0 +1,118 @@
+using Trixi
+
+###############################################################################
+# semidiscretization of the compressible ideal GLM-MHD equations
+
+equations = IdealGlmMhdEquations3D(1.4)
+
+"""
+    initial_condition_blast_wave(x, t, equations::IdealGlmMhdEquations3D)
+
+Weak magnetic blast wave, following Section 6.1 of
+- A. M. Rueda-Ramírez, S. Hennemann, F. J. Hindenlang, A. R. Winters, G. J. Gassner (2021)
+  An entropy stable nodal discontinuous Galerkin method for the resistive MHD
+  equations. Part II: Subcell finite volume shock capturing
+  [doi: 10.1016/j.jcp.2021.110580](https://doi.org/10.1016/j.jcp.2021.110580)
+"""
+function initial_condition_blast_wave(x, t, equations::IdealGlmMhdEquations3D)
+    # Blast center chosen for the domain [0, 3]^3.
+    # NOTE(review): the mesh of this elixir spans [-0.5, 0.5]^3 - confirm the center.
+    center = (1.5, 1.5, 1.5)
+    dx, dy, dz = x[1] - center[1], x[2] - center[2], x[3] - center[3]
+    radius = sqrt(dx^2 + dy^2 + dz^2)
+
+    # Smooth blending weight between the inner and the outer primitive state
+    delta_0 = 0.1
+    r_0 = 0.3
+    weight = exp(5.0 / delta_0 * (radius - r_0))
+
+    state_inner = SVector(1.2, 0.1, 0.0, 0.1, 0.9, 1.0, 1.0, 1.0, 0.0)
+    state_outer = SVector(1.2, 0.2, -0.4, 0.2, 0.3, 1.0, 1.0, 1.0, 0.0)
+    blended = (state_inner + weight * state_outer) / (1.0 + weight)
+
+    return prim2cons(blended, equations)
+end
+initial_condition = initial_condition_blast_wave
+
+# Up to version 0.13.0, `max_abs_speed_naive` was used as the default wave speed estimate of
+# `const flux_lax_friedrichs = FluxLaxFriedrichs()`, i.e., `FluxLaxFriedrichs(max_abs_speed = max_abs_speed_naive)`.
+# In the `StepsizeCallback`, though, the less diffusive `max_abs_speeds` is employed which is consistent with `max_abs_speed`.
+# Thus, we exchanged in PR#2458 the default wave speed used in the LLF flux to `max_abs_speed`.
+# To ensure that every example still runs we specify explicitly `FluxLaxFriedrichs(max_abs_speed_naive)`.
+# We remark, however, that the now default `max_abs_speed` is in general recommended due to compliance with the
+# `StepsizeCallback` (CFL-Condition) and less diffusion.
+
+# TODO: Test with working fluxes. Candidates that are not active yet combine the
+# nonconservative term `flux_nonconservative_powell_local_symmetric` with
+#   - surface flux: `FluxLaxFriedrichs(max_abs_speed_naive)`
+#   - volume flux:  `flux_derigs_etal` or `flux_central`
+#
+# Active setup: `flux_nonconservative_powell` is the nonconservative term of both
+# the surface and the volume flux, and `flux_hindenlang_gassner` is the conservative
+# volume flux. Each variable is assigned exactly once so that it is obvious which
+# fluxes the volume integral and the solver below receive.
+surface_flux = (FluxLaxFriedrichs(max_abs_speed_naive), flux_nonconservative_powell)
+volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell)
+
+basis = LobattoLegendreBasis(3)
+# IDP limiter with positivity variables `rho` (conservative) and `pressure` (nonlinear)
+limiter_idp = SubcellLimiterIDP(equations, basis;
+                                positivity_variables_cons = ["rho"],
+                                positivity_variables_nonlinear = [pressure],
+                                positivity_correction_factor = 0.1)
+volume_integral = VolumeIntegralSubcellLimiting(limiter_idp;
+                                                volume_flux_dg = volume_flux,
+                                                volume_flux_fv = surface_flux)
+solver = DGSEM(basis, surface_flux, volume_integral)
+# Periodic mesh with 2 trees per dimension between the coordinate bounds below
+coordinates_min = (-0.5, -0.5, -0.5)
+coordinates_max = (0.5, 0.5, 0.5)
+trees_per_dimension = (2, 2, 2)
+mesh = P4estMesh(trees_per_dimension,
+                 polydeg = 3,
+                 # no `mapping` is passed (none is defined in this elixir)
+                 coordinates_min = coordinates_min,
+                 coordinates_max = coordinates_max,
+                 initial_refinement_level = 2,
+                 periodicity = true)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 0.1)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+# Solution output in primitive variables, plus the node variable `limiting_coefficient`
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim,
+                                     extra_node_variables = (:limiting_coefficient,))
+# The same CFL number is passed to the step size and the GLM speed callback
+cfl = 0.4
+stepsize_callback = StepsizeCallback(cfl = cfl)
+
+glm_speed_callback = GlmSpeedCallback(glm_scale = 0.5, cfl = cfl)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback,
+                        save_solution,
+                        stepsize_callback,
+                        glm_speed_callback)
+
+###############################################################################
+# run the simulation
+stage_callbacks = (SubcellLimiterIDPCorrection(), BoundsCheckCallback())
+# The stage callbacks are handed to `Trixi.SimpleSSPRK33`, not to the `CallbackSet`
+sol = Trixi.solve(ode, Trixi.SimpleSSPRK33(stage_callbacks = stage_callbacks);
+                  dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  ode_default_options()..., callback = callbacks);
diff --git a/src/solvers/dgsem_structured/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_structured/dg_3d_subcell_limiters.jl
index 55ab7bfc62d..e9e5ba6a804 100644
--- a/src/solvers/dgsem_structured/dg_3d_subcell_limiters.jl
+++ b/src/solvers/dgsem_structured/dg_3d_subcell_limiters.jl
@@ -166,214 +166,299 @@ end
 # - Rueda-Ramírez, Gassner (2023). A Flux-Differencing Formula for Split-Form Summation By Parts
 #   Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf.
 #
-# @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u,
-#                                 mesh::P4estMesh{3},
-#                                 nonconservative_terms::True, equations,
-#                                 volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM,
-#                                 element,
-#                                 cache) where {
-#                                               F_CONS <: Function,
-#                                               F_NONCONS <:
-#                                               FluxNonConservative{NonConservativeSymmetric()}
-#                                               }
-#     (; contravariant_vectors) = cache.elements
-#     (; weights, derivative_split) = dg.basis
-#     (; flux_temp_threaded, flux_nonconservative_temp_threaded) = cache
-#     (; fhat_temp_threaded, fhat_nonconservative_temp_threaded, phi_threaded) = cache
-
-#     volume_flux_cons, volume_flux_noncons = volume_flux
-
-#     flux_temp = flux_temp_threaded[Threads.threadid()]
-#     flux_noncons_temp = flux_nonconservative_temp_threaded[Threads.threadid()]
-
-#     fhat_temp = fhat_temp_threaded[Threads.threadid()]
-#     fhat_noncons_temp = fhat_nonconservative_temp_threaded[Threads.threadid()]
-#     phi = phi_threaded[Threads.threadid()]
-
-#     # The FV-form fluxes are calculated in a recursive manner, i.e.:
-#     # fhat_(0,1)   = w_0 * FVol_0,
-#     # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j,   for j=1,...,N-1,
-#     # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
-
-#     # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
-#     # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
-#     # and saved in in `flux_temp`.
-
-#     # Split form volume flux in orientation 1: x direction
-#     flux_temp .= zero(eltype(flux_temp))
-#     flux_noncons_temp .= zero(eltype(flux_noncons_temp))
-
-#     for j in eachnode(dg), i in eachnode(dg)
-#         u_node = get_node_vars(u, equations, dg, i, j, element)
-
-#         # pull the contravariant vectors in each coordinate direction
-#         Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) # x direction
-
-#         # All diagonal entries of `derivative_split` are zero. Thus, we can skip
-#         # the computation of the diagonal terms. In addition, we use the symmetry
-#         # of `volume_flux_cons` and `volume_flux_noncons` to save half of the possible two-point flux
-#         # computations.
-#         for ii in (i + 1):nnodes(dg)
-#             u_node_ii = get_node_vars(u, equations, dg, ii, j, element)
-#             # pull the contravariant vectors and compute the average
-#             Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j,
-#                                                    element)
-#             Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
-
-#             # compute the contravariant sharp flux in the direction of the averaged contravariant vector
-#             fluxtilde1 = volume_flux_cons(u_node, u_node_ii, Ja1_avg, equations)
-#             multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], fluxtilde1,
-#                                        equations, dg, i, j)
-#             multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], fluxtilde1,
-#                                        equations, dg, ii, j)
-#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#                 # We multiply by 0.5 because that is done in other parts of Trixi
-#                 flux1_noncons = volume_flux_noncons(u_node, u_node_ii, Ja1_avg,
-#                                                     equations,
-#                                                     NonConservativeSymmetric(), noncons)
-#                 multiply_add_to_node_vars!(flux_noncons_temp,
-#                                            0.5f0 * derivative_split[i, ii],
-#                                            flux1_noncons,
-#                                            equations, dg, noncons, i, j)
-#                 multiply_add_to_node_vars!(flux_noncons_temp,
-#                                            0.5f0 * derivative_split[ii, i],
-#                                            flux1_noncons,
-#                                            equations, dg, noncons, ii, j)
-#             end
-#         end
-#     end
-
-#     # FV-form flux `fhat` in x direction
-#     fhat1_L[:, 1, :] .= zero(eltype(fhat1_L))
-#     fhat1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_L))
-#     fhat1_R[:, 1, :] .= zero(eltype(fhat1_R))
-#     fhat1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_R))
-
-#     fhat_temp[:, 1, :] .= zero(eltype(fhat1_L))
-#     fhat_noncons_temp[:, :, 1, :] .= zero(eltype(fhat1_L))
-
-#     # Compute local contribution to non-conservative flux
-#     for j in eachnode(dg), i in eachnode(dg)
-#         u_local = get_node_vars(u, equations, dg, i, j, element)
-#         # pull the local contravariant vector
-#         Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element)
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#             set_node_vars!(phi,
-#                            volume_flux_noncons(u_local, Ja1_node, equations,
-#                                                NonConservativeLocal(), noncons),
-#                            equations, dg, noncons, i, j)
-#         end
-#     end
-
-#     for j in eachnode(dg), i in 1:(nnodes(dg) - 1)
-#         # Conservative part
-#         for v in eachvariable(equations)
-#             value = fhat_temp[v, i, j] + weights[i] * flux_temp[v, i, j]
-#             fhat_temp[v, i + 1, j] = value
-#             fhat1_L[v, i + 1, j] = value
-#             fhat1_R[v, i + 1, j] = value
-#         end
-#         # Nonconservative part
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
-#             v in eachvariable(equations)
-
-#             value = fhat_noncons_temp[v, noncons, i, j] +
-#                     weights[i] * flux_noncons_temp[v, noncons, i, j]
-#             fhat_noncons_temp[v, noncons, i + 1, j] = value
-
-#             fhat1_L[v, i + 1, j] = fhat1_L[v, i + 1, j] + phi[v, noncons, i, j] * value
-#             fhat1_R[v, i + 1, j] = fhat1_R[v, i + 1, j] +
-#                                    phi[v, noncons, i + 1, j] * value
-#         end
-#     end
-
-#     # Split form volume flux in orientation 2: y direction
-#     flux_temp .= zero(eltype(flux_temp))
-#     flux_noncons_temp .= zero(eltype(flux_noncons_temp))
-
-#     for j in eachnode(dg), i in eachnode(dg)
-#         u_node = get_node_vars(u, equations, dg, i, j, element)
-
-#         # pull the contravariant vectors in each coordinate direction
-#         Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element)
-
-#         for jj in (j + 1):nnodes(dg)
-#             u_node_jj = get_node_vars(u, equations, dg, i, jj, element)
-#             # pull the contravariant vectors and compute the average
-#             Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj,
-#                                                    element)
-#             Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
-#             # compute the contravariant sharp flux in the direction of the averaged contravariant vector
-#             fluxtilde2 = volume_flux_cons(u_node, u_node_jj, Ja2_avg, equations)
-#             multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], fluxtilde2,
-#                                        equations, dg, i, j)
-#             multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], fluxtilde2,
-#                                        equations, dg, i, jj)
-#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#                 # We multiply by 0.5 because that is done in other parts of Trixi
-#                 flux2_noncons = volume_flux_noncons(u_node, u_node_jj, Ja2_avg,
-#                                                     equations,
-#                                                     NonConservativeSymmetric(), noncons)
-#                 multiply_add_to_node_vars!(flux_noncons_temp,
-#                                            0.5f0 * derivative_split[j, jj],
-#                                            flux2_noncons,
-#                                            equations, dg, noncons, i, j)
-#                 multiply_add_to_node_vars!(flux_noncons_temp,
-#                                            0.5f0 * derivative_split[jj, j],
-#                                            flux2_noncons,
-#                                            equations, dg, noncons, i, jj)
-#             end
-#         end
-#     end
-
-#     # FV-form flux `fhat` in y direction
-#     fhat2_L[:, :, 1] .= zero(eltype(fhat2_L))
-#     fhat2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_L))
-#     fhat2_R[:, :, 1] .= zero(eltype(fhat2_R))
-#     fhat2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_R))
-
-#     fhat_temp[:, :, 1] .= zero(eltype(fhat1_L))
-#     fhat_noncons_temp[:, :, :, 1] .= zero(eltype(fhat1_L))
-
-#     # Compute local contribution to non-conservative flux
-#     for j in eachnode(dg), i in eachnode(dg)
-#         u_local = get_node_vars(u, equations, dg, i, j, element)
-#         # pull the local contravariant vector
-#         Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element)
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#             set_node_vars!(phi,
-#                            volume_flux_noncons(u_local, Ja2_node, equations,
-#                                                NonConservativeLocal(), noncons),
-#                            equations, dg, noncons, i, j)
-#         end
-#     end
-
-#     for j in 1:(nnodes(dg) - 1), i in eachnode(dg)
-#         # Conservative part
-#         for v in eachvariable(equations)
-#             value = fhat_temp[v, i, j] + weights[j] * flux_temp[v, i, j]
-#             fhat_temp[v, i, j + 1] = value
-#             fhat2_L[v, i, j + 1] = value
-#             fhat2_R[v, i, j + 1] = value
-#         end
-#         # Nonconservative part
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
-#             v in eachvariable(equations)
-
-#             value = fhat_noncons_temp[v, noncons, i, j] +
-#                     weights[j] * flux_noncons_temp[v, noncons, i, j]
-#             fhat_noncons_temp[v, noncons, i, j + 1] = value
-
-#             fhat2_L[v, i, j + 1] = fhat2_L[v, i, j + 1] + phi[v, noncons, i, j] * value
-#             fhat2_R[v, i, j + 1] = fhat2_R[v, i, j + 1] +
-#                                    phi[v, noncons, i, j + 1] * value
-#         end
-#     end
-
-#     return nothing
-# end
+@inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, u,
+                                mesh::P4estMesh{3},
+                                nonconservative_terms::True, equations,
+                                volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM,
+                                element,
+                                cache) where {
+                                              F_CONS <: Function,
+                                              F_NONCONS <:
+                                              FluxNonConservative{NonConservativeSymmetric()}
+                                              }
+    (; contravariant_vectors) = cache.elements
+    (; weights, derivative_split) = dg.basis
+    (; flux_temp_threaded, flux_nonconservative_temp_threaded) = cache
+    (; fhat_temp_threaded, fhat_nonconservative_temp_threaded, phi_threaded) = cache
+
+    volume_flux_cons, volume_flux_noncons = volume_flux
+
+    flux_temp = flux_temp_threaded[Threads.threadid()]
+    flux_noncons_temp = flux_nonconservative_temp_threaded[Threads.threadid()]
+
+    fhat_temp = fhat_temp_threaded[Threads.threadid()]
+    fhat_noncons_temp = fhat_nonconservative_temp_threaded[Threads.threadid()]
+    phi = phi_threaded[Threads.threadid()]
+
+    # The FV-form fluxes are calculated in a recursive manner, i.e.:
+    # fhat_(0,1)   = w_0 * FVol_0,
+    # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j,   for j=1,...,N-1,
+    # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
+
+    # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
+    # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
+    # and saved in in `flux_temp`.
+
+    # Split form volume flux in orientation 1: x direction
+    flux_temp .= zero(eltype(flux_temp))
+    flux_noncons_temp .= zero(eltype(flux_noncons_temp))
+
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, k, element)
+
+        # pull the contravariant vectors in each coordinate direction
+        Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) # x direction
+
+        # All diagonal entries of `derivative_split` are zero. Thus, we can skip
+        # the computation of the diagonal terms. In addition, we use the symmetry
+        # of `volume_flux_cons` and `volume_flux_noncons` to save half of the possible two-point flux
+        # computations.
+        for ii in (i + 1):nnodes(dg)
+            u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element)
+            # pull the contravariant vectors and compute the average
+            Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, k,
+                                                   element)
+            Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
+
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde1 = volume_flux_cons(u_node, u_node_ii, Ja1_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], fluxtilde1,
+                                       equations, dg, i, j, k)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], fluxtilde1,
+                                       equations, dg, ii, j, k)
+            for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+                # We multiply by 0.5 because that is done in other parts of Trixi
+                flux1_noncons = volume_flux_noncons(u_node, u_node_ii, Ja1_avg,
+                                                    equations,
+                                                    NonConservativeSymmetric(), noncons)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           0.5f0 * derivative_split[i, ii],
+                                           flux1_noncons,
+                                           equations, dg, noncons, i, j, k)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           0.5f0 * derivative_split[ii, i],
+                                           flux1_noncons,
+                                           equations, dg, noncons, ii, j, k)
+            end
+        end
+    end
+
+    # FV-form flux `fhat` in x direction
+    fhat1_L[:, 1, :, :] .= zero(eltype(fhat1_L))
+    fhat1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fhat1_L))
+    fhat1_R[:, 1, :, :] .= zero(eltype(fhat1_R))
+    fhat1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fhat1_R))
+
+    fhat_temp[:, 1, :, :] .= zero(eltype(fhat1_L))
+    fhat_noncons_temp[:, :, 1, :, :] .= zero(eltype(fhat1_L))
+
+    # Compute local contribution to non-conservative flux
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_local = get_node_vars(u, equations, dg, i, j, k, element)
+        # pull the local contravariant vector
+        Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element)
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+            set_node_vars!(phi,
+                           volume_flux_noncons(u_local, Ja1_node, equations,
+                                               NonConservativeLocal(), noncons),
+                           equations, dg, noncons, i, j, k)
+        end
+    end
+
+    for k in eachnode(dg), j in eachnode(dg), i in 1:(nnodes(dg) - 1)
+        # Conservative part
+        for v in eachvariable(equations)
+            value = fhat_temp[v, i, j, k] + weights[i] * flux_temp[v, i, j, k]
+            fhat_temp[v, i + 1, j, k] = value
+            fhat1_L[v, i + 1, j, k] = value
+            fhat1_R[v, i + 1, j, k] = value
+        end
+        # Nonconservative part
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+            v in eachvariable(equations)
+
+            value = fhat_noncons_temp[v, noncons, i, j, k] +
+                    weights[i] * flux_noncons_temp[v, noncons, i, j, k]
+            fhat_noncons_temp[v, noncons, i + 1, j, k] = value
+
+            fhat1_L[v, i + 1, j, k] = fhat1_L[v, i + 1, j, k] +
+                                      phi[v, noncons, i, j, k] * value
+            fhat1_R[v, i + 1, j, k] = fhat1_R[v, i + 1, j, k] +
+                                      phi[v, noncons, i + 1, j, k] * value
+        end
+    end
+
+    # Split form volume flux in orientation 2: y direction
+    flux_temp .= zero(eltype(flux_temp))
+    flux_noncons_temp .= zero(eltype(flux_noncons_temp))
+
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, k, element)
+
+        # pull the contravariant vectors in each coordinate direction
+        Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element)
+
+        for jj in (j + 1):nnodes(dg)
+            u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element)
+            # pull the contravariant vectors and compute the average
+            Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, k,
+                                                   element)
+            Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde2 = volume_flux_cons(u_node, u_node_jj, Ja2_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], fluxtilde2,
+                                       equations, dg, i, j, k)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], fluxtilde2,
+                                       equations, dg, i, jj, k)
+            for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+                # We multiply by 0.5 because that is done in other parts of Trixi
+                flux2_noncons = volume_flux_noncons(u_node, u_node_jj, Ja2_avg,
+                                                    equations,
+                                                    NonConservativeSymmetric(), noncons)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           0.5f0 * derivative_split[j, jj],
+                                           flux2_noncons,
+                                           equations, dg, noncons, i, j, k)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           0.5f0 * derivative_split[jj, j],
+                                           flux2_noncons,
+                                           equations, dg, noncons, i, jj, k)
+            end
+        end
+    end
+
+    # FV-form flux `fhat` in y direction
+    fhat2_L[:, :, 1, :] .= zero(eltype(fhat2_L))
+    fhat2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fhat2_L))
+    fhat2_R[:, :, 1, :] .= zero(eltype(fhat2_R))
+    fhat2_R[:, :, nnodes(dg) + 1, :] .= zero(eltype(fhat2_R))
+
+    fhat_temp[:, :, 1, :] .= zero(eltype(fhat1_L))
+    fhat_noncons_temp[:, :, :, 1, :] .= zero(eltype(fhat1_L))
+
+    # Compute local contribution to non-conservative flux
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_local = get_node_vars(u, equations, dg, i, j, k, element)
+        # pull the local contravariant vector
+        Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element)
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+            set_node_vars!(phi,
+                           volume_flux_noncons(u_local, Ja2_node, equations,
+                                               NonConservativeLocal(), noncons),
+                           equations, dg, noncons, i, j, k)
+        end
+    end
+
+    for k in eachnode(dg), j in 1:(nnodes(dg) - 1), i in eachnode(dg)
+        # Conservative part
+        for v in eachvariable(equations)
+            value = fhat_temp[v, i, j, k] + weights[j] * flux_temp[v, i, j, k]
+            fhat_temp[v, i, j + 1, k] = value
+            fhat2_L[v, i, j + 1, k] = value
+            fhat2_R[v, i, j + 1, k] = value
+        end
+        # Nonconservative part
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+            v in eachvariable(equations)
+
+            value = fhat_noncons_temp[v, noncons, i, j, k] +
+                    weights[j] * flux_noncons_temp[v, noncons, i, j, k]
+            fhat_noncons_temp[v, noncons, i, j + 1, k] = value
+
+            fhat2_L[v, i, j + 1, k] = fhat2_L[v, i, j + 1, k] +
+                                      phi[v, noncons, i, j, k] * value
+            fhat2_R[v, i, j + 1, k] = fhat2_R[v, i, j + 1, k] +
+                                      phi[v, noncons, i, j + 1, k] * value
+        end
+    end
+
+    # Split form volume flux in orientation 3: z direction
+    flux_temp .= zero(eltype(flux_temp))
+    flux_noncons_temp .= zero(eltype(flux_noncons_temp))
+
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, k, element)
+
+        # pull the contravariant vectors in each coordinate direction
+        Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element)
+
+        for kk in (k + 1):nnodes(dg)
+            u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element)
+            # pull the contravariant vectors and compute the average
+            Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, i, j, kk,
+                                                   element)
+            Ja3_avg = 0.5f0 * (Ja3_node + Ja3_node_kk)
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde3 = volume_flux_cons(u_node, u_node_kk, Ja3_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[k, kk], fluxtilde3,
+                                       equations, dg, i, j, k)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[kk, k], fluxtilde3,
+                                       equations, dg, i, j, kk)
+            for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+                # We multiply by 0.5 because that is done in other parts of Trixi
+                flux3_noncons = volume_flux_noncons(u_node, u_node_kk, Ja3_avg,
+                                                    equations,
+                                                    NonConservativeSymmetric(), noncons)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           0.5f0 * derivative_split[k, kk],
+                                           flux3_noncons,
+                                           equations, dg, noncons, i, j, k)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           0.5f0 * derivative_split[kk, k],
+                                           flux3_noncons,
+                                           equations, dg, noncons, i, j, kk)
+            end
+        end
+    end
+
+    # FV-form flux `fhat` in z direction
+    fhat3_L[:, :, :, 1] .= zero(eltype(fhat3_L))
+    fhat3_L[:, :, :, nnodes(dg) + 1] .= zero(eltype(fhat3_L))
+    fhat3_R[:, :, :, 1] .= zero(eltype(fhat3_R))
+    fhat3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fhat3_R))
+
+    fhat_temp[:, :, :, 1] .= zero(eltype(fhat1_L))
+    fhat_noncons_temp[:, :, :, :, 1] .= zero(eltype(fhat1_L))
+
+    # Compute local contribution to non-conservative flux
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_local = get_node_vars(u, equations, dg, i, j, k, element)
+        # pull the local contravariant vector
+        Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element)
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+            set_node_vars!(phi,
+                           volume_flux_noncons(u_local, Ja3_node, equations,
+                                               NonConservativeLocal(), noncons),
+                           equations, dg, noncons, i, j, k)
+        end
+    end
+
+    for k in 1:(nnodes(dg) - 1), j in eachnode(dg), i in eachnode(dg)
+        # Conservative part
+        for v in eachvariable(equations)
+            value = fhat_temp[v, i, j, k] + weights[k] * flux_temp[v, i, j, k]
+            fhat_temp[v, i, j, k + 1] = value
+            fhat3_L[v, i, j, k + 1] = value
+            fhat3_R[v, i, j, k + 1] = value
+        end
+        # Nonconservative part
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+            v in eachvariable(equations)
+
+            value = fhat_noncons_temp[v, noncons, i, j, k] +
+                    weights[k] * flux_noncons_temp[v, noncons, i, j, k]
+            fhat_noncons_temp[v, noncons, i, j, k + 1] = value
+
+            fhat3_L[v, i, j, k + 1] = fhat3_L[v, i, j, k + 1] +
+                                      phi[v, noncons, i, j, k] * value
+            fhat3_R[v, i, j, k + 1] = fhat3_R[v, i, j, k + 1] +
+                                      phi[v, noncons, i, j, k + 1] * value
+        end
+    end
+
+    return nothing
+end
 
-# TODO: This is the 2d version for now.
 # Calculate the DG staggered volume fluxes `fhat` in subcell FV-form inside the element
 # (**with non-conservative terms in "local * jump" form**).
 #
@@ -381,288 +466,377 @@ end
 #
 # The calculation of the non-conservative staggered "fluxes" requires non-conservative
 # terms that can be written as a product of local and jump contributions.
-# @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u,
-#                                 mesh::P4estMesh{3},
-#                                 nonconservative_terms::True, equations,
-#                                 volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM,
-#                                 element,
-#                                 cache) where {
-#                                               F_CONS <: Function,
-#                                               F_NONCONS <:
-#                                               FluxNonConservative{NonConservativeJump()}
-#                                               }
-#     (; contravariant_vectors) = cache.elements
-#     (; weights, derivative_split) = dg.basis
-#     (; flux_temp_threaded, flux_nonconservative_temp_threaded) = cache
-#     (; fhat_temp_threaded, fhat_nonconservative_temp_threaded, phi_threaded) = cache
-
-#     volume_flux_cons, volume_flux_noncons = volume_flux
-
-#     flux_temp = flux_temp_threaded[Threads.threadid()]
-#     flux_noncons_temp = flux_nonconservative_temp_threaded[Threads.threadid()]
-
-#     fhat_temp = fhat_temp_threaded[Threads.threadid()]
-#     fhat_noncons_temp = fhat_nonconservative_temp_threaded[Threads.threadid()]
-#     phi = phi_threaded[Threads.threadid()]
-
-#     # The FV-form fluxes are calculated in a recursive manner, i.e.:
-#     # fhat_(0,1)   = w_0 * FVol_0,
-#     # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j,   for j=1,...,N-1,
-#     # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
-
-#     # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
-#     # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
-#     # and saved in in `flux_temp`.
-
-#     # Split form volume flux in orientation 1: x direction
-#     flux_temp .= zero(eltype(flux_temp))
-#     flux_noncons_temp .= zero(eltype(flux_noncons_temp))
-
-#     for j in eachnode(dg), i in eachnode(dg)
-#         u_node = get_node_vars(u, equations, dg, i, j, element)
-
-#         # pull the contravariant vectors in each coordinate direction
-#         Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) # x direction
-
-#         # All diagonal entries of `derivative_split` are zero. Thus, we can skip
-#         # the computation of the diagonal terms. In addition, we use the symmetry
-#         # of `volume_flux_cons` and `volume_flux_noncons` to save half of the possible two-point flux
-#         # computations.
-#         for ii in (i + 1):nnodes(dg)
-#             u_node_ii = get_node_vars(u, equations, dg, ii, j, element)
-#             # pull the contravariant vectors and compute the average
-#             Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j,
-#                                                    element)
-#             Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
-
-#             # compute the contravariant sharp flux in the direction of the averaged contravariant vector
-#             fluxtilde1 = volume_flux_cons(u_node, u_node_ii, Ja1_avg, equations)
-#             multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], fluxtilde1,
-#                                        equations, dg, i, j)
-#             multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], fluxtilde1,
-#                                        equations, dg, ii, j)
-#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#                 # We multiply by 0.5 because that is done in other parts of Trixi
-#                 flux1_noncons = volume_flux_noncons(u_node, u_node_ii, Ja1_avg,
-#                                                     equations,
-#                                                     NonConservativeJump(), noncons)
-#                 multiply_add_to_node_vars!(flux_noncons_temp,
-#                                            0.5f0 * derivative_split[i, ii],
-#                                            flux1_noncons,
-#                                            equations, dg, noncons, i, j)
-#                 multiply_add_to_node_vars!(flux_noncons_temp,
-#                                            -0.5f0 * derivative_split[ii, i],
-#                                            flux1_noncons,
-#                                            equations, dg, noncons, ii, j)
-#             end
-#         end
-#     end
-
-#     # FV-form flux `fhat` in x direction
-#     fhat1_L[:, 1, :] .= zero(eltype(fhat1_L))
-#     fhat1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_L))
-#     fhat1_R[:, 1, :] .= zero(eltype(fhat1_R))
-#     fhat1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1_R))
-
-#     fhat_temp[:, 1, :] .= zero(eltype(fhat1_L))
-#     fhat_noncons_temp[:, :, 1, :] .= zero(eltype(fhat1_L))
-
-#     # Compute local contribution to non-conservative flux
-#     for j in eachnode(dg), i in eachnode(dg)
-#         u_local = get_node_vars(u, equations, dg, i, j, element)
-#         # pull the local contravariant vector
-#         Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element)
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#             set_node_vars!(phi,
-#                            volume_flux_noncons(u_local, Ja1_node, equations,
-#                                                NonConservativeLocal(), noncons),
-#                            equations, dg, noncons, i, j)
-#         end
-#     end
-
-#     for j in eachnode(dg), i in 1:(nnodes(dg) - 1)
-#         # Conservative part
-#         for v in eachvariable(equations)
-#             value = fhat_temp[v, i, j] + weights[i] * flux_temp[v, i, j]
-#             fhat_temp[v, i + 1, j] = value
-#             fhat1_L[v, i + 1, j] = value
-#             fhat1_R[v, i + 1, j] = value
-#         end
-#         # Nonconservative part
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
-#             v in eachvariable(equations)
-
-#             value = fhat_noncons_temp[v, noncons, i, j] +
-#                     weights[i] * flux_noncons_temp[v, noncons, i, j]
-#             fhat_noncons_temp[v, noncons, i + 1, j] = value
-
-#             fhat1_L[v, i + 1, j] = fhat1_L[v, i + 1, j] + phi[v, noncons, i, j] * value
-#             fhat1_R[v, i + 1, j] = fhat1_R[v, i + 1, j] +
-#                                    phi[v, noncons, i + 1, j] * value
-#         end
-#     end
-
-#     # Apply correction term to the flux-differencing formula for nonconservative local * jump fluxes.
-#     for j in eachnode(dg)
-#         u_0 = get_node_vars(u, equations, dg, 1, j, element)
-#         Ja1_node_0 = get_contravariant_vector(1, contravariant_vectors, 1, j, element)
-
-#         for i in 2:(nnodes(dg) - 1)
-#             u_i = get_node_vars(u, equations, dg, i, j, element)
-#             Ja1_node_i = get_contravariant_vector(1, contravariant_vectors, i, j,
-#                                                   element)
-#             Ja1_avg = 0.5f0 * (Ja1_node_0 + Ja1_node_i)
-
-#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#                 phi_jump = volume_flux_noncons(u_0, u_i, Ja1_avg, equations,
-#                                                NonConservativeJump(), noncons)
-
-#                 for v in eachvariable(equations)
-#                     # The factor of 2 is missing on each term because Trixi multiplies all the non-cons terms with 0.5
-#                     fhat1_R[v, i, j] -= phi[v, noncons, i, j] * phi_jump[v]
-#                     fhat1_L[v, i + 1, j] -= phi[v, noncons, i, j] * phi_jump[v]
-#                 end
-#             end
-#         end
-#         u_N = get_node_vars(u, equations, dg, nnodes(dg), j, element)
-#         Ja1_node_N = get_contravariant_vector(1, contravariant_vectors, nnodes(dg), j,
-#                                               element)
-#         Ja1_avg = 0.5f0 * (Ja1_node_0 + Ja1_node_N)
-
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#             phi_jump = volume_flux_noncons(u_0, u_N, Ja1_avg, equations,
-#                                            NonConservativeJump(), noncons)
-
-#             for v in eachvariable(equations)
-#                 # The factor of 2 is missing because Trixi multiplies all the non-cons terms with 0.5
-#                 fhat1_R[v, nnodes(dg), j] -= phi[v, noncons, nnodes(dg), j] *
-#                                              phi_jump[v]
-#             end
-#         end
-#     end
-
-#     # Split form volume flux in orientation 2: y direction
-#     flux_temp .= zero(eltype(flux_temp))
-#     flux_noncons_temp .= zero(eltype(flux_noncons_temp))
-
-#     for j in eachnode(dg), i in eachnode(dg)
-#         u_node = get_node_vars(u, equations, dg, i, j, element)
-
-#         # pull the contravariant vectors in each coordinate direction
-#         Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element)
-
-#         for jj in (j + 1):nnodes(dg)
-#             u_node_jj = get_node_vars(u, equations, dg, i, jj, element)
-#             # pull the contravariant vectors and compute the average
-#             Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj,
-#                                                    element)
-#             Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
-#             # compute the contravariant sharp flux in the direction of the averaged contravariant vector
-#             fluxtilde2 = volume_flux_cons(u_node, u_node_jj, Ja2_avg, equations)
-#             multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], fluxtilde2,
-#                                        equations, dg, i, j)
-#             multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], fluxtilde2,
-#                                        equations, dg, i, jj)
-#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#                 # We multiply by 0.5 because that is done in other parts of Trixi
-#                 flux2_noncons = volume_flux_noncons(u_node, u_node_jj, Ja2_avg,
-#                                                     equations,
-#                                                     NonConservativeJump(), noncons)
-#                 multiply_add_to_node_vars!(flux_noncons_temp,
-#                                            0.5f0 * derivative_split[j, jj],
-#                                            flux2_noncons,
-#                                            equations, dg, noncons, i, j)
-#                 multiply_add_to_node_vars!(flux_noncons_temp,
-#                                            -0.5f0 * derivative_split[jj, j],
-#                                            flux2_noncons,
-#                                            equations, dg, noncons, i, jj)
-#             end
-#         end
-#     end
-
-#     # FV-form flux `fhat` in y direction
-#     fhat2_L[:, :, 1] .= zero(eltype(fhat2_L))
-#     fhat2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_L))
-#     fhat2_R[:, :, 1] .= zero(eltype(fhat2_R))
-#     fhat2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2_R))
-
-#     fhat_temp[:, :, 1] .= zero(eltype(fhat1_L))
-#     fhat_noncons_temp[:, :, :, 1] .= zero(eltype(fhat1_L))
-
-#     # Compute local contribution to non-conservative flux
-#     for j in eachnode(dg), i in eachnode(dg)
-#         u_local = get_node_vars(u, equations, dg, i, j, element)
-#         # pull the local contravariant vector
-#         Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element)
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#             set_node_vars!(phi,
-#                            volume_flux_noncons(u_local, Ja2_node, equations,
-#                                                NonConservativeLocal(), noncons),
-#                            equations, dg, noncons, i, j)
-#         end
-#     end
-
-#     for j in 1:(nnodes(dg) - 1), i in eachnode(dg)
-#         # Conservative part
-#         for v in eachvariable(equations)
-#             value = fhat_temp[v, i, j] + weights[j] * flux_temp[v, i, j]
-#             fhat_temp[v, i, j + 1] = value
-#             fhat2_L[v, i, j + 1] = value
-#             fhat2_R[v, i, j + 1] = value
-#         end
-#         # Nonconservative part
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
-#             v in eachvariable(equations)
-
-#             value = fhat_noncons_temp[v, noncons, i, j] +
-#                     weights[j] * flux_noncons_temp[v, noncons, i, j]
-#             fhat_noncons_temp[v, noncons, i, j + 1] = value
-
-#             fhat2_L[v, i, j + 1] = fhat2_L[v, i, j + 1] + phi[v, noncons, i, j] * value
-#             fhat2_R[v, i, j + 1] = fhat2_R[v, i, j + 1] +
-#                                    phi[v, noncons, i, j + 1] * value
-#         end
-#     end
-
-#     # Apply correction term to the flux-differencing formula for nonconservative local * jump fluxes.
-#     for i in eachnode(dg)
-#         u_0 = get_node_vars(u, equations, dg, i, 1, element)
-#         Ja2_node_0 = get_contravariant_vector(2, contravariant_vectors, i, 1, element)
-
-#         for j in 2:(nnodes(dg) - 1)
-#             u_j = get_node_vars(u, equations, dg, i, j, element)
-#             Ja2_node_j = get_contravariant_vector(2, contravariant_vectors, i, j,
-#                                                   element)
-#             Ja2_avg = 0.5f0 * (Ja2_node_0 + Ja2_node_j)
-
-#             for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#                 phi_jump = volume_flux_noncons(u_0, u_j, Ja2_avg, equations,
-#                                                NonConservativeJump(), noncons)
-
-#                 for v in eachvariable(equations)
-#                     # The factor of 2 is missing on each term because Trixi multiplies all the non-cons terms with 0.5
-#                     fhat2_R[v, i, j] -= phi[v, noncons, i, j] * phi_jump[v]
-#                     fhat2_L[v, i, j + 1] -= phi[v, noncons, i, j] * phi_jump[v]
-#                 end
-#             end
-#         end
-#         u_N = get_node_vars(u, equations, dg, i, nnodes(dg), element)
-#         Ja2_node_N = get_contravariant_vector(2, contravariant_vectors, i, nnodes(dg),
-#                                               element)
-#         Ja2_avg = 0.5f0 * (Ja2_node_0 + Ja2_node_N)
-
-#         for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
-#             phi_jump = volume_flux_noncons(u_0, u_N, Ja2_avg, equations,
-#                                            NonConservativeJump(), noncons)
-
-#             for v in eachvariable(equations)
-#                 # The factor of 2 is missing cause Trixi multiplies all the non-cons terms with 0.5
-#                 fhat2_R[v, i, nnodes(dg)] -= phi[v, noncons, i, nnodes(dg)] *
-#                                              phi_jump[v]
-#             end
-#         end
-#     end
-
-#     return nothing
-# end
+@inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, fhat3_L, fhat3_R, u,
+                                mesh::P4estMesh{3},
+                                nonconservative_terms::True, equations,
+                                volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM,
+                                element,
+                                cache) where {
+                                              F_CONS <: Function,
+                                              F_NONCONS <:
+                                              FluxNonConservative{NonConservativeJump()}
+                                              }
+    (; contravariant_vectors) = cache.elements
+    (; weights, derivative_split) = dg.basis
+    (; flux_temp_threaded, flux_nonconservative_temp_threaded) = cache
+    (; fhat_temp_threaded, fhat_nonconservative_temp_threaded, phi_threaded) = cache
+
+    volume_flux_cons, volume_flux_noncons = volume_flux
+
+    flux_temp = flux_temp_threaded[Threads.threadid()]
+    flux_noncons_temp = flux_nonconservative_temp_threaded[Threads.threadid()]
+
+    fhat_temp = fhat_temp_threaded[Threads.threadid()]
+    fhat_noncons_temp = fhat_nonconservative_temp_threaded[Threads.threadid()]
+    phi = phi_threaded[Threads.threadid()]
+
+    # The FV-form fluxes are calculated in a recursive manner, i.e.:
+    # fhat_(0,1)   = w_0 * FVol_0,
+    # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j,   for j=1,...,N-1,
+    # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
+
+    # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
+    # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
+    # and saved in `flux_temp`.
+
+    # Split form volume flux in orientation 1: x direction
+    flux_temp .= zero(eltype(flux_temp))
+    flux_noncons_temp .= zero(eltype(flux_noncons_temp))
+
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, k, element)
+
+        # pull the contravariant vectors in each coordinate direction
+        Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) # x direction
+
+        # All diagonal entries of `derivative_split` are zero. Thus, we can skip
+        # the computation of the diagonal terms. In addition, we use the symmetry
+        # of `volume_flux_cons` and `volume_flux_noncons` to save half of the possible two-point flux
+        # computations.
+        for ii in (i + 1):nnodes(dg)
+            u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element)
+            # pull the contravariant vectors and compute the average
+            Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, k,
+                                                   element)
+            Ja1_avg = 0.5f0 * (Ja1_node + Ja1_node_ii)
+
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde1 = volume_flux_cons(u_node, u_node_ii, Ja1_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], fluxtilde1,
+                                       equations, dg, i, j, k)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], fluxtilde1,
+                                       equations, dg, ii, j, k)
+            for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+                # We multiply by 0.5 because that is done in other parts of Trixi
+                flux1_noncons = volume_flux_noncons(u_node, u_node_ii, Ja1_avg,
+                                                    equations,
+                                                    NonConservativeJump(), noncons)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           0.5f0 * derivative_split[i, ii],
+                                           flux1_noncons,
+                                           equations, dg, noncons, i, j, k)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           -0.5f0 * derivative_split[ii, i],
+                                           flux1_noncons,
+                                           equations, dg, noncons, ii, j, k)
+            end
+        end
+    end
+
+    # FV-form flux `fhat` in x direction
+    fhat1_L[:, 1, :, :] .= zero(eltype(fhat1_L))
+    fhat1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fhat1_L))
+    fhat1_R[:, 1, :, :] .= zero(eltype(fhat1_R))
+    fhat1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fhat1_R))
+
+    fhat_temp[:, 1, :, :] .= zero(eltype(fhat1_L))
+    fhat_noncons_temp[:, :, 1, :, :] .= zero(eltype(fhat1_L))
+
+    # Compute local contribution to non-conservative flux
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_local = get_node_vars(u, equations, dg, i, j, k, element)
+        # pull the local contravariant vector
+        Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element)
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+            set_node_vars!(phi,
+                           volume_flux_noncons(u_local, Ja1_node, equations,
+                                               NonConservativeLocal(), noncons),
+                           equations, dg, noncons, i, j, k)
+        end
+    end
+
+    for k in eachnode(dg), j in eachnode(dg), i in 1:(nnodes(dg) - 1)
+        # Conservative part
+        for v in eachvariable(equations)
+            value = fhat_temp[v, i, j, k] + weights[i] * flux_temp[v, i, j, k]
+            fhat_temp[v, i + 1, j, k] = value
+            fhat1_L[v, i + 1, j, k] = value
+            fhat1_R[v, i + 1, j, k] = value
+        end
+        # Nonconservative part
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+            v in eachvariable(equations)
+
+            value = fhat_noncons_temp[v, noncons, i, j, k] +
+                    weights[i] * flux_noncons_temp[v, noncons, i, j, k]
+            fhat_noncons_temp[v, noncons, i + 1, j, k] = value
+
+            fhat1_L[v, i + 1, j, k] = fhat1_L[v, i + 1, j, k] +
+                                      phi[v, noncons, i, j, k] * value
+            fhat1_R[v, i + 1, j, k] = fhat1_R[v, i + 1, j, k] +
+                                      phi[v, noncons, i + 1, j, k] * value
+        end
+    end
+
+    # Apply correction term to the flux-differencing formula for nonconservative local * jump fluxes.
+    for k in eachnode(dg), j in eachnode(dg)
+        u_0 = get_node_vars(u, equations, dg, 1, j, k, element)
+        Ja1_node_0 = get_contravariant_vector(1, contravariant_vectors, 1, j, k,
+                                              element)
+
+        for i in 2:(nnodes(dg) - 1)
+            u_i = get_node_vars(u, equations, dg, i, j, k, element)
+            Ja1_node_i = get_contravariant_vector(1, contravariant_vectors, i, j, k,
+                                                  element)
+            Ja1_avg = 0.5f0 * (Ja1_node_0 + Ja1_node_i)
+
+            for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+                phi_jump = volume_flux_noncons(u_0, u_i, Ja1_avg, equations,
+                                               NonConservativeJump(), noncons)
+
+                for v in eachvariable(equations)
+                    # The factor of 2 is missing on each term because Trixi multiplies all the non-cons terms with 0.5
+                    fhat1_R[v, i, j, k] -= phi[v, noncons, i, j, k] * phi_jump[v]
+                    fhat1_L[v, i + 1, j, k] -= phi[v, noncons, i, j, k] * phi_jump[v]
+                end
+            end
+        end
+        u_N = get_node_vars(u, equations, dg, nnodes(dg), j, k, element)
+        Ja1_node_N = get_contravariant_vector(1, contravariant_vectors, nnodes(dg), j,
+                                              k, element)
+        Ja1_avg = 0.5f0 * (Ja1_node_0 + Ja1_node_N)
+
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+            phi_jump = volume_flux_noncons(u_0, u_N, Ja1_avg, equations,
+                                           NonConservativeJump(), noncons)
+
+            for v in eachvariable(equations)
+                # The factor of 2 is missing because Trixi multiplies all the non-cons terms with 0.5
+                fhat1_R[v, nnodes(dg), j, k] -= phi[v, noncons, nnodes(dg), j, k] *
+                                                phi_jump[v]
+            end
+        end
+    end
+
+    # Split form volume flux in orientation 2: y direction
+    flux_temp .= zero(eltype(flux_temp))
+    flux_noncons_temp .= zero(eltype(flux_noncons_temp))
+
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, k, element)
+
+        # pull the contravariant vectors in each coordinate direction
+        Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element)
+
+        for jj in (j + 1):nnodes(dg)
+            u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element)
+            # pull the contravariant vectors and compute the average
+            Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, k,
+                                                   element)
+            Ja2_avg = 0.5f0 * (Ja2_node + Ja2_node_jj)
+            # compute the contravariant sharp flux in the direction of the averaged contravariant vector
+            fluxtilde2 = volume_flux_cons(u_node, u_node_jj, Ja2_avg, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], fluxtilde2,
+                                       equations, dg, i, j, k)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], fluxtilde2,
+                                       equations, dg, i, jj, k)
+            for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+                # We multiply by 0.5 because that is done in other parts of Trixi
+                flux2_noncons = volume_flux_noncons(u_node, u_node_jj, Ja2_avg,
+                                                    equations,
+                                                    NonConservativeJump(), noncons)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           0.5f0 * derivative_split[j, jj],
+                                           flux2_noncons,
+                                           equations, dg, noncons, i, j, k)
+                multiply_add_to_node_vars!(flux_noncons_temp,
+                                           -0.5f0 * derivative_split[jj, j],
+                                           flux2_noncons,
+                                           equations, dg, noncons, i, jj, k)
+            end
+        end
+    end
+
+    # FV-form flux `fhat` in y direction
+    fhat2_L[:, :, 1, :] .= zero(eltype(fhat2_L))
+    fhat2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fhat2_L))
+    fhat2_R[:, :, 1, :] .= zero(eltype(fhat2_R))
+    fhat2_R[:, :, nnodes(dg) + 1, :] .= zero(eltype(fhat2_R))
+
+    fhat_temp[:, :, 1, :] .= zero(eltype(fhat1_L))
+    fhat_noncons_temp[:, :, :, 1, :] .= zero(eltype(fhat1_L))
+
+    # Compute local contribution to non-conservative flux
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_local = get_node_vars(u, equations, dg, i, j, k, element)
+        # pull the local contravariant vector
+        Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element)
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+            set_node_vars!(phi,
+                           volume_flux_noncons(u_local, Ja2_node, equations,
+                                               NonConservativeLocal(), noncons),
+                           equations, dg, noncons, i, j, k)
+        end
+    end
+
+    for k in eachnode(dg), j in 1:(nnodes(dg) - 1), i in eachnode(dg)
+        # Conservative part
+        for v in eachvariable(equations)
+            value = fhat_temp[v, i, j, k] + weights[j] * flux_temp[v, i, j, k]
+            fhat_temp[v, i, j + 1, k] = value
+            fhat2_L[v, i, j + 1, k] = value
+            fhat2_R[v, i, j + 1, k] = value
+        end
+        # Nonconservative part
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+            v in eachvariable(equations)
+
+            value = fhat_noncons_temp[v, noncons, i, j, k] +
+                    weights[j] * flux_noncons_temp[v, noncons, i, j, k]
+            fhat_noncons_temp[v, noncons, i, j + 1, k] = value
+
+            fhat2_L[v, i, j + 1, k] = fhat2_L[v, i, j + 1, k] +
+                                      phi[v, noncons, i, j, k] * value
+            fhat2_R[v, i, j + 1, k] = fhat2_R[v, i, j + 1, k] +
+                                      phi[v, noncons, i, j + 1, k] * value
+        end
+    end
+
+    # Apply correction term to the flux-differencing formula for nonconservative local * jump fluxes.
+    for k in eachnode(dg), i in eachnode(dg)
+        u_0 = get_node_vars(u, equations, dg, i, 1, k, element)
+        Ja2_node_0 = get_contravariant_vector(2, contravariant_vectors, i, 1, k,
+                                              element)
+
+        for j in 2:(nnodes(dg) - 1)
+            u_j = get_node_vars(u, equations, dg, i, j, k, element)
+            Ja2_node_j = get_contravariant_vector(2, contravariant_vectors, i, j, k,
+                                                  element)
+            Ja2_avg = 0.5f0 * (Ja2_node_0 + Ja2_node_j)
+
+            for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+                phi_jump = volume_flux_noncons(u_0, u_j, Ja2_avg, equations,
+                                               NonConservativeJump(), noncons)
+
+                for v in eachvariable(equations)
+                    # The factor of 2 is missing on each term because Trixi multiplies all the non-cons terms with 0.5
+                    fhat2_R[v, i, j, k] -= phi[v, noncons, i, j, k] * phi_jump[v]
+                    fhat2_L[v, i, j + 1, k] -= phi[v, noncons, i, j, k] * phi_jump[v]
+                end
+            end
+        end
+        u_N = get_node_vars(u, equations, dg, i, nnodes(dg), k, element)
+        Ja2_node_N = get_contravariant_vector(2, contravariant_vectors, i, nnodes(dg),
+                                              k, element)
+        Ja2_avg = 0.5f0 * (Ja2_node_0 + Ja2_node_N)
+
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+            phi_jump = volume_flux_noncons(u_0, u_N, Ja2_avg, equations,
+                                           NonConservativeJump(), noncons)
+
+            for v in eachvariable(equations)
+                # The factor of 2 is missing because Trixi multiplies all the non-cons terms with 0.5
+                fhat2_R[v, i, nnodes(dg), k] -= phi[v, noncons, i, nnodes(dg), k] *
+                                                phi_jump[v]
+            end
+        end
+    end
+
+    # FV-form flux `fhat` in z direction
+    fhat3_L[:, :, :, 1] .= zero(eltype(fhat3_L))
+    fhat3_L[:, :, :, nnodes(dg) + 1] .= zero(eltype(fhat3_L))
+    fhat3_R[:, :, :, 1] .= zero(eltype(fhat3_R))
+    fhat3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fhat3_R))
+
+    fhat_temp[:, :, :, 1] .= zero(eltype(fhat1_L))
+    fhat_noncons_temp[:, :, :, :, 1] .= zero(eltype(fhat1_L))
+
+    # Compute local contribution to non-conservative flux
+    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+        u_local = get_node_vars(u, equations, dg, i, j, k, element)
+        # pull the local contravariant vector
+        Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element)
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+            set_node_vars!(phi,
+                           volume_flux_noncons(u_local, Ja3_node, equations,
+                                               NonConservativeLocal(), noncons),
+                           equations, dg, noncons, i, j, k)
+        end
+    end
+
+    for k in 1:(nnodes(dg) - 1), j in eachnode(dg), i in eachnode(dg)
+        # Conservative part
+        for v in eachvariable(equations)
+            value = fhat_temp[v, i, j, k] + weights[k] * flux_temp[v, i, j, k]
+            fhat_temp[v, i, j, k + 1] = value
+            fhat3_L[v, i, j, k + 1] = value
+            fhat3_R[v, i, j, k + 1] = value
+        end
+        # Nonconservative part
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons),
+            v in eachvariable(equations)
+
+            value = fhat_noncons_temp[v, noncons, i, j, k] +
+                    weights[k] * flux_noncons_temp[v, noncons, i, j, k]
+            fhat_noncons_temp[v, noncons, i, j, k + 1] = value
+
+            fhat3_L[v, i, j, k + 1] = fhat3_L[v, i, j, k + 1] +
+                                      phi[v, noncons, i, j, k] * value
+            fhat3_R[v, i, j, k + 1] = fhat3_R[v, i, j, k + 1] +
+                                      phi[v, noncons, i, j, k + 1] * value
+        end
+    end
+
+    # Apply correction term to the flux-differencing formula for nonconservative local * jump fluxes.
+    for j in eachnode(dg), i in eachnode(dg)
+        u_0 = get_node_vars(u, equations, dg, i, j, 1, element)
+        Ja3_node_0 = get_contravariant_vector(3, contravariant_vectors, i, j, 1,
+                                              element)
+
+        for k in 2:(nnodes(dg) - 1)
+            u_k = get_node_vars(u, equations, dg, i, j, k, element)
+            Ja3_node_k = get_contravariant_vector(3, contravariant_vectors, i, j, k,
+                                                  element)
+            Ja3_avg = 0.5f0 * (Ja3_node_0 + Ja3_node_k)
+
+            for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+                phi_jump = volume_flux_noncons(u_0, u_k, Ja3_avg, equations,
+                                               NonConservativeJump(), noncons)
+
+                for v in eachvariable(equations)
+                    # The factor of 2 is missing on each term because Trixi multiplies all the non-cons terms with 0.5
+                    fhat3_R[v, i, j, k] -= phi[v, noncons, i, j, k] * phi_jump[v]
+                    fhat3_L[v, i, j, k + 1] -= phi[v, noncons, i, j, k] * phi_jump[v]
+                end
+            end
+        end
+        u_N = get_node_vars(u, equations, dg, i, j, nnodes(dg), element)
+        Ja3_node_N = get_contravariant_vector(3, contravariant_vectors, i, j,
+                                              nnodes(dg), element)
+        Ja3_avg = 0.5f0 * (Ja3_node_0 + Ja3_node_N)
+
+        for noncons in 1:n_nonconservative_terms(volume_flux_noncons)
+            phi_jump = volume_flux_noncons(u_0, u_N, Ja3_avg, equations,
+                                           NonConservativeJump(), noncons)
+
+            for v in eachvariable(equations)
+                # The factor of 2 is missing because Trixi multiplies all the non-cons terms with 0.5
+                fhat3_R[v, i, j, nnodes(dg)] -= phi[v, noncons, i, j, nnodes(dg)] *
+                                                phi_jump[v]
+            end
+        end
+    end
+
+    return nothing
+end
 end # @muladd
diff --git a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
index f9b671568d0..1743ae19085 100644
--- a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
+++ b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
@@ -16,6 +16,7 @@ function create_cache(mesh::P4estMesh{3},
     A4dp1_y = Array{uEltype, 4}
     A4dp1_z = Array{uEltype, 4}
     A4d = Array{uEltype, 4}
+    A5d = Array{uEltype, 5}
 
     fhat1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1,
                                        nnodes(dg), nnodes(dg))
@@ -44,26 +45,27 @@ function create_cache(mesh::P4estMesh{3},
                                                                  nvariables(equations),
                                                                  nnodes(dg))
 
-    # TODO: nonconservative terms
-    # if have_nonconservative_terms(equations) == true
-    #     # Extract the nonconservative flux as a dispatch argument for `n_nonconservative_terms`
-    #     _, volume_flux_noncons = volume_integral.volume_flux_dg
-
-    #     flux_nonconservative_temp_threaded = A4d[A4d(undef, nvariables(equations),
-    #                                                  n_nonconservative_terms(volume_flux_noncons),
-    #                                                  nnodes(dg), nnodes(dg))
-    #                                              for _ in 1:Threads.nthreads()]
-    #     fhat_nonconservative_temp_threaded = A4d[A4d(undef, nvariables(equations),
-    #                                                  n_nonconservative_terms(volume_flux_noncons),
-    #                                                  nnodes(dg), nnodes(dg))
-    #                                              for _ in 1:Threads.nthreads()]
-    #     phi_threaded = A4d[A4d(undef, nvariables(equations),
-    #                            n_nonconservative_terms(volume_flux_noncons),
-    #                            nnodes(dg), nnodes(dg))
-    #                        for _ in 1:Threads.nthreads()]
-    #     cache = (; cache..., flux_nonconservative_temp_threaded,
-    #              fhat_nonconservative_temp_threaded, phi_threaded)
-    # end
+    if have_nonconservative_terms(equations) == true
+        # Extract the nonconservative flux as a dispatch argument for `n_nonconservative_terms`
+        _, volume_flux_noncons = volume_integral.volume_flux_dg
+
+        flux_nonconservative_temp_threaded = A5d[A5d(undef, nvariables(equations),
+                                                     n_nonconservative_terms(volume_flux_noncons),
+                                                     nnodes(dg), nnodes(dg),
+                                                     nnodes(dg))
+                                                 for _ in 1:Threads.nthreads()]
+        fhat_nonconservative_temp_threaded = A5d[A5d(undef, nvariables(equations),
+                                                     n_nonconservative_terms(volume_flux_noncons),
+                                                     nnodes(dg), nnodes(dg),
+                                                     nnodes(dg))
+                                                 for _ in 1:Threads.nthreads()]
+        phi_threaded = A5d[A5d(undef, nvariables(equations),
+                               n_nonconservative_terms(volume_flux_noncons),
+                               nnodes(dg), nnodes(dg), nnodes(dg))
+                           for _ in 1:Threads.nthreads()]
+        cache = (; cache..., flux_nonconservative_temp_threaded,
+                 fhat_nonconservative_temp_threaded, phi_threaded)
+    end
 
     return (; cache..., antidiffusive_fluxes,
             fhat1_L_threaded, fhat1_R_threaded, fhat2_L_threaded, fhat2_R_threaded,

From 08ebe50afd5b3ecb3fe8b746517b2dcc02e411e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Rueda-Ram=C3=ADrez?= <aruedara@uni-koeln.de>
Date: Thu, 18 Sep 2025 14:51:44 +0200
Subject: [PATCH 05/18] Added local*symmetric form of the Powell term for 3D

---
 src/equations/ideal_glm_mhd_3d.jl | 361 ++++++++++++++++++++++++++++++
 1 file changed, 361 insertions(+)

diff --git a/src/equations/ideal_glm_mhd_3d.jl b/src/equations/ideal_glm_mhd_3d.jl
index 80d6cae67db..f59a04c4040 100644
--- a/src/equations/ideal_glm_mhd_3d.jl
+++ b/src/equations/ideal_glm_mhd_3d.jl
@@ -326,6 +326,367 @@ end
     return f
 end
 
+# For `VolumeIntegralSubcellLimiting` the nonconservative flux is created as a callable struct to
+# enable dispatch on the type of the nonconservative term (local / symmetric).
+"""
+    flux_nonconservative_powell_local_symmetric(u_ll, u_rr,
+                                                orientation::Integer,
+                                                equations::IdealGlmMhdEquations3D)
+    flux_nonconservative_powell_local_symmetric(u_ll, u_rr,
+                                                normal_direction::AbstractVector,
+                                                equations::IdealGlmMhdEquations3D)
+
+Non-symmetric two-point flux discretizing the nonconservative (source) term of
+Powell and the Galilean nonconservative term associated with the GLM multiplier
+of the [`IdealGlmMhdEquations3D`](@ref).
+
+This implementation uses a non-conservative term that can be written as the product
+of local and symmetric parts. It is equivalent to the non-conservative flux of Bohm
+et al. [`flux_nonconservative_powell`](@ref) for conforming meshes but it yields different
+results on non-conforming meshes(!). On curvilinear meshes this formulation applies the
+local normal direction compared to the averaged one used in [`flux_nonconservative_powell`](@ref).
+
+The two other flux functions with the same name return either the local
+or symmetric portion of the non-conservative flux based on the type of the
+nonconservative_type argument, employing multiple dispatch. They are used to
+compute the subcell fluxes in dg_3d_subcell_limiters.jl.
+
+## References
+- Rueda-Ramírez, Gassner (2023). A Flux-Differencing Formula for Split-Form Summation By Parts
+  Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf.
+"""
+@inline function (noncons_flux::FluxNonConservativePowellLocalSymmetric)(u_ll, u_rr,
+                                                                         orientation::Integer,
+                                                                         equations::IdealGlmMhdEquations3D)
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr
+
+    v1_ll = rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+    v3_ll = rho_v3_ll / rho_ll
+    v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll
+
+    # Powell nonconservative term:   (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0)
+    # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3})
+    psi_avg = (psi_ll + psi_rr) #* 0.5 # The flux is already multiplied by 0.5 wherever it is used in the code
+    if orientation == 1
+        B1_avg = (B1_ll + B1_rr) #* 0.5 # The flux is already multiplied by 0.5 wherever it is used in the code
+        f = SVector(0,
+                    B1_ll * B1_avg,
+                    B2_ll * B1_avg,
+                    B3_ll * B1_avg,
+                    v_dot_B_ll * B1_avg + v1_ll * psi_ll * psi_avg,
+                    v1_ll * B1_avg,
+                    v2_ll * B1_avg,
+                    v3_ll * B1_avg,
+                    v1_ll * psi_avg)
+    elseif orientation == 2
+        B2_avg = (B2_ll + B2_rr) #* 0.5 # The flux is already multiplied by 0.5 wherever it is used in the code
+        f = SVector(0,
+                    B1_ll * B2_avg,
+                    B2_ll * B2_avg,
+                    B3_ll * B2_avg,
+                    v_dot_B_ll * B2_avg + v2_ll * psi_ll * psi_avg,
+                    v1_ll * B2_avg,
+                    v2_ll * B2_avg,
+                    v3_ll * B2_avg,
+                    v2_ll * psi_avg)
+    else # orientation == 3
+        B3_avg = (B3_ll + B3_rr) #* 0.5 # The flux is already multiplied by 0.5 wherever it is used in the code
+        f = SVector(0,
+                    B1_ll * B3_avg,
+                    B2_ll * B3_avg,
+                    B3_ll * B3_avg,
+                    v_dot_B_ll * B3_avg + v3_ll * psi_ll * psi_avg,
+                    v1_ll * B3_avg,
+                    v2_ll * B3_avg,
+                    v3_ll * B3_avg,
+                    v3_ll * psi_avg)
+    end
+
+    return f
+end
+
+@inline function (noncons_flux::FluxNonConservativePowellLocalSymmetric)(u_ll, u_rr,
+                                                                         normal_direction::AbstractVector,
+                                                                         equations::IdealGlmMhdEquations3D)
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr
+
+    v1_ll = rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+    v3_ll = rho_v3_ll / rho_ll
+    v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll
+
+    # Despite the `_avg` names these are plain sums: the factor 0.5 of the averages is
+    # omitted since it is already applied when this function is called.
+    psi_avg = (psi_ll + psi_rr)
+    B1_avg = (B1_ll + B1_rr)
+    B2_avg = (B2_ll + B2_rr)
+    B3_avg = (B3_ll + B3_rr)
+
+    B_dot_n_avg = B1_avg * normal_direction[1] + B2_avg * normal_direction[2] +
+                  B3_avg * normal_direction[3]
+    v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] +
+                 v3_ll * normal_direction[3]
+
+    # Powell nonconservative term:   (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0)
+    # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3})
+    f = SVector(0,
+                B1_ll * B_dot_n_avg,
+                B2_ll * B_dot_n_avg,
+                B3_ll * B_dot_n_avg,
+                v_dot_B_ll * B_dot_n_avg + v_dot_n_ll * psi_ll * psi_avg,
+                v1_ll * B_dot_n_avg,
+                v2_ll * B_dot_n_avg,
+                v3_ll * B_dot_n_avg,
+                v_dot_n_ll * psi_avg)
+
+    return f
+end
+
+"""
+    flux_nonconservative_powell_local_symmetric(u_ll, orientation::Integer,
+                                                equations::IdealGlmMhdEquations3D,
+                                                nonconservative_type::NonConservativeLocal,
+                                                nonconservative_term::Integer)
+    flux_nonconservative_powell_local_symmetric(u_ll, normal_direction_ll::AbstractVector,
+                                                equations::IdealGlmMhdEquations3D,
+                                                nonconservative_type::NonConservativeLocal,
+                                                nonconservative_term::Integer)
+
+Local part of the Powell and GLM non-conservative terms. Needed for the calculation of
+the non-conservative staggered "fluxes" for subcell limiting. See, e.g.,
+- Rueda-Ramírez, Gassner (2023). A Flux-Differencing Formula for Split-Form Summation By Parts
+  Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf.
+This function is used to compute the subcell fluxes in dg_3d_subcell_limiters.jl.
+"""
+@inline function (noncons_flux::FluxNonConservativePowellLocalSymmetric)(u_ll,
+                                                                         orientation::Integer,
+                                                                         equations::IdealGlmMhdEquations3D,
+                                                                         nonconservative_type::NonConservativeLocal,
+                                                                         nonconservative_term::Integer)
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll
+
+    if nonconservative_term == 1
+        # Powell nonconservative term:   (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0)
+        v1_ll = rho_v1_ll / rho_ll
+        v2_ll = rho_v2_ll / rho_ll
+        v3_ll = rho_v3_ll / rho_ll
+        v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll
+        f = SVector(0,
+                    B1_ll,
+                    B2_ll,
+                    B3_ll,
+                    v_dot_B_ll,
+                    v1_ll,
+                    v2_ll,
+                    v3_ll,
+                    0)
+    else # nonconservative_term == 2
+        # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3})
+        if orientation == 1
+            v1_ll = rho_v1_ll / rho_ll
+            f = SVector(0,
+                        0,
+                        0,
+                        0,
+                        v1_ll * psi_ll,
+                        0,
+                        0,
+                        0,
+                        v1_ll)
+        elseif orientation == 2
+            v2_ll = rho_v2_ll / rho_ll
+            f = SVector(0,
+                        0,
+                        0,
+                        0,
+                        v2_ll * psi_ll,
+                        0,
+                        0,
+                        0,
+                        v2_ll)
+        else # orientation == 3
+            v3_ll = rho_v3_ll / rho_ll
+            f = SVector(0,
+                        0,
+                        0,
+                        0,
+                        v3_ll * psi_ll,
+                        0,
+                        0,
+                        0,
+                        v3_ll)
+        end
+    end
+    return f
+end
+
+@inline function (noncons_flux::FluxNonConservativePowellLocalSymmetric)(u_ll,
+                                                                         normal_direction_ll::AbstractVector,
+                                                                         equations::IdealGlmMhdEquations3D,
+                                                                         nonconservative_type::NonConservativeLocal,
+                                                                         nonconservative_term::Integer)
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll
+
+    if nonconservative_term == 1
+        # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0)
+        v1_ll = rho_v1_ll / rho_ll
+        v2_ll = rho_v2_ll / rho_ll
+        v3_ll = rho_v3_ll / rho_ll
+        v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll
+
+        f = SVector(0,
+                    B1_ll,
+                    B2_ll,
+                    B3_ll,
+                    v_dot_B_ll,
+                    v1_ll,
+                    v2_ll,
+                    v3_ll,
+                    0)
+    else # nonconservative_term == 2
+        # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3})
+        v1_ll = rho_v1_ll / rho_ll
+        v2_ll = rho_v2_ll / rho_ll
+        v3_ll = rho_v3_ll / rho_ll
+        v_dot_n_ll = v1_ll * normal_direction_ll[1] + v2_ll * normal_direction_ll[2] +
+                     v3_ll * normal_direction_ll[3]
+
+        f = SVector(0,
+                    0,
+                    0,
+                    0,
+                    v_dot_n_ll * psi_ll,
+                    0,
+                    0,
+                    0,
+                    v_dot_n_ll)
+    end
+    return f
+end
+
+"""
+    flux_nonconservative_powell_local_symmetric(u_ll, u_rr, orientation::Integer,
+                                                equations::IdealGlmMhdEquations3D,
+                                                nonconservative_type::NonConservativeSymmetric,
+                                                nonconservative_term::Integer)
+    flux_nonconservative_powell_local_symmetric(u_ll, u_rr, normal_direction_avg::AbstractVector,
+                                                equations::IdealGlmMhdEquations3D,
+                                                nonconservative_type::NonConservativeSymmetric,
+                                                nonconservative_term::Integer)
+
+Symmetric part of the Powell and GLM non-conservative terms. Needed for the calculation of
+the non-conservative staggered "fluxes" for subcell limiting. See, e.g.,
+- Rueda-Ramírez, Gassner (2023). A Flux-Differencing Formula for Split-Form Summation By Parts
+  Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf.
+This function is used to compute the subcell fluxes in dg_3d_subcell_limiters.jl.
+"""
+@inline function (noncons_flux::FluxNonConservativePowellLocalSymmetric)(u_ll, u_rr,
+                                                                         orientation::Integer,
+                                                                         equations::IdealGlmMhdEquations3D,
+                                                                         nonconservative_type::NonConservativeSymmetric,
+                                                                         nonconservative_term::Integer)
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr
+
+    if nonconservative_term == 1
+        # Powell nonconservative term:   (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0)
+        if orientation == 1
+            B1_avg = (B1_ll + B1_rr)#* 0.5 # The flux is already multiplied by 0.5 wherever it is used in the code
+            f = SVector(0,
+                        B1_avg,
+                        B1_avg,
+                        B1_avg,
+                        B1_avg,
+                        B1_avg,
+                        B1_avg,
+                        B1_avg,
+                        0)
+        elseif orientation == 2
+            B2_avg = (B2_ll + B2_rr)#* 0.5 # The flux is already multiplied by 0.5 wherever it is used in the code
+            f = SVector(0,
+                        B2_avg,
+                        B2_avg,
+                        B2_avg,
+                        B2_avg,
+                        B2_avg,
+                        B2_avg,
+                        B2_avg,
+                        0)
+        else # orientation == 3
+            B3_avg = (B3_ll + B3_rr)#* 0.5 # The flux is already multiplied by 0.5 wherever it is used in the code
+            f = SVector(0,
+                        B3_avg,
+                        B3_avg,
+                        B3_avg,
+                        B3_avg,
+                        B3_avg,
+                        B3_avg,
+                        B3_avg,
+                        0)
+        end
+    else # nonconservative_term == 2
+        # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3})
+        psi_avg = (psi_ll + psi_rr)#* 0.5 # The flux is already multiplied by 0.5 wherever it is used in the code
+        f = SVector(0,
+                    0,
+                    0,
+                    0,
+                    psi_avg,
+                    0,
+                    0,
+                    0,
+                    psi_avg)
+    end
+
+    return f
+end
+
+@inline function (noncons_flux::FluxNonConservativePowellLocalSymmetric)(u_ll, u_rr,
+                                                                         normal_direction_avg::AbstractVector,
+                                                                         equations::IdealGlmMhdEquations3D,
+                                                                         nonconservative_type::NonConservativeSymmetric,
+                                                                         nonconservative_term::Integer)
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr
+
+    if nonconservative_term == 1
+        # Powell nonconservative term:   (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0)
+        # The factor 0.5 of the average can be omitted since it is already applied when this
+        # function is called.
+        B_dot_n_avg = ((B1_ll + B1_rr) * normal_direction_avg[1] +
+                       (B2_ll + B2_rr) * normal_direction_avg[2] +
+                       (B3_ll + B3_rr) * normal_direction_avg[3])
+        f = SVector(0,
+                    B_dot_n_avg,
+                    B_dot_n_avg,
+                    B_dot_n_avg,
+                    B_dot_n_avg,
+                    B_dot_n_avg,
+                    B_dot_n_avg,
+                    B_dot_n_avg,
+                    0)
+    else # nonconservative_term == 2
+        # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3})
+        # The factor 0.5 of the average can be omitted since it is already applied when this
+        # function is called.
+        psi_avg = (psi_ll + psi_rr)
+        f = SVector(0,
+                    0,
+                    0,
+                    0,
+                    psi_avg,
+                    0,
+                    0,
+                    0,
+                    psi_avg)
+    end
+
+    return f
+end
+
 """
     flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations3D)
 

From 139300e2da2d3cf0da62c4ca577776fbce612247 Mon Sep 17 00:00:00 2001
From: bennibolm <benjamin.bolm@gmx.de>
Date: Thu, 18 Sep 2025 16:05:59 +0200
Subject: [PATCH 06/18] Add support for nonconservative terms; add test

---
 .../elixir_mhd_shockcapturing_subcell.jl      | 68 +++++++-------
 .../dgsem_tree/dg_3d_subcell_limiters.jl      | 93 +++++++++++--------
 test/test_p4est_3d.jl                         | 39 ++++++++
 3 files changed, 129 insertions(+), 71 deletions(-)

diff --git a/examples/p4est_3d_dgsem/elixir_mhd_shockcapturing_subcell.jl b/examples/p4est_3d_dgsem/elixir_mhd_shockcapturing_subcell.jl
index 7bc6c775232..9aa4896afac 100644
--- a/examples/p4est_3d_dgsem/elixir_mhd_shockcapturing_subcell.jl
+++ b/examples/p4est_3d_dgsem/elixir_mhd_shockcapturing_subcell.jl
@@ -34,45 +34,51 @@ function initial_condition_blast_wave(x, t, equations::IdealGlmMhdEquations3D)
 end
 initial_condition = initial_condition_blast_wave
 
-# Up to version 0.13.0, `max_abs_speed_naive` was used as the default wave speed estimate of
-# `const flux_lax_friedrichs = FluxLaxFriedrichs(), i.e., `FluxLaxFriedrichs(max_abs_speed = max_abs_speed_naive)`.
-# In the `StepsizeCallback`, though, the less diffusive `max_abs_speeds` is employed which is consistent with `max_abs_speed`.
-# Thus, we exchanged in PR#2458 the default wave speed used in the LLF flux to `max_abs_speed`.
-# To ensure that every example still runs we specify explicitly `FluxLaxFriedrichs(max_abs_speed_naive)`.
-# We remark, however, that the now default `max_abs_speed` is in general recommended due to compliance with the
-# `StepsizeCallback` (CFL-Condition) and less diffusion.
-surface_flux = (FluxLaxFriedrichs(max_abs_speed_naive),
-                flux_nonconservative_powell_local_symmetric)
-# volume_flux = (flux_derigs_etal, flux_nonconservative_powell_local_symmetric)
-volume_flux = (flux_central, flux_nonconservative_powell_local_symmetric)
-
-# surface_flux = (FluxLaxFriedrichs(max_abs_speed_naive), flux_nonconservative_powell)
-# volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell)
-
-surface_flux = (FluxLaxFriedrichs(max_abs_speed_naive), flux_nonconservative_powell)
-volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell)
-
-# TODO: Test with working fluxes
-
-basis = LobattoLegendreBasis(3)
-
+surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell_local_symmetric)
+volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell_local_symmetric)
+polydeg = 3
+basis = LobattoLegendreBasis(polydeg)
 limiter_idp = SubcellLimiterIDP(equations, basis;
                                 positivity_variables_cons = ["rho"],
-                                positivity_variables_nonlinear = [pressure],
-                                positivity_correction_factor = 0.1)
+                                positivity_variables_nonlinear = [pressure])
 volume_integral = VolumeIntegralSubcellLimiting(limiter_idp;
                                                 volume_flux_dg = volume_flux,
                                                 volume_flux_fv = surface_flux)
 solver = DGSEM(basis, surface_flux, volume_integral)
 
-coordinates_min = (-0.5, -0.5, -0.5)
-coordinates_max = (0.5, 0.5, 0.5)
+# Mapping as described in https://arxiv.org/abs/2012.12040 but with slightly less warping.
+# The mapping will be interpolated at tree level, and then refined without changing
+# the geometry interpolant.
+function mapping(xi_, eta_, zeta_)
+    # Transform input variables between -1 and 1 onto [0,3]
+    xi = 1.5 * xi_ + 1.5
+    eta = 1.5 * eta_ + 1.5
+    zeta = 1.5 * zeta_ + 1.5
+
+    y = eta + # warp y first, using only the unwarped xi, eta, zeta
+        3 / 11 * (cos(1.5 * pi * (2 * xi - 3) / 3) *
+         cos(0.5 * pi * (2 * eta - 3) / 3) *
+         cos(0.5 * pi * (2 * zeta - 3) / 3))
+
+    x = xi + # warp x using xi, zeta and the already warped y from above
+        3 / 11 * (cos(0.5 * pi * (2 * xi - 3) / 3) *
+         cos(2 * pi * (2 * y - 3) / 3) *
+         cos(0.5 * pi * (2 * zeta - 3) / 3))
+
+    z = zeta + # warp z using zeta and the already warped x and y from above
+        3 / 11 * (cos(0.5 * pi * (2 * x - 3) / 3) *
+         cos(pi * (2 * y - 3) / 3) *
+         cos(0.5 * pi * (2 * zeta - 3) / 3))
+
+    return SVector(x, y, z)
+end
+
 trees_per_dimension = (2, 2, 2)
 mesh = P4estMesh(trees_per_dimension,
                  polydeg = 3,
-                 #  mapping = mapping,
-                 coordinates_min = coordinates_min,
-                 coordinates_max = coordinates_max,
+                 mapping = mapping,
+                 # coordinates_min = (0.0, 0.0, 0.0),
+                 # coordinates_max = (3.0, 3.0, 3.0),
                  initial_refinement_level = 2,
                  periodicity = true)
 
@@ -81,7 +87,7 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
 ###############################################################################
 # ODE solvers, callbacks etc.
 
-tspan = (0.0, 0.1)
+tspan = (0.0, 0.5)
 ode = semidiscretize(semi, tspan)
 
 summary_callback = SummaryCallback()
@@ -97,7 +103,7 @@ save_solution = SaveSolutionCallback(interval = 100,
                                      solution_variables = cons2prim,
                                      extra_node_variables = (:limiting_coefficient,))
 
-cfl = 0.4
+cfl = 0.9
 stepsize_callback = StepsizeCallback(cfl = cfl)
 
 glm_speed_callback = GlmSpeedCallback(glm_scale = 0.5, cfl = cfl)
diff --git a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
index 1743ae19085..c1e482d087f 100644
--- a/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
+++ b/src/solvers/dgsem_tree/dg_3d_subcell_limiters.jl
@@ -183,44 +183,57 @@ end
     return nothing
 end
 
-# TODO: 2d version for now.
-# # Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for conservative systems.
-# @inline function calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R,
-#                                          fstar1_L, fstar1_R, fstar2_L, fstar2_R,
-#                                          u,
-#                                          mesh::Union{TreeMesh{2}, StructuredMesh{2},
-#                                                      P4estMesh{2}},
-#                                          nonconservative_terms::True, equations,
-#                                          limiter::SubcellLimiterIDP, dg, element, cache)
-#     @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R = cache.antidiffusive_fluxes
-
-#     for j in eachnode(dg), i in 2:nnodes(dg)
-#         for v in eachvariable(equations)
-#             antidiffusive_flux1_L[v, i, j, element] = fhat1_L[v, i, j] -
-#                                                       fstar1_L[v, i, j]
-#             antidiffusive_flux1_R[v, i, j, element] = fhat1_R[v, i, j] -
-#                                                       fstar1_R[v, i, j]
-#         end
-#     end
-#     for j in 2:nnodes(dg), i in eachnode(dg)
-#         for v in eachvariable(equations)
-#             antidiffusive_flux2_L[v, i, j, element] = fhat2_L[v, i, j] -
-#                                                       fstar2_L[v, i, j]
-#             antidiffusive_flux2_R[v, i, j, element] = fhat2_R[v, i, j] -
-#                                                       fstar2_R[v, i, j]
-#         end
-#     end
-
-#     antidiffusive_flux1_L[:, 1, :, element] .= zero(eltype(antidiffusive_flux1_L))
-#     antidiffusive_flux1_L[:, nnodes(dg) + 1, :, element] .= zero(eltype(antidiffusive_flux1_L))
-#     antidiffusive_flux1_R[:, 1, :, element] .= zero(eltype(antidiffusive_flux1_R))
-#     antidiffusive_flux1_R[:, nnodes(dg) + 1, :, element] .= zero(eltype(antidiffusive_flux1_R))
-
-#     antidiffusive_flux2_L[:, :, 1, element] .= zero(eltype(antidiffusive_flux2_L))
-#     antidiffusive_flux2_L[:, :, nnodes(dg) + 1, element] .= zero(eltype(antidiffusive_flux2_L))
-#     antidiffusive_flux2_R[:, :, 1, element] .= zero(eltype(antidiffusive_flux2_R))
-#     antidiffusive_flux2_R[:, :, nnodes(dg) + 1, element] .= zero(eltype(antidiffusive_flux2_R))
-
-#     return nothing
-# end
+# Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar` for systems with nonconservative terms (separate `_L` and `_R` fluxes).
+@inline function calcflux_antidiffusive!(fhat1_L, fhat1_R, fhat2_L, fhat2_R,
+                                         fhat3_L, fhat3_R,
+                                         fstar1_L, fstar1_R, fstar2_L, fstar2_R,
+                                         fstar3_L, fstar3_R,
+                                         u,
+                                         mesh::P4estMesh{3},
+                                         nonconservative_terms::True, equations,
+                                         limiter::SubcellLimiterIDP, dg, element, cache)
+    @unpack antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R, antidiffusive_flux3_L, antidiffusive_flux3_R = cache.antidiffusive_fluxes
+
+    for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) # first direction; i = 1 and i = nnodes(dg) + 1 are zeroed below
+        for v in eachvariable(equations)
+            antidiffusive_flux1_L[v, i, j, k, element] = fhat1_L[v, i, j, k] -
+                                                         fstar1_L[v, i, j, k]
+            antidiffusive_flux1_R[v, i, j, k, element] = fhat1_R[v, i, j, k] -
+                                                         fstar1_R[v, i, j, k]
+        end
+    end
+    for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg) # second direction; j = 1 and j = nnodes(dg) + 1 are zeroed below
+        for v in eachvariable(equations)
+            antidiffusive_flux2_L[v, i, j, k, element] = fhat2_L[v, i, j, k] -
+                                                         fstar2_L[v, i, j, k]
+            antidiffusive_flux2_R[v, i, j, k, element] = fhat2_R[v, i, j, k] -
+                                                         fstar2_R[v, i, j, k]
+        end
+    end
+    for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg) # third direction; k = 1 and k = nnodes(dg) + 1 are zeroed below
+        for v in eachvariable(equations)
+            antidiffusive_flux3_L[v, i, j, k, element] = fhat3_L[v, i, j, k] -
+                                                         fstar3_L[v, i, j, k]
+            antidiffusive_flux3_R[v, i, j, k, element] = fhat3_R[v, i, j, k] -
+                                                         fstar3_R[v, i, j, k]
+        end
+    end
+
+    antidiffusive_flux1_L[:, 1, :, :, element] .= zero(eltype(antidiffusive_flux1_L)) # first and last index in direction 1 are set to zero
+    antidiffusive_flux1_L[:, nnodes(dg) + 1, :, :, element] .= zero(eltype(antidiffusive_flux1_L))
+    antidiffusive_flux1_R[:, 1, :, :, element] .= zero(eltype(antidiffusive_flux1_R))
+    antidiffusive_flux1_R[:, nnodes(dg) + 1, :, :, element] .= zero(eltype(antidiffusive_flux1_R))
+
+    antidiffusive_flux2_L[:, :, 1, :, element] .= zero(eltype(antidiffusive_flux2_L)) # same for direction 2
+    antidiffusive_flux2_L[:, :, nnodes(dg) + 1, :, element] .= zero(eltype(antidiffusive_flux2_L))
+    antidiffusive_flux2_R[:, :, 1, :, element] .= zero(eltype(antidiffusive_flux2_R))
+    antidiffusive_flux2_R[:, :, nnodes(dg) + 1, :, element] .= zero(eltype(antidiffusive_flux2_R))
+
+    antidiffusive_flux3_L[:, :, :, 1, element] .= zero(eltype(antidiffusive_flux3_L)) # same for direction 3
+    antidiffusive_flux3_L[:, :, :, nnodes(dg) + 1, element] .= zero(eltype(antidiffusive_flux3_L))
+    antidiffusive_flux3_R[:, :, :, 1, element] .= zero(eltype(antidiffusive_flux3_R))
+    antidiffusive_flux3_R[:, :, :, nnodes(dg) + 1, element] .= zero(eltype(antidiffusive_flux3_R))
+
+    return nothing
+end
 end # @muladd
diff --git a/test/test_p4est_3d.jl b/test/test_p4est_3d.jl
index cdce79d0d7c..6711af6a05b 100644
--- a/test/test_p4est_3d.jl
+++ b/test/test_p4est_3d.jl
@@ -825,6 +825,45 @@ end
     end
 end
 
+@trixi_testset "elixir_mhd_shockcapturing_subcell.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_shockcapturing_subcell.jl"),
+                        l2=[
+                            0.0059340191538310005,
+                            0.006283749821992117,
+                            0.00776614780511013,
+                            0.006308928588096081,
+                            0.02307409839907803,
+                            0.005395582058152679,
+                            0.007206446732909664,
+                            0.0054239694752144145,
+                            1.0267069826457686e-5
+                        ],
+                        linf=[
+                            0.26892628360831483,
+                            0.23437156515448437,
+                            0.3609031724258315,
+                            0.22466728194150376,
+                            0.8703707153009601,
+                            0.2442543980664369,
+                            0.21250673584918245,
+                            0.23503747011075915,
+                            0.0011551893939651886
+                        ],
+                        tspan=(0.0, 0.04))
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        # Larger values for allowed allocations due to usage of custom
+        # integrator which are not *recorded* for the methods from
+        # OrdinaryDiffEq.jl
+        # Corresponding issue: https://github.com/trixi-framework/Trixi.jl/issues/1877
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 15000
+    end
+end
+
 @trixi_testset "elixir_mhd_amr_entropy_bounded.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_amr_entropy_bounded.jl"),
                         l2=[

From d69017c5cea958ecd7a6a142b264844af68f96d9 Mon Sep 17 00:00:00 2001
From: bennibolm <benjamin.bolm@gmx.de>
Date: Thu, 18 Sep 2025 16:40:32 +0200
Subject: [PATCH 07/18] Add `isvalid` for 3D MHD

---
 src/equations/ideal_glm_mhd_3d.jl | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/equations/ideal_glm_mhd_3d.jl b/src/equations/ideal_glm_mhd_3d.jl
index f59a04c4040..ab111db7821 100644
--- a/src/equations/ideal_glm_mhd_3d.jl
+++ b/src/equations/ideal_glm_mhd_3d.jl
@@ -326,7 +326,7 @@ end
     return f
 end
 
-# For `VolumeIntegralSubcellLimiting` the nonconservative flux is created as a callable struct to 
+# For `VolumeIntegralSubcellLimiting` the nonconservative flux is created as a callable struct to
 # enable dispatch on the type of the nonconservative term (symmetric / jump).
 """
     flux_nonconservative_powell_local_symmetric(u_ll, u_rr,
@@ -1722,6 +1722,15 @@ end
             cons[9]^2 / 2)
 end
 
+# State validation for Newton-bisection method of subcell IDP limiting
+@inline function Base.isvalid(u, equations::IdealGlmMhdEquations2D)
+    p = pressure(u, equations)
+    if u[1] <= 0 || p <= 0
+        return false
+    end
+    return true
+end
+
 # Calculate the cross helicity (\vec{v}⋅\vec{B}) for a conservative state `cons'
 @inline function cross_helicity(cons, ::IdealGlmMhdEquations3D)
     return (cons[2] * cons[6] + cons[3] * cons[7] + cons[4] * cons[8]) / cons[1]

From b2fcff1a858701a009f244a9957195b1a72d499f Mon Sep 17 00:00:00 2001
From: bennibolm <benjamin.bolm@gmx.de>
Date: Thu, 18 Sep 2025 17:08:17 +0200
Subject: [PATCH 08/18] Fix typo

---
 src/equations/ideal_glm_mhd_3d.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/equations/ideal_glm_mhd_3d.jl b/src/equations/ideal_glm_mhd_3d.jl
index ab111db7821..5a60e1952a4 100644
--- a/src/equations/ideal_glm_mhd_3d.jl
+++ b/src/equations/ideal_glm_mhd_3d.jl
@@ -1723,7 +1723,7 @@ end
 end
 
 # State validation for Newton-bisection method of subcell IDP limiting
-@inline function Base.isvalid(u, equations::IdealGlmMhdEquations2D)
+@inline function Base.isvalid(u, equations::IdealGlmMhdEquations3D)
     p = pressure(u, equations)
     if u[1] <= 0 || p <= 0
         return false

From f8a6320e535cbb3b5c03fed182349a685a6fdde0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Rueda-Ram=C3=ADrez?= <aruedara@uni-koeln.de>
Date: Thu, 18 Sep 2025 18:44:41 +0200
Subject: [PATCH 09/18] Added subcell-limiting-ready 3D fluxes for multi-ion
 MHD (P4estMesh)

---
 src/equations/ideal_glm_mhd_multiion_3d.jl | 307 ++++++++++++++++++++-
 1 file changed, 301 insertions(+), 6 deletions(-)

diff --git a/src/equations/ideal_glm_mhd_multiion_3d.jl b/src/equations/ideal_glm_mhd_multiion_3d.jl
index f5a88ca4192..23d04d71bd1 100644
--- a/src/equations/ideal_glm_mhd_multiion_3d.jl
+++ b/src/equations/ideal_glm_mhd_multiion_3d.jl
@@ -304,6 +304,16 @@ end
     return SVector(f)
 end
 
+# For `VolumeIntegralSubcellLimiting` the nonconservative flux is created as a callable struct to 
+# enable dispatch on the type of the nonconservative term (symmetric / jump).
+struct FluxNonConservativeRuedaRamirezEtAl <:
+       FluxNonConservative{NonConservativeSymmetric()}
+end
+
+n_nonconservative_terms(::FluxNonConservativeRuedaRamirezEtAl) = 6
+
+const flux_nonconservative_ruedaramirez_etal = FluxNonConservativeRuedaRamirezEtAl()
+
 """
     flux_nonconservative_ruedaramirez_etal(u_ll, u_rr,
                                            orientation_or_normal_direction,
@@ -329,9 +339,9 @@ The term is composed of four individual non-conservative terms:
 3. The "multi-ion" term, which vanishes in the limit of one ion species.
 4. The GLM term, which is needed for Galilean invariance.
 """
-@inline function flux_nonconservative_ruedaramirez_etal(u_ll, u_rr,
-                                                        orientation::Integer,
-                                                        equations::IdealGlmMhdMultiIonEquations3D)
+@inline function (noncons_flux::FluxNonConservativeRuedaRamirezEtAl)(u_ll, u_rr,
+                                                                     orientation::Integer,
+                                                                     equations::IdealGlmMhdMultiIonEquations3D)
     @unpack charge_to_mass = equations
     # Unpack left and right states to get the magnetic field
     B1_ll, B2_ll, B3_ll = magnetic_field(u_ll, equations)
@@ -514,9 +524,9 @@ The term is composed of four individual non-conservative terms:
     return SVector(f)
 end
 
-@inline function flux_nonconservative_ruedaramirez_etal(u_ll, u_rr,
-                                                        normal_direction::AbstractVector,
-                                                        equations::IdealGlmMhdMultiIonEquations3D)
+@inline function (noncons_flux::FluxNonConservativeRuedaRamirezEtAl)(u_ll, u_rr,
+                                                                     normal_direction::AbstractVector,
+                                                                     equations::IdealGlmMhdMultiIonEquations3D)
     @unpack charge_to_mass = equations
     # Unpack left and right states to get the magnetic field
     B1_ll, B2_ll, B3_ll = magnetic_field(u_ll, equations)
@@ -628,6 +638,291 @@ end
     return SVector(f)
 end
 
+"""
+    flux_nonconservative_ruedaramirez_etal(u_ll, normal_direction::AbstractVector,
+                                           equations::IdealGlmMhdMultiIonEquations3D,
+                                           nonconservative_type::NonConservativeLocal,
+                                           nonconservative_term::Integer)
+
+Non-symmetric local part of the non-conservative terms for multi-ion MHD. Needed for the calculation of
+the non-conservative staggered "fluxes" for subcell limiting. See, e.g.,
+- Rueda-Ramírez, Gassner (2023). A Flux-Differencing Formula for Split-Form Summation By Parts
+  Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf.
+This function is used to compute the subcell fluxes in dg_3d_subcell_limiters.jl.
+
+On curvilinear meshes this formulation applies the local normal direction compared to the averaged one used 
+in [`flux_nonconservative_ruedaramirez_etal`](@ref) when used for volume fluxes. This is done to reduce the 
+number of operations to obtain the subcell limiting fluxes. However, this decision causes this flux to be 
+"slightly" different when used for subcell limiting or for a flux-differencing DG method.
+"""
+@inline function (noncons_flux::FluxNonConservativeRuedaRamirezEtAl)(u_ll,
+                                                                     normal_direction::AbstractVector,
+                                                                     equations::IdealGlmMhdMultiIonEquations3D,
+                                                                     nonconservative_type::NonConservativeLocal,
+                                                                     nonconservative_term::Integer)
+    @unpack charge_to_mass = equations
+    # Unpack the local state to get the magnetic field and the divergence-cleaning field
+    B1_ll, B2_ll, B3_ll = magnetic_field(u_ll, equations)
+    psi_ll = divergence_cleaning_field(u_ll, equations)
+
+    # Compute charge ratio of u_ll
+    charge_ratio_ll = zero(MVector{ncomponents(equations), eltype(u_ll)})
+    total_electron_charge = zero(eltype(u_ll))
+    for k in eachcomponent(equations)
+        rho_k = u_ll[3 + (k - 1) * 5 + 1] # Extract densities from conserved variable vector
+        charge_ratio_ll[k] = rho_k * charge_to_mass[k]
+        total_electron_charge += charge_ratio_ll[k]
+    end
+    charge_ratio_ll ./= total_electron_charge
+
+    # Compute auxiliary variables
+    v1_plus_ll, v2_plus_ll, v3_plus_ll, vk1_plus_ll, vk2_plus_ll, vk3_plus_ll = charge_averaged_velocities(u_ll,
+                                                                                                           equations)
+
+    f = zero(MVector{nvariables(equations), eltype(u_ll)})
+
+    if nonconservative_term == 1
+        f[1] = v1_plus_ll
+        f[2] = v2_plus_ll
+        f[3] = v3_plus_ll
+
+        for k in eachcomponent(equations)
+            # Compute Godunov-Powell term
+            f2 = charge_ratio_ll[k] * B1_ll
+            f3 = charge_ratio_ll[k] * B2_ll
+            f4 = charge_ratio_ll[k] * B3_ll
+            f5 = (v1_plus_ll * B1_ll + v2_plus_ll * B2_ll + v3_plus_ll * B3_ll)
+
+            set_component!(f, k, 0, f2, f3, f4, f5, equations)
+        end
+    elseif nonconservative_term == 2
+        # Compute Lorentz term
+
+        for k in eachcomponent(equations)
+            f2 = charge_ratio_ll[k]
+            f3 = charge_ratio_ll[k]
+            f4 = charge_ratio_ll[k]
+            f5 = (vk1_plus_ll[k] * normal_direction[1] +
+                  vk2_plus_ll[k] * normal_direction[2] +
+                  vk3_plus_ll[k] * normal_direction[3])
+
+            set_component!(f, k, 0, f2, f3, f4, f5, equations)
+        end
+    elseif nonconservative_term == 3
+        # Compute GLM term
+        v_plus_dot_n_ll = (v1_plus_ll * normal_direction[1] +
+                           v2_plus_ll * normal_direction[2] +
+                           v3_plus_ll * normal_direction[3])
+        for k in eachcomponent(equations)
+            f5 = v_plus_dot_n_ll * psi_ll
+            set_component!(f, k, 0, 0, 0, 0, f5, equations)
+        end
+        # Compute GLM term for psi
+        f[end] = v_plus_dot_n_ll
+    elseif nonconservative_term == 4
+        # Multi-ion term (vanishes for NCOMP==1) for B1
+        for k in eachcomponent(equations)
+            f5 = B1_ll
+            set_component!(f, k, 0, 0, 0, 0, f5, equations)
+        end
+    elseif nonconservative_term == 5
+        # Multi-ion term (vanishes for NCOMP==1) for B2
+        for k in eachcomponent(equations)
+            f5 = B2_ll
+            set_component!(f, k, 0, 0, 0, 0, f5, equations)
+        end
+    elseif nonconservative_term == 6
+        # Multi-ion term (vanishes for NCOMP==1) for B3
+        for k in eachcomponent(equations)
+            f5 = B3_ll
+            set_component!(f, k, 0, 0, 0, 0, f5, equations)
+        end
+    end
+
+    return SVector(f)
+end
+
+"""
+    flux_nonconservative_ruedaramirez_etal(u_ll, u_rr, normal_direction::AbstractVector,
+                                           equations::IdealGlmMhdMultiIonEquations3D,
+                                           nonconservative_type::NonConservativeSymmetric,
+                                           nonconservative_term::Integer)
+
+Symmetric part of the multi-ion non-conservative terms. Needed for the calculation of
+the non-conservative staggered "fluxes" for subcell limiting. See, e.g.,
+- Rueda-Ramírez, Gassner (2023). A Flux-Differencing Formula for Split-Form Summation By Parts
+  Discretizations of Non-Conservative Systems. https://arxiv.org/pdf/2211.14009.pdf.
+This function is used to compute the subcell fluxes in dg_3d_subcell_limiters.jl.
+"""
+@inline function (noncons_flux::FluxNonConservativeRuedaRamirezEtAl)(u_ll, u_rr,
+                                                                     normal_direction::AbstractVector,
+                                                                     equations::IdealGlmMhdMultiIonEquations3D,
+                                                                     nonconservative_type::NonConservativeSymmetric,
+                                                                     nonconservative_term::Integer)
+    @unpack charge_to_mass = equations
+    # Unpack left and right states to get the magnetic field
+    B1_ll, B2_ll, B3_ll = magnetic_field(u_ll, equations)
+    B1_rr, B2_rr, B3_rr = magnetic_field(u_rr, equations)
+    psi_ll = divergence_cleaning_field(u_ll, equations)
+    psi_rr = divergence_cleaning_field(u_rr, equations)
+    B_dot_n_ll = B1_ll * normal_direction[1] +
+                 B2_ll * normal_direction[2] +
+                 B3_ll * normal_direction[3]
+    B_dot_n_rr = B1_rr * normal_direction[1] +
+                 B2_rr * normal_direction[2] +
+                 B3_rr * normal_direction[3]
+    B_dot_n_avg = 0.5f0 * (B_dot_n_ll + B_dot_n_rr)
+
+    # Compute important averages
+    B1_avg = 0.5f0 * (B1_ll + B1_rr)
+    B2_avg = 0.5f0 * (B2_ll + B2_rr)
+    B3_avg = 0.5f0 * (B3_ll + B3_rr)
+
+    # Compute charge ratio of u_ll (NOTE(review): not read by any of the terms below)
+    charge_ratio_ll = zero(MVector{ncomponents(equations), eltype(u_ll)})
+    total_electron_charge = zero(eltype(u_ll))
+    for k in eachcomponent(equations)
+        rho_k = u_ll[3 + (k - 1) * 5 + 1] # Extract densities from conserved variable vector
+        charge_ratio_ll[k] = rho_k * charge_to_mass[k]
+        total_electron_charge += charge_ratio_ll[k]
+    end
+    charge_ratio_ll ./= total_electron_charge
+
+    # Compute auxiliary variables
+    v1_plus_ll, v2_plus_ll, v3_plus_ll, vk1_plus_ll, vk2_plus_ll, vk3_plus_ll = charge_averaged_velocities(u_ll,
+                                                                                                           equations)
+    v1_plus_rr, v2_plus_rr, v3_plus_rr, vk1_plus_rr, vk2_plus_rr, vk3_plus_rr = charge_averaged_velocities(u_rr,
+                                                                                                           equations)
+
+    f = zero(MVector{nvariables(equations), eltype(u_ll)})
+
+    if nonconservative_term == 1
+        # Entries of Godunov-Powell term for induction equation (multiply by 2 because the non-conservative flux is 
+        # multiplied by 0.5 whenever it's used in the Trixi code)
+        f[1] = 2 * B_dot_n_avg
+        f[2] = 2 * B_dot_n_avg
+        f[3] = 2 * B_dot_n_avg
+
+        for k in eachcomponent(equations)
+            # Compute Godunov-Powell term
+            f2 = B_dot_n_avg
+            f3 = B_dot_n_avg
+            f4 = B_dot_n_avg
+            f5 = B_dot_n_avg
+            # Add to the flux vector (multiply by 2 because the non-conservative flux is 
+            # multiplied by 0.5 whenever it's used in the Trixi code)
+            set_component!(f, k, 0, 2 * f2, 2 * f3, 2 * f4, 2 * f5,
+                           equations)
+        end
+    elseif nonconservative_term == 2
+        # Compute Lorentz term
+
+        # Important averages
+        mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2
+        mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2
+        mag_norm_avg = 0.5f0 * (mag_norm_ll + mag_norm_rr)
+
+        # Mean electron pressure
+        pe_ll = equations.electron_pressure(u_ll, equations)
+        pe_rr = equations.electron_pressure(u_rr, equations)
+        pe_mean = 0.5f0 * (pe_ll + pe_rr)
+
+        for k in eachcomponent(equations)
+            f2 = ((0.5f0 * mag_norm_avg + pe_mean) * normal_direction[1] -
+                  B_dot_n_avg * B1_avg)
+            f3 = ((0.5f0 * mag_norm_avg + pe_mean) * normal_direction[2] -
+                  B_dot_n_avg * B2_avg)
+            f4 = ((0.5f0 * mag_norm_avg + pe_mean) * normal_direction[3] -
+                  B_dot_n_avg * B3_avg)
+            f5 = pe_mean
+
+            set_component!(f, k, 0, 2 * f2, 2 * f3, 2 * f4, 2 * f5,
+                           equations)
+        end
+    elseif nonconservative_term == 3
+        # Compute GLM term
+        psi_avg = 0.5f0 * (psi_ll + psi_rr)
+        for k in eachcomponent(equations)
+            f5 = psi_avg
+            # (multiply by 2 because the non-conservative flux is 
+            # multiplied by 0.5 whenever it's used in the Trixi code)
+            set_component!(f, k, 0, 0, 0, 0, 2 * f5, equations)
+        end
+        # Compute GLM term for psi (multiply by 2 because the non-conservative flux is 
+        # multiplied by 0.5 whenever it's used in the Trixi code)
+        f[end] = 2 * psi_avg
+    elseif nonconservative_term == 4
+        # Multi-ion term (vanishes for NCOMP==1) for B1
+        for k in eachcomponent(equations)
+            vk1_minus_ll = v1_plus_ll - vk1_plus_ll[k]
+            vk2_minus_ll = v2_plus_ll - vk2_plus_ll[k]
+            vk3_minus_ll = v3_plus_ll - vk3_plus_ll[k]
+            vk1_minus_rr = v1_plus_rr - vk1_plus_rr[k]
+            vk2_minus_rr = v2_plus_rr - vk2_plus_rr[k]
+            vk3_minus_rr = v3_plus_rr - vk3_plus_rr[k]
+            vk1_minus_avg = 0.5f0 * (vk1_minus_ll + vk1_minus_rr)
+            vk2_minus_avg = 0.5f0 * (vk2_minus_ll + vk2_minus_rr)
+            vk3_minus_avg = 0.5f0 * (vk3_minus_ll + vk3_minus_rr)
+
+            f5 = ((vk2_minus_avg * B1_avg - vk1_minus_avg * B2_avg) *
+                  normal_direction[2] +
+                  (vk3_minus_avg * B1_avg - vk1_minus_avg * B3_avg) *
+                  normal_direction[3])
+
+            # Add to the flux vector (multiply by 2 because the non-conservative flux is 
+            # multiplied by 0.5 whenever it's used in the Trixi code)
+            set_component!(f, k, 0, 0, 0, 0, 2 * f5,
+                           equations)
+        end
+    elseif nonconservative_term == 5
+        # Multi-ion term (vanishes for NCOMP==1) for B2
+        for k in eachcomponent(equations)
+            vk1_minus_ll = v1_plus_ll - vk1_plus_ll[k]
+            vk2_minus_ll = v2_plus_ll - vk2_plus_ll[k]
+            vk3_minus_ll = v3_plus_ll - vk3_plus_ll[k]
+            vk1_minus_rr = v1_plus_rr - vk1_plus_rr[k]
+            vk2_minus_rr = v2_plus_rr - vk2_plus_rr[k]
+            vk3_minus_rr = v3_plus_rr - vk3_plus_rr[k]
+            vk1_minus_avg = 0.5f0 * (vk1_minus_ll + vk1_minus_rr)
+            vk2_minus_avg = 0.5f0 * (vk2_minus_ll + vk2_minus_rr)
+            vk3_minus_avg = 0.5f0 * (vk3_minus_ll + vk3_minus_rr)
+
+            f5 = ((vk1_minus_avg * B2_avg - vk2_minus_avg * B1_avg) *
+                  normal_direction[1] +
+                  (vk3_minus_avg * B2_avg - vk2_minus_avg * B3_avg) *
+                  normal_direction[3])
+            # Add to the flux vector (multiply by 2 because the non-conservative flux is 
+            # multiplied by 0.5 whenever it's used in the Trixi code)
+            set_component!(f, k, 0, 0, 0, 0, 2 * f5,
+                           equations)
+        end
+    elseif nonconservative_term == 6
+        # Multi-ion term (vanishes for NCOMP==1) for B3
+        for k in eachcomponent(equations)
+            vk1_minus_ll = v1_plus_ll - vk1_plus_ll[k]
+            vk2_minus_ll = v2_plus_ll - vk2_plus_ll[k]
+            vk3_minus_ll = v3_plus_ll - vk3_plus_ll[k]
+            vk1_minus_rr = v1_plus_rr - vk1_plus_rr[k]
+            vk2_minus_rr = v2_plus_rr - vk2_plus_rr[k]
+            vk3_minus_rr = v3_plus_rr - vk3_plus_rr[k]
+            vk1_minus_avg = 0.5f0 * (vk1_minus_ll + vk1_minus_rr)
+            vk2_minus_avg = 0.5f0 * (vk2_minus_ll + vk2_minus_rr)
+            vk3_minus_avg = 0.5f0 * (vk3_minus_ll + vk3_minus_rr)
+
+            f5 = ((vk1_minus_avg * B3_avg - vk3_minus_avg * B1_avg) *
+                  normal_direction[1] +
+                  (vk2_minus_avg * B3_avg - vk3_minus_avg * B2_avg) *
+                  normal_direction[2])
+            # Add to the flux vector (multiply by 2 because the non-conservative flux is 
+            # multiplied by 0.5 whenever it's used in the Trixi code)
+            set_component!(f, k, 0, 0, 0, 0, 2 * f5,
+                           equations)
+        end
+    end
+
+    return SVector(f)
+end
+
 """
     flux_nonconservative_central(u_ll, u_rr, orientation::Integer,
                                  equations::IdealGlmMhdMultiIonEquations3D)

From 5f53301f68515030e1d3e37ece63339dde1d1538 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Rueda-Ram=C3=ADrez?= <aruedara@uni-koeln.de>
Date: Fri, 19 Sep 2025 09:39:37 +0200
Subject: [PATCH 10/18] Fixed redefinition of constant error

---
 src/equations/ideal_glm_mhd_multiion.jl    | 13 +++++++++++++
 src/equations/ideal_glm_mhd_multiion_2d.jl |  6 +++---
 src/equations/ideal_glm_mhd_multiion_3d.jl | 10 ----------
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/equations/ideal_glm_mhd_multiion.jl b/src/equations/ideal_glm_mhd_multiion.jl
index 0d0229e8750..9fb65541079 100644
--- a/src/equations/ideal_glm_mhd_multiion.jl
+++ b/src/equations/ideal_glm_mhd_multiion.jl
@@ -45,6 +45,19 @@ function default_analysis_integrals(::AbstractIdealGlmMhdMultiIonEquations)
     (entropy_timederivative, Val(:l2_divb), Val(:linf_divb))
 end
 
+# For `VolumeIntegralSubcellLimiting` the nonconservative flux is created as a callable struct to
+# enable dispatch on the type of the nonconservative term (symmetric / jump).
+struct FluxNonConservativeRuedaRamirezEtAl <:
+       FluxNonConservative{NonConservativeSymmetric()}
+end
+
+# We specify 6 non-conservative terms for FluxNonConservativeRuedaRamirezEtAl, which is the number
+# of non-conservative terms in 3D. In 2D, only 5 terms are needed.
+# TODO: Should we create a different struct for the 2D non-conservative term?
+n_nonconservative_terms(::FluxNonConservativeRuedaRamirezEtAl) = 6
+
+const flux_nonconservative_ruedaramirez_etal = FluxNonConservativeRuedaRamirezEtAl()
+
 """
     source_terms_lorentz(u, x, t, equations::AbstractIdealGlmMhdMultiIonEquations)
 
diff --git a/src/equations/ideal_glm_mhd_multiion_2d.jl b/src/equations/ideal_glm_mhd_multiion_2d.jl
index 38b3e52fa49..870ceb6bae6 100644
--- a/src/equations/ideal_glm_mhd_multiion_2d.jl
+++ b/src/equations/ideal_glm_mhd_multiion_2d.jl
@@ -343,9 +343,9 @@ The term is composed of four individual non-conservative terms:
 3. The "multi-ion" term, which vanishes in the limit of one ion species.
 4. The GLM term, which is needed for Galilean invariance.
 """
-@inline function flux_nonconservative_ruedaramirez_etal(u_ll, u_rr,
-                                                        orientation::Integer,
-                                                        equations::IdealGlmMhdMultiIonEquations2D)
+@inline function (noncons_flux::FluxNonConservativeRuedaRamirezEtAl)(u_ll, u_rr,
+                                                                     orientation::Integer,
+                                                                     equations::IdealGlmMhdMultiIonEquations2D)
     @unpack charge_to_mass = equations
     # Unpack left and right states to get the magnetic field
     B1_ll, B2_ll, B3_ll = magnetic_field(u_ll, equations)
diff --git a/src/equations/ideal_glm_mhd_multiion_3d.jl b/src/equations/ideal_glm_mhd_multiion_3d.jl
index 23d04d71bd1..f4fee47cac3 100644
--- a/src/equations/ideal_glm_mhd_multiion_3d.jl
+++ b/src/equations/ideal_glm_mhd_multiion_3d.jl
@@ -304,16 +304,6 @@ end
     return SVector(f)
 end
 
-# For `VolumeIntegralSubcellLimiting` the nonconservative flux is created as a callable struct to 
-# enable dispatch on the type of the nonconservative term (symmetric / jump).
-struct FluxNonConservativeRuedaRamirezEtAl <:
-       FluxNonConservative{NonConservativeSymmetric()}
-end
-
-n_nonconservative_terms(::FluxNonConservativeRuedaRamirezEtAl) = 6
-
-const flux_nonconservative_ruedaramirez_etal = FluxNonConservativeRuedaRamirezEtAl()
-
 """
     flux_nonconservative_ruedaramirez_etal(u_ll, u_rr,
                                            orientation_or_normal_direction,

From ad963787e1226f34854a2248f81a377293c3cd5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Rueda-Ram=C3=ADrez?= <aruedara@uni-koeln.de>
Date: Fri, 19 Sep 2025 14:50:56 +0200
Subject: [PATCH 11/18] Fixed some bugs

---
 src/equations/ideal_glm_mhd_multiion_3d.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/equations/ideal_glm_mhd_multiion_3d.jl b/src/equations/ideal_glm_mhd_multiion_3d.jl
index f4fee47cac3..3c2988c89fc 100644
--- a/src/equations/ideal_glm_mhd_multiion_3d.jl
+++ b/src/equations/ideal_glm_mhd_multiion_3d.jl
@@ -630,7 +630,7 @@ end
 
 """
     flux_nonconservative_ruedaramirez_etal(u_ll, normal_direction_ll::AbstractVector,
-                                           equations::IdealGlmMhdEquations2D,
+                                           equations::IdealGlmMhdMultiIonEquations3D,
                                            nonconservative_type::NonConservativeLocal,
                                            nonconservative_term::Integer)
 
@@ -647,7 +647,7 @@ number of operations to obtain the subcell limiting fluxes. However, this decisi
 """
 @inline function (noncons_flux::FluxNonConservativeRuedaRamirezEtAl)(u_ll,
                                                                      normal_direction::AbstractVector,
-                                                                     equations::IdealGlmMhdEquations2D,
+                                                                     equations::IdealGlmMhdMultiIonEquations3D,
                                                                      nonconservative_type::NonConservativeLocal,
                                                                      nonconservative_term::Integer)
     @unpack charge_to_mass = equations
@@ -734,9 +734,9 @@ end
 
 """
     flux_nonconservative_ruedaramirez_etal(u_ll, normal_direction_avg::AbstractVector,
-                                                equations::IdealGlmMhdEquations2D,
-                                                nonconservative_type::NonConservativeSymmetric,
-                                                nonconservative_term::Integer)
+                                           equations::IdealGlmMhdMultiIonEquations3D,
+                                           nonconservative_type::NonConservativeSymmetric,
+                                           nonconservative_term::Integer)
 
 Symmetric part of the multi-ion non-conservative terms. Needed for the calculation of
 the non-conservative staggered "fluxes" for subcell limiting. See, e.g.,
@@ -746,7 +746,7 @@ This function is used to compute the subcell fluxes in dg_3d_subcell_limiters.jl
 """
 @inline function (noncons_flux::FluxNonConservativeRuedaRamirezEtAl)(u_ll, u_rr,
                                                                      normal_direction::AbstractVector,
-                                                                     equations::IdealGlmMhdEquations2D,
+                                                                     equations::IdealGlmMhdMultiIonEquations3D,
                                                                      nonconservative_type::NonConservativeSymmetric,
                                                                      nonconservative_term::Integer)
     @unpack charge_to_mass = equations

From 57daab6947879d6f76a78a2f4a531ab2f904ea59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Rueda-Ram=C3=ADrez?= <aruedara@uni-koeln.de>
Date: Fri, 19 Sep 2025 18:50:25 +0200
Subject: [PATCH 12/18] Added isvalid function for multi-ion MHD

---
 src/equations/ideal_glm_mhd_multiion.jl | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/equations/ideal_glm_mhd_multiion.jl b/src/equations/ideal_glm_mhd_multiion.jl
index 9fb65541079..37ba7c52a48 100644
--- a/src/equations/ideal_glm_mhd_multiion.jl
+++ b/src/equations/ideal_glm_mhd_multiion.jl
@@ -58,6 +58,18 @@ n_nonconservative_terms(::FluxNonConservativeRuedaRamirezEtAl) = 6
 
 const flux_nonconservative_ruedaramirez_etal = FluxNonConservativeRuedaRamirezEtAl()
 
+# State validation for Newton-bisection method of subcell IDP limiting
+@inline function Base.isvalid(u, equations::AbstractIdealGlmMhdMultiIonEquations)
+    pressures = pressure(u, equations)
+    for species in eachcomponent(equations)
+        # First entry of the species state (presumably its density; confirm)
+        first_entry = get_component(species, u, equations)[1]
+        (first_entry <= 0 || pressures[species] <= 0) && return false
+    end
+    # No species violated the positivity checks
+    return true
+end
+
 """
     source_terms_lorentz(u, x, t, equations::AbstractIdealGlmMhdMultiIonEquations)
 

From 79274d9b0ff61206cebdcc887dd26a2c726981dc Mon Sep 17 00:00:00 2001
From: Arpit Babbar <arpitbabbar@gmail.com>
Date: Sat, 20 Sep 2025 22:40:59 +0200
Subject: [PATCH 13/18] Elixir with constant subsonic state and boundary
 treatment (#2574)

* Elixir with constant subsonic state and boundary treatment

* Apply suggestions from code review

* Update examples/p4est_2d_dgsem/elixir_euler_subsonic_constant.jl

* Update examples/p4est_2d_dgsem/elixir_euler_subsonic_constant.jl

* Apply suggestions from code review

* Update examples/p4est_2d_dgsem/elixir_euler_subsonic_constant.jl

* Update examples/p4est_2d_dgsem/elixir_euler_subsonic_constant.jl

---------

Co-authored-by: Daniel Doehring <doehringd2@gmail.com>
---
 .../elixir_euler_subsonic_constant.jl         | 114 ++++++++++++++++++
 test/test_p4est_2d.jl                         |  27 +++++
 test/test_tree_2d_euler.jl                    |   2 +-
 3 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 examples/p4est_2d_dgsem/elixir_euler_subsonic_constant.jl

diff --git a/examples/p4est_2d_dgsem/elixir_euler_subsonic_constant.jl b/examples/p4est_2d_dgsem/elixir_euler_subsonic_constant.jl
new file mode 100644
index 00000000000..9f431220198
--- /dev/null
+++ b/examples/p4est_2d_dgsem/elixir_euler_subsonic_constant.jl
@@ -0,0 +1,114 @@
+using OrdinaryDiffEqSSPRK
+using Trixi
+using LinearAlgebra: norm
+
+###############################################################################
+## Semidiscretization of the compressible Euler equations
+
+equations = CompressibleEulerEquations2D(1.4)
+polydeg = 3
+solver = DGSEM(polydeg = polydeg, surface_flux = flux_lax_friedrichs)
+
+@inline function initial_condition_subsonic(x_, t, equations::CompressibleEulerEquations2D)
+    # Constant state at rest: the arguments `x_` and `t` are not used
+    density, pressure_const = 0.5313, 0.4
+    primitives = SVector(density, 0.0, 0.0, pressure_const)
+    return prim2cons(primitives, equations)
+end
+
+initial_condition = initial_condition_subsonic
+
+# Calculate the boundary flux from the inner state while using the pressure from the outer state
+# when the flow is subsonic (which is always the case in this example).
+
+# If the naive approach of only using the inner state is used, the errors increase with the
+# increase of refinement level, see https://github.com/trixi-framework/Trixi.jl/issues/2530
+# These errors arise from the corner points in this test.
+
+# See the reference below for a discussion on inflow/outflow boundary conditions. The subsonic
+# outflow boundary conditions are discussed in Section 2.3.
+#
+# - Jan-Reneé Carlson (2011)
+#   Inflow/Outflow Boundary Conditions with Application to FUN3D.
+#   [NASA TM 20110022658](https://ntrs.nasa.gov/citations/20110022658)
+@inline function boundary_condition_outflow_general(u_inner,
+                                                    normal_direction::AbstractVector, x, t,
+                                                    surface_flux_function,
+                                                    equations::CompressibleEulerEquations2D)
+
+    # Unit normal; `normal_direction` itself stays unnormalized for the flux evaluation below
+    normal_length = norm(normal_direction)
+    unit_normal = normal_direction / normal_length
+
+    # Rotate the inner state with `Trixi.rotate_to_x` and the unit normal, then convert it
+    # to primitive variables
+    u_rotated = Trixi.rotate_to_x(u_inner, unit_normal, equations)
+    rho_in, v_normal, v_tangent, p_in = cons2prim(u_rotated, equations)
+
+    # Local Mach number of the normal velocity component
+    sound_speed = sqrt(equations.gamma * p_in / rho_in)
+    mach_normal = abs(v_normal / sound_speed)
+
+    # For `mach_normal <= 1.0` the pressure is taken from `initial_condition_subsonic`
+    # (the outer state) instead of the inner state. The check is not needed in this elixir
+    # but kept for generality.
+    p_surface = mach_normal <= 1.0 ?
+                pressure(initial_condition_subsonic(x, t, equations), equations) : p_in
+
+    # Assemble the surface state from the mixed primitive variables and rotate it back
+    prim_surface = SVector(rho_in, v_normal, v_tangent, p_surface)
+    u_rotated_surface = prim2cons(prim_surface, equations)
+    u_surface = Trixi.rotate_from_x(u_rotated_surface, unit_normal, equations)
+
+    # Flux of the mixed inner / outer surface state in the unnormalized normal direction
+    return flux(u_surface, normal_direction, equations)
+end
+
+boundary_conditions = Dict(:x_neg => boundary_condition_outflow_general,
+                           :x_pos => boundary_condition_outflow_general,
+                           :y_neg => boundary_condition_outflow_general,
+                           :y_pos => boundary_condition_outflow_general)
+
+coordinates_min = (0.0, 0.0)
+coordinates_max = (1.0, 1.0)
+
+trees_per_dimension = (1, 1)
+
+mesh = P4estMesh(trees_per_dimension, polydeg = polydeg,
+                 coordinates_min = coordinates_min, coordinates_max = coordinates_max,
+                 initial_refinement_level = 3,
+                 periodicity = false)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+## ODE solvers, callbacks etc.
+
+tspan = (0.0, 0.25)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = 100)
+
+save_solution = SaveSolutionCallback(interval = 100,
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim)
+
+stepsize_callback = StepsizeCallback(cfl = 0.5)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback, alive_callback,
+                        save_solution,
+                        stepsize_callback)
+
+###############################################################################
+## Run the simulation
+sol = solve(ode, SSPRK54();
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl
index b0852789b0a..9087e45207e 100644
--- a/test/test_p4est_2d.jl
+++ b/test/test_p4est_2d.jl
@@ -187,6 +187,33 @@ end
     end
 end
 
+@trixi_testset "elixir_euler_subsonic_constant.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                 "elixir_euler_subsonic_constant.jl"),
+                        l2=[
+                            9.268884363640194e-14,
+                            1.0689589615395477e-13,
+                            1.0408994850535984e-13,
+                            1.7032684855598177e-13
+                        ],
+                        linf=[
+                            1.6986412276764895e-13,
+                            2.2503592098759465e-12,
+                            1.7696906239744284e-12,
+                            3.623767952376511e-13
+                        ],
+                        initial_refinement_level=7,
+                        tspan=(0.0, 0.1))
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    end
+end
+
 @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR,
                                  "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"),
diff --git a/test/test_tree_2d_euler.jl b/test/test_tree_2d_euler.jl
index 8dcd351ffa9..7ffbcfbef4f 100644
--- a/test/test_tree_2d_euler.jl
+++ b/test/test_tree_2d_euler.jl
@@ -1094,7 +1094,7 @@ end
                             8.515583343047957e-14, 2.0472512574087887e-13
                         ],
                         initial_refinement_level=7,
-                        tspan=(0.0, 0.1)) # this test is sensitive to the CFL factor
+                        tspan=(0.0, 0.1))
     # Ensure that we do not have excessive memory allocations
     # (e.g., from type instabilities)
     let

From 891c906d1fa364373db64849ed174497a57646ce Mon Sep 17 00:00:00 2001
From: Benjamin Bolm <74359358+bennibolm@users.noreply.github.com>
Date: Sun, 21 Sep 2025 23:15:12 +0200
Subject: [PATCH 14/18] Refactor dimension-(in)dependent implementation of
 subcell limiting (#2573)

* Refactor dimension-dependent implementation of subcell limiting

* Implement suggestions

* Update src/solvers/dgsem_tree/subcell_limiters_2d.jl

* Update src/solvers/dgsem_tree/subcell_limiters_2d.jl

---------

Co-authored-by: Daniel Doehring <daniel.doehring@rwth-aachen.de>
Co-authored-by: Daniel Doehring <doehringd2@gmail.com>
---
 src/solvers/dgsem_tree/subcell_limiters.jl    | 174 +++++++++++++++
 src/solvers/dgsem_tree/subcell_limiters_2d.jl | 198 ++----------------
 2 files changed, 194 insertions(+), 178 deletions(-)

diff --git a/src/solvers/dgsem_tree/subcell_limiters.jl b/src/solvers/dgsem_tree/subcell_limiters.jl
index 0831287899e..1be3456884e 100644
--- a/src/solvers/dgsem_tree/subcell_limiters.jl
+++ b/src/solvers/dgsem_tree/subcell_limiters.jl
@@ -229,4 +229,178 @@ function get_node_variable(::Val{:limiting_coefficient}, u, mesh, equations, dg,
                            equations_parabolic, cache_parabolic)
     get_node_variable(Val(:limiting_coefficient), u, mesh, equations, dg, cache)
 end
+
+###############################################################################
+# Local minimum and maximum limiting (conservative variables)
+
+@inline function idp_local_twosided!(alpha, limiter, u, t, dt, semi)
+    # Call the per-variable method for every entry of `local_twosided_variables_cons`
+    for cons_variable in limiter.local_twosided_variables_cons
+        idp_local_twosided!(alpha, limiter, u, t, dt, semi, cons_variable)
+    end
+    return nothing
+end
+
+##############################################################################
+# Local minimum or maximum limiting (nonlinear variables)
+
+@inline function idp_local_onesided!(alpha, limiter, u, t, dt, semi)
+    # Entries of `local_onesided_variables_nonlinear` are `(variable, min_or_max)` pairs
+    for (nonlinear_variable, bound_type) in limiter.local_onesided_variables_nonlinear
+        idp_local_onesided!(alpha, limiter, u, t, dt, semi, nonlinear_variable, bound_type)
+    end
+    return nothing
+end
+
+###############################################################################
+# Global positivity limiting (conservative and nonlinear variables)
+
+@inline function idp_positivity!(alpha, limiter, u, dt, semi)
+    # Both loops below only dispatch to the per-variable routines and wrap each call
+    # in its own timer section.
+
+    # Conservative variables
+    for cons_variable in limiter.positivity_variables_cons
+        @trixi_timeit timer() "conservative variables" begin
+            idp_positivity_conservative!(alpha, limiter, u, dt, semi,
+                                         cons_variable)
+        end
+    end
+
+    # Nonlinear variables
+    for nonlinear_variable in limiter.positivity_variables_nonlinear
+        @trixi_timeit timer() "nonlinear variables" begin
+            idp_positivity_nonlinear!(alpha, limiter, u, dt, semi,
+                                      nonlinear_variable)
+        end
+    end
+
+    # No value is returned
+    return nothing
+end
+
+###############################################################################
+# Newton-bisection method
+
+@inline function newton_loop!(alpha, bound, u, indices, variable, min_or_max,
+                              initial_check, final_check, equations, dt, limiter,
+                              antidiffusive_flux)
+    newton_reltol, newton_abstol = limiter.newton_tolerances
+    # `beta = 1 - alpha` scales the antidiffusive flux in the trial state `u_curr` below
+    beta = 1 - alpha[indices...]
+    # `[beta_L, beta_R]` is the search interval used by the bisection steps
+    beta_L = 0 # alpha = 1
+    beta_R = beta # No higher beta (lower alpha) than the current one
+
+    u_curr = u + beta * dt * antidiffusive_flux
+
+    # If state is valid, perform initial check and return if correction is not needed
+    if isvalid(u_curr, equations)
+        goal = goal_function_newton_idp(variable, bound, u_curr, equations)
+
+        initial_check(min_or_max, bound, goal, newton_abstol) && return nothing
+    end
+
+    # Newton iterations (at most `limiter.max_iterations_newton`)
+    for iter in 1:(limiter.max_iterations_newton)
+        beta_old = beta
+
+        # If the state is valid, evaluate d(goal)/d(beta)
+        if isvalid(u_curr, equations)
+            dgoal_dbeta = dgoal_function_newton_idp(variable, u_curr, dt,
+                                                    antidiffusive_flux, equations)
+        else # Otherwise, a zero derivative forces the bisection step below
+            dgoal_dbeta = 0
+        end
+
+        if dgoal_dbeta != 0
+            # Update beta with Newton's method
+            beta = beta - goal / dgoal_dbeta
+        end
+
+        # Bisect if beta left `[beta_L, beta_R]`, is NaN, or no Newton update was done
+        if (beta < beta_L) || (beta > beta_R) || (dgoal_dbeta == 0) || isnan(beta)
+            # Take the midpoint of the current search interval
+            beta = 0.5f0 * (beta_L + beta_R)
+            # Get new u
+            u_curr = u + beta * dt * antidiffusive_flux
+
+            # Invalid state: shrink the right bound and iterate further without tolerance checks
+            if !isvalid(u_curr, equations)
+                beta_R = beta
+                continue
+            end
+
+            # Check new beta for condition and update bounds
+            goal = goal_function_newton_idp(variable, bound, u_curr, equations)
+            if initial_check(min_or_max, bound, goal, newton_abstol)
+                # New beta fulfills condition
+                beta_L = beta
+            else
+                # New beta does not fulfill condition
+                beta_R = beta
+            end
+        else
+            # Newton update accepted: get new u
+            u_curr = u + beta * dt * antidiffusive_flux
+
+            # Invalid state: shrink the right bound and iterate further without tolerance checks
+            if !isvalid(u_curr, equations)
+                beta_R = beta
+                continue
+            end
+
+            # Evaluate goal function
+            goal = goal_function_newton_idp(variable, bound, u_curr, equations)
+        end
+
+        # Stop if beta changed by at most `newton_reltol` in this iteration
+        if abs(beta_old - beta) <= newton_reltol
+            break
+        end
+
+        # Stop if `final_check` accepts the goal value for `newton_abstol`
+        if final_check(bound, goal, newton_abstol)
+            break
+        end
+    end
+
+    alpha[indices...] = 1 - beta # new alpha
+
+    return nothing
+end
+
+### Auxiliary routines for the Newton-bisection method ###
+# Initial checks: return `true` if `goal` already satisfies the respective condition
+@inline function initial_check_local_onesided_newton_idp(::typeof(min), bound,
+                                                         goal, newton_abstol)
+    return goal <= max(newton_abstol, abs(bound) * newton_abstol)
+end
+
+@inline function initial_check_local_onesided_newton_idp(::typeof(max), bound,
+                                                         goal, newton_abstol)
+    return goal >= -max(newton_abstol, abs(bound) * newton_abstol)
+end
+
+@inline initial_check_nonnegative_newton_idp(min_or_max, bound, goal, newton_abstol) = goal <=
+                                                                                       0
+
+# Goal function `bound - variable(u)` and its derivative d(goal)/d(beta)
+@inline goal_function_newton_idp(variable, bound, u, equations) = bound -
+                                                                  variable(u, equations)
+@inline function dgoal_function_newton_idp(variable, u, dt, antidiffusive_flux,
+                                           equations)
+    return -dot(gradient_conservative(variable, u, equations), dt * antidiffusive_flux)
+end
+
+# Final checks
+# One-sided local limiting: `abs(goal)` must be below the tolerance scaled with `abs(bound)`
+@inline function final_check_local_onesided_newton_idp(bound, goal, newton_abstol)
+    return abs(goal) < max(newton_abstol, abs(bound) * newton_abstol)
+end
+
+# Nonnegativity limiting: `goal` must lie in `(-tolerance, eps()]` with the same tolerance
+@inline function final_check_nonnegative_newton_idp(bound, goal, newton_abstol)
+    return (goal <= eps()) && (goal > -max(newton_abstol, abs(bound) * newton_abstol))
+end
 end # @muladd
diff --git a/src/solvers/dgsem_tree/subcell_limiters_2d.jl b/src/solvers/dgsem_tree/subcell_limiters_2d.jl
index 539405f0a1e..dc9cde24240 100644
--- a/src/solvers/dgsem_tree/subcell_limiters_2d.jl
+++ b/src/solvers/dgsem_tree/subcell_limiters_2d.jl
@@ -72,7 +72,7 @@ end
 # Calculation of local bounds using low-order FV solution
 
 @inline function calc_bounds_twosided!(var_min, var_max, variable,
-                                       u, t, semi, equations)
+                                       u::AbstractArray{<:Any, 4}, t, semi, equations)
     mesh, _, dg, cache = mesh_equations_solver_cache(semi)
     # Calc bounds inside elements
     @threaded for element in eachelement(dg, cache)
@@ -176,7 +176,8 @@ end
     return nothing
 end
 
-@inline function calc_bounds_onesided!(var_minmax, min_or_max, variable, u, t, semi)
+@inline function calc_bounds_onesided!(var_minmax, min_or_max, variable,
+                                       u::AbstractArray{<:Any, 4}, t, semi)
     mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
     # Calc bounds inside elements
     @threaded for element in eachelement(dg, cache)
@@ -285,17 +286,10 @@ end
 end
 
 ###############################################################################
-# Local two-sided limiting of conservative variables
+# Local minimum and maximum limiting of conservative variables
 
-@inline function idp_local_twosided!(alpha, limiter, u, t, dt, semi)
-    for variable in limiter.local_twosided_variables_cons
-        idp_local_twosided!(alpha, limiter, u, t, dt, semi, variable)
-    end
-
-    return nothing
-end
-
-@inline function idp_local_twosided!(alpha, limiter, u, t, dt, semi, variable)
+@inline function idp_local_twosided!(alpha, limiter, u::AbstractArray{<:Any, 4}, t, dt,
+                                     semi, variable)
     mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
     (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes
     (; inverse_weights) = dg.basis
@@ -355,18 +349,10 @@ end
 end
 
 ##############################################################################
-# Local one-sided limiting of nonlinear variables
-
-@inline function idp_local_onesided!(alpha, limiter, u, t, dt, semi)
-    for (variable, min_or_max) in limiter.local_onesided_variables_nonlinear
-        idp_local_onesided!(alpha, limiter, u, t, dt, semi, variable, min_or_max)
-    end
-
-    return nothing
-end
+# Local minimum or maximum limiting of nonlinear variables
 
-@inline function idp_local_onesided!(alpha, limiter, u, t, dt, semi,
-                                     variable, min_or_max)
+@inline function idp_local_onesided!(alpha, limiter, u::AbstractArray{<:Real, 4}, t, dt,
+                                     semi, variable, min_or_max)
     mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
     (; variable_bounds) = limiter.cache.subcell_limiter_coefficients
     var_minmax = variable_bounds[Symbol(string(variable), "_", string(min_or_max))]
@@ -389,36 +375,12 @@ end
     return nothing
 end
 
-###############################################################################
-# Global positivity limiting
-
-@inline function idp_positivity!(alpha, limiter, u, dt, semi)
-    # Conservative variables
-    for variable in limiter.positivity_variables_cons
-        @trixi_timeit timer() "conservative variables" idp_positivity_conservative!(alpha,
-                                                                                    limiter,
-                                                                                    u,
-                                                                                    dt,
-                                                                                    semi,
-                                                                                    variable)
-    end
-
-    # Nonlinear variables
-    for variable in limiter.positivity_variables_nonlinear
-        @trixi_timeit timer() "nonlinear variables" idp_positivity_nonlinear!(alpha,
-                                                                              limiter,
-                                                                              u, dt,
-                                                                              semi,
-                                                                              variable)
-    end
-
-    return nothing
-end
-
 ###############################################################################
 # Global positivity limiting of conservative variables
 
-@inline function idp_positivity_conservative!(alpha, limiter, u, dt, semi, variable)
+@inline function idp_positivity_conservative!(alpha, limiter,
+                                              u::AbstractArray{<:Real, 4},
+                                              dt, semi, variable)
     mesh, _, dg, cache = mesh_equations_solver_cache(semi)
     (; antidiffusive_flux1_L, antidiffusive_flux2_L, antidiffusive_flux1_R, antidiffusive_flux2_R) = cache.antidiffusive_fluxes
     (; inverse_weights) = dg.basis
@@ -483,7 +445,9 @@ end
 ###############################################################################
 # Global positivity limiting of nonlinear variables
 
-@inline function idp_positivity_nonlinear!(alpha, limiter, u, dt, semi, variable)
+@inline function idp_positivity_nonlinear!(alpha, limiter,
+                                           u::AbstractArray{<:Real, 4},
+                                           dt, semi, variable)
     mesh, equations, dg, cache = mesh_equations_solver_cache(semi)
     (; positivity_correction_factor) = limiter
 
@@ -517,6 +481,7 @@ end
 ###############################################################################
 # Newton-bisection method
 
+# 2D version
 @inline function newton_loops_alpha!(alpha, bound, u, i, j, element, variable,
                                      min_or_max, initial_check, final_check,
                                      inverse_jacobian, dt, equations, dg, cache,
@@ -530,7 +495,7 @@ end
     antidiffusive_flux = gamma_constant_newton * inverse_jacobian * inverse_weights[i] *
                          get_node_vars(antidiffusive_flux1_R, equations, dg, i, j,
                                        element)
-    newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max, initial_check,
+    newton_loop!(alpha, bound, u, (i, j, element), variable, min_or_max, initial_check,
                  final_check, equations, dt, limiter, antidiffusive_flux)
 
     # positive xi direction
@@ -538,14 +503,14 @@ end
                          inverse_weights[i] *
                          get_node_vars(antidiffusive_flux1_L, equations, dg, i + 1, j,
                                        element)
-    newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max, initial_check,
+    newton_loop!(alpha, bound, u, (i, j, element), variable, min_or_max, initial_check,
                  final_check, equations, dt, limiter, antidiffusive_flux)
 
     # negative eta direction
     antidiffusive_flux = gamma_constant_newton * inverse_jacobian * inverse_weights[j] *
                          get_node_vars(antidiffusive_flux2_R, equations, dg, i, j,
                                        element)
-    newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max, initial_check,
+    newton_loop!(alpha, bound, u, (i, j, element), variable, min_or_max, initial_check,
                  final_check, equations, dt, limiter, antidiffusive_flux)
 
     # positive eta direction
@@ -553,132 +518,9 @@ end
                          inverse_weights[j] *
                          get_node_vars(antidiffusive_flux2_L, equations, dg, i, j + 1,
                                        element)
-    newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max, initial_check,
+    newton_loop!(alpha, bound, u, (i, j, element), variable, min_or_max, initial_check,
                  final_check, equations, dt, limiter, antidiffusive_flux)
 
     return nothing
 end
-
-@inline function newton_loop!(alpha, bound, u, i, j, element, variable, min_or_max,
-                              initial_check, final_check, equations, dt, limiter,
-                              antidiffusive_flux)
-    newton_reltol, newton_abstol = limiter.newton_tolerances
-
-    beta = 1 - alpha[i, j, element]
-
-    beta_L = 0 # alpha = 1
-    beta_R = beta # No higher beta (lower alpha) than the current one
-
-    u_curr = u + beta * dt * antidiffusive_flux
-
-    # If state is valid, perform initial check and return if correction is not needed
-    if isvalid(u_curr, equations)
-        goal = goal_function_newton_idp(variable, bound, u_curr, equations)
-
-        initial_check(min_or_max, bound, goal, newton_abstol) && return nothing
-    end
-
-    # Newton iterations
-    for iter in 1:(limiter.max_iterations_newton)
-        beta_old = beta
-
-        # If the state is valid, evaluate d(goal)/d(beta)
-        if isvalid(u_curr, equations)
-            dgoal_dbeta = dgoal_function_newton_idp(variable, u_curr, dt,
-                                                    antidiffusive_flux, equations)
-        else # Otherwise, perform a bisection step
-            dgoal_dbeta = 0
-        end
-
-        if dgoal_dbeta != 0
-            # Update beta with Newton's method
-            beta = beta - goal / dgoal_dbeta
-        end
-
-        # Check bounds
-        if (beta < beta_L) || (beta > beta_R) || (dgoal_dbeta == 0) || isnan(beta)
-            # Out of bounds, do a bisection step
-            beta = 0.5f0 * (beta_L + beta_R)
-            # Get new u
-            u_curr = u + beta * dt * antidiffusive_flux
-
-            # If the state is invalid, finish bisection step without checking tolerance and iterate further
-            if !isvalid(u_curr, equations)
-                beta_R = beta
-                continue
-            end
-
-            # Check new beta for condition and update bounds
-            goal = goal_function_newton_idp(variable, bound, u_curr, equations)
-            if initial_check(min_or_max, bound, goal, newton_abstol)
-                # New beta fulfills condition
-                beta_L = beta
-            else
-                # New beta does not fulfill condition
-                beta_R = beta
-            end
-        else
-            # Get new u
-            u_curr = u + beta * dt * antidiffusive_flux
-
-            # If the state is invalid, redefine right bound without checking tolerance and iterate further
-            if !isvalid(u_curr, equations)
-                beta_R = beta
-                continue
-            end
-
-            # Evaluate goal function
-            goal = goal_function_newton_idp(variable, bound, u_curr, equations)
-        end
-
-        # Check relative tolerance
-        if abs(beta_old - beta) <= newton_reltol
-            break
-        end
-
-        # Check absolute tolerance
-        if final_check(bound, goal, newton_abstol)
-            break
-        end
-    end
-
-    new_alpha = 1 - beta
-    alpha[i, j, element] = new_alpha
-
-    return nothing
-end
-
-### Auxiliary routines for Newton's bisection method ###
-# Initial checks
-@inline function initial_check_local_onesided_newton_idp(::typeof(min), bound,
-                                                         goal, newton_abstol)
-    goal <= max(newton_abstol, abs(bound) * newton_abstol)
-end
-
-@inline function initial_check_local_onesided_newton_idp(::typeof(max), bound,
-                                                         goal, newton_abstol)
-    goal >= -max(newton_abstol, abs(bound) * newton_abstol)
-end
-
-@inline initial_check_nonnegative_newton_idp(min_or_max, bound, goal, newton_abstol) = goal <=
-                                                                                       0
-
-# Goal and d(Goal)d(u) function
-@inline goal_function_newton_idp(variable, bound, u, equations) = bound -
-                                                                  variable(u, equations)
-@inline function dgoal_function_newton_idp(variable, u, dt, antidiffusive_flux,
-                                           equations)
-    -dot(gradient_conservative(variable, u, equations), dt * antidiffusive_flux)
-end
-
-# Final checks
-# final check for one-sided local limiting
-@inline function final_check_local_onesided_newton_idp(bound, goal, newton_abstol)
-    abs(goal) < max(newton_abstol, abs(bound) * newton_abstol)
-end
-
-# final check for nonnegativity limiting
-@inline function final_check_nonnegative_newton_idp(bound, goal, newton_abstol)
-    (goal <= eps()) && (goal > -max(newton_abstol, abs(bound) * newton_abstol))
-end
 end # @muladd

From 89a84d4d7f3ecbf38db456c635192ba54d407433 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Rueda-Ram=C3=ADrez?= <aruedara@uni-koeln.de>
Date: Mon, 22 Sep 2025 15:59:48 +0200
Subject: [PATCH 15/18] Unified pressure functions for MHD

---
 src/equations/ideal_glm_mhd_2d.jl |  8 ++++++--
 src/equations/ideal_glm_mhd_3d.jl | 10 ----------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/src/equations/ideal_glm_mhd_2d.jl b/src/equations/ideal_glm_mhd_2d.jl
index 56a6ef97a4c..17bc77d0bad 100644
--- a/src/equations/ideal_glm_mhd_2d.jl
+++ b/src/equations/ideal_glm_mhd_2d.jl
@@ -1645,7 +1645,9 @@ end
     return v
 end
 
-@inline function pressure(u, equations::IdealGlmMhdEquations2D)
+@inline function pressure(u,
+                          equations::Union{IdealGlmMhdEquations2D,
+                                           IdealGlmMhdEquations3D})
     rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
     p = (equations.gamma - 1) * (rho_e - 0.5f0 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho
          -
@@ -1657,7 +1659,9 @@ end
 
 # Transformation from conservative variables u to d(p)/d(u)
 @inline function gradient_conservative(::typeof(pressure),
-                                       u, equations::IdealGlmMhdEquations2D)
+                                       u,
+                                       equations::Union{IdealGlmMhdEquations2D,
+                                                        IdealGlmMhdEquations3D})
     rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
 
     v1 = rho_v1 / rho
diff --git a/src/equations/ideal_glm_mhd_3d.jl b/src/equations/ideal_glm_mhd_3d.jl
index 5a60e1952a4..13bb339aae6 100644
--- a/src/equations/ideal_glm_mhd_3d.jl
+++ b/src/equations/ideal_glm_mhd_3d.jl
@@ -1433,16 +1433,6 @@ end
     return v
 end
 
-@inline function pressure(u, equations::IdealGlmMhdEquations3D)
-    rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
-    p = (equations.gamma - 1) * (rho_e - 0.5f0 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho
-         -
-         0.5f0 * (B1^2 + B2^2 + B3^2)
-         -
-         0.5f0 * psi^2)
-    return p
-end
-
 @inline function density_pressure(u, equations::IdealGlmMhdEquations3D)
     rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
     p = (equations.gamma - 1) * (rho_e - 0.5f0 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho

From 63ff9dfe92b61a612743ac09cd5df26f020ed885 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Rueda-Ram=C3=ADrez?= <aruedara@uni-koeln.de>
Date: Tue, 23 Sep 2025 12:23:39 +0200
Subject: [PATCH 16/18] Added a counter-streaming elixir with subcell limiting

---
 .../elixir_mhdmultiion_counterstreaming.jl    | 302 ++++++++++++++++++
 1 file changed, 302 insertions(+)
 create mode 100644 examples/p4est_3d_dgsem/elixir_mhdmultiion_counterstreaming.jl

diff --git a/examples/p4est_3d_dgsem/elixir_mhdmultiion_counterstreaming.jl b/examples/p4est_3d_dgsem/elixir_mhdmultiion_counterstreaming.jl
new file mode 100644
index 00000000000..aa111421df4
--- /dev/null
+++ b/examples/p4est_3d_dgsem/elixir_mhdmultiion_counterstreaming.jl
@@ -0,0 +1,302 @@
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# This elixir describes the counter-streaming of two plasma beams, as relevant in inertial confinement fusion devices. It is described in:
+# - Ghosh, D., Chapman, T. D., Berger, R. L., Dimits, A., & Banks, J. W. (2019). A 
+#   multispecies, multifluid model for laser–induced counterstreaming plasma simulations. 
+#   Computers & Fluids, 186, 38-57. [DOI: 10.1016/j.compfluid.2019.04.012](https://doi.org/10.1016/j.compfluid.2019.04.012).
+#
+# This is effectively a one-dimensional case.
+#
+
+# Return the electron pressure for a constant electron temperature Te = 1 keV
+function electron_pressure_constantTe(u, equations::IdealGlmMhdMultiIonEquations3D)
+    @unpack charge_to_mass = equations
+    Te = 0.5
+    total_electron_charge = zero(eltype(u))
+    for k in eachcomponent(equations)
+        rho_k = u[3 + (k - 1) * 5 + 1]
+        total_electron_charge += rho_k * charge_to_mass[k]
+    end
+
+    # Boltzmann constant divided by elementary charge
+    kB_e = 2.39629174724586E-03
+
+    return total_electron_charge * kB_e * Te
+end
+
+function electron_temperature_constantTe(u, equations::IdealGlmMhdMultiIonEquations3D)
+    return 0.5
+end
+
+# semidiscretization of the ideal GLM-MHD multi-ion equations
+equations = IdealGlmMhdMultiIonEquations3D(gammas = (5 / 3, 5 / 3),
+                                           charge_to_mass = (417.3114568162800000,
+                                                             417.3114568162800000),
+                                           gas_constants = (1.0, 1.0),
+                                           molar_masses = (1.0, 1.0),
+                                           ion_ion_collision_constants = [0.0 0.5*440.1240021904530000;
+                                                                          0.5*440.1240021904530000 0.0], #660.1860032856790000, #0.2855567736309880 ) #0.20396912402213403) #0.6119073720664020) #  this is with ln(Lambda)=15
+                                           ion_electron_collision_constants = (3.4993604479301E-02,
+                                                                               3.4993604479301E-02),
+                                           electron_pressure = electron_pressure_constantTe,
+                                           electron_temperature = electron_temperature_constantTe)
+
+"""
+    initial_condition_interpenetration(x, t, equations::IdealGlmMhdMultiIonEquations3D)
+"""
+function initial_condition_interpenetration(x, t, equations::IdealGlmMhdMultiIonEquations3D)
+    # setup taken from Derigs et al. DMV article (2018)
+    # domain must be [0, 1] x [0, 1] x [0, 1], γ_1 = γ_2 = 5/3
+    dx = 0.005
+
+    v1 = v2 = v3 = 0.0
+    B1 = B2 = B3 = 0.0
+
+    rho1 = 0.5 * smoothed_slab(x[1], dx, -1.0, 0.2)
+    rho2 = 0.5 * smoothed_slab(x[1], dx, 0.8, 2.0)
+
+    rho_vac = 1e-14
+    rho1 += rho_vac
+    rho2 += rho_vac
+
+    p1 = rho1
+    p2 = rho2
+
+    return prim2cons(SVector(B1, B2, B3, rho1, v1, v2, v3, p1, rho2, v1, v2, v3, p2, 0.0),
+                     equations)
+end
+function smoothed_slab(x, dx, xmin, xmax)
+    exp(x / dx) *
+    (1.0 / (exp(xmin / dx) + exp(x / dx)) - 1.0 / (exp(xmax / dx) + exp(x / dx)))
+end
+
+"""
+Free-slip reflective wall for species 1 only  
+"""
+function boundary_condition_lleft(u_inner, normal_direction::AbstractVector,
+                                  x, t,
+                                  surface_flux_function,
+                                  equations::IdealGlmMhdMultiIonEquations3D)
+    u_outer = SVector(u_inner[1], u_inner[2], u_inner[3], u_inner[4], -u_inner[5],
+                      u_inner[6], u_inner[7], u_inner[8], u_inner[9], u_inner[10],
+                      u_inner[11], u_inner[12], u_inner[13], u_inner[14])
+
+    flux_conservative, flux_noncons = surface_flux_function
+
+    return flux_conservative(u_inner, u_outer, normal_direction, equations),
+           flux_noncons(u_inner, u_outer, normal_direction, equations)
+end
+
+function Trixi.get_boundary_outer_state(u_inner, t,
+                                        boundary_condition::typeof(boundary_condition_lleft),
+                                        normal_direction::AbstractVector,
+                                        mesh::P4estMesh, equations, dg, cache,
+                                        indices...)
+    return SVector(u_inner[1], u_inner[2], u_inner[3], u_inner[4], -u_inner[5], u_inner[6],
+                   u_inner[7], u_inner[8], u_inner[9], u_inner[10], u_inner[11],
+                   u_inner[12], u_inner[13], u_inner[14])
+end
+
+"""
+Free-slip reflective wall for species 2 only  
+"""
+function boundary_condition_rright(u_inner, normal_direction::AbstractVector,
+                                   x, t,
+                                   surface_flux_function,
+                                   equations::IdealGlmMhdMultiIonEquations3D)
+    u_outer = SVector(u_inner[1], u_inner[2], u_inner[3], u_inner[4], u_inner[5],
+                      u_inner[6], u_inner[7], u_inner[8], u_inner[9], -u_inner[10],
+                      u_inner[11], u_inner[12], u_inner[13], u_inner[14])
+
+    # return flux
+    flux_conservative, flux_noncons = surface_flux_function
+
+    return flux_conservative(u_inner, u_outer, normal_direction, equations),
+           flux_noncons(u_inner, u_outer, normal_direction, equations)
+end
+
+function Trixi.get_boundary_outer_state(u_inner, t,
+                                        boundary_condition::typeof(boundary_condition_rright),
+                                        normal_direction::AbstractVector,
+                                        mesh::P4estMesh, equations, dg, cache,
+                                        indices...)
+    return SVector(u_inner[1], u_inner[2], u_inner[3], u_inner[4], u_inner[5], u_inner[6],
+                   u_inner[7], u_inner[8], u_inner[9], -u_inner[10], u_inner[11],
+                   u_inner[12], u_inner[13], u_inner[14])
+end
+
+boundary_conditions = Dict(:x_neg => boundary_condition_lleft,
+                           :x_pos => boundary_condition_rright)
+
+function temperature1(cons, equations::IdealGlmMhdMultiIonEquations3D)
+    prim = cons2prim(cons, equations)
+    rho, _, _, _, p = Trixi.get_component(1, prim, equations)
+
+    return p / rho / equations.gas_constants[1]
+end
+function temperature2(cons, equations::IdealGlmMhdMultiIonEquations3D)
+    prim = cons2prim(cons, equations)
+    rho, _, _, _, p = Trixi.get_component(2, prim, equations)
+
+    return p / rho / equations.gas_constants[2]
+end
+@inline function vel11(cons, equations::IdealGlmMhdMultiIonEquations3D)
+    prim = cons2prim(cons, equations)
+    _, v1, _, _, _ = Trixi.get_component(1, prim, equations)
+
+    return v1
+end
+@inline function vel21(cons, equations::IdealGlmMhdMultiIonEquations3D)
+    prim = cons2prim(cons, equations)
+    _, v1, _, _, _ = Trixi.get_component(2, prim, equations)
+
+    return v1
+end
+
+@inline function pressure1(u, equations::IdealGlmMhdMultiIonEquations3D)
+    pres = pressure(u, equations)
+    return pres[1]
+end
+
+@inline function pressure2(u, equations::IdealGlmMhdMultiIonEquations3D)
+    pres = pressure(u, equations)
+    return pres[2]
+end
+
+@inline function Trixi.gradient_conservative(::typeof(pressure1),
+                                             u, equations::IdealGlmMhdMultiIonEquations3D)
+    return gradient_conservative_pressure(1, u, equations)
+end
+
+@inline function Trixi.gradient_conservative(::typeof(pressure2),
+                                             u, equations::IdealGlmMhdMultiIonEquations3D)
+    return gradient_conservative_pressure(2, u, equations)
+end
+
+# Transformation from conservative variables u to d(p)/d(u)
+function gradient_conservative_pressure(k::Integer, u,
+                                        equations::IdealGlmMhdMultiIonEquations3D)
+    rho, rho_v1, rho_v2, rho_v3, rho_e = Trixi.get_component(k, u, equations)
+    B1, B2, B3 = magnetic_field(u, equations)
+    psi = divergence_cleaning_field(u, equations)
+
+    v1 = rho_v1 / rho
+    v2 = rho_v2 / rho
+    v3 = rho_v3 / rho
+    v_square = v1^2 + v2^2 + v3^2
+
+    dp_du = zero(Trixi.MVector{nvariables(equations), eltype(u)})
+
+    Trixi.set_component!(dp_du, k, 0.5f0 * v_square, -v1, -v2, -v3, 1, equations)
+    dp_du[1] = -B1
+    dp_du[2] = -B2
+    dp_du[3] = -B3
+    dp_du[end] = -psi
+
+    for i in Trixi.eachvariable(equations)
+        dp_du[i] *= (equations.gammas[k] - 1.0)
+    end
+
+    return SVector(dp_du)
+end
+
+initial_condition = initial_condition_interpenetration
+
+volume_flux = (flux_ruedaramirez_etal, flux_nonconservative_ruedaramirez_etal)
+surface_flux = (flux_lax_friedrichs, flux_nonconservative_central)
+
+basis = LobattoLegendreBasis(3)
+
+limiter_idp = SubcellLimiterIDP(equations, basis;
+                                positivity_variables_cons = ["rho_1", "rho_2"],
+                                positivity_variables_nonlinear = [pressure1, pressure2],
+                                local_twosided_variables_cons = [], #["rho_1", "rho_2"] 
+                                local_onesided_variables_nonlinear = [],
+                                max_iterations_newton = 40, # Default parameters are not sufficient to fulfill bounds properly.
+                                newton_tolerances = (1.0e-14, 1.0e-15))
+
+volume_integral = VolumeIntegralSubcellLimiting(limiter_idp;
+                                                volume_flux_dg = volume_flux,
+                                                volume_flux_fv = surface_flux)
+
+# volume_integral = VolumeIntegralPureLGLFiniteVolume(surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+### MESH ###
+
+coordinates_min = (0.0, 0.0, 0.0)
+coordinates_max = (1.0, 1.0, 1.0)
+
+trees_per_dimension = (128, 1, 1)
+mesh = P4estMesh(trees_per_dimension,
+                 polydeg = 1,
+                 coordinates_min = coordinates_min,
+                 coordinates_max = coordinates_max,
+                 initial_refinement_level = 0,
+                 periodicity = (false, true, true))
+
+# ion-ion and ion-electron collision source terms
+# We don't need Lorentz terms because there's no electric field
+function source_terms_counterstreaming(u, x, t, equations::IdealGlmMhdMultiIonEquations3D)
+    Sii = source_terms_collision_ion_ion(u, x, t, equations::IdealGlmMhdMultiIonEquations3D)
+    Sie = source_terms_collision_ion_electron(u, x, t,
+                                              equations::IdealGlmMhdMultiIonEquations3D)
+    return Sii + Sie
+end
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    source_terms = source_terms_counterstreaming,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+tspan = (0.0, 0.4)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 50
+analysis_callback = AnalysisCallback(semi,
+                                     save_analysis = true,
+                                     interval = analysis_interval,
+                                     extra_analysis_integrals = (temperature1, temperature2,
+                                                                 vel11, vel21),
+                                     output_directory = joinpath(@__DIR__, "out"))
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+save_solution = SaveSolutionCallback(dt = 0.01, # interval = 50, # 
+                                     save_initial_solution = true,
+                                     save_final_solution = true,
+                                     solution_variables = cons2prim,
+                                     output_directory = joinpath(@__DIR__, "out")
+                                     #  extra_node_variables = (:limiting_coefficient,)
+                                     )
+
+stepsize_callback = StepsizeCallback(cfl = 0.5)
+
+save_restart = SaveRestartCallback(interval = 100,
+                                   save_final_restart = true,
+                                   output_directory = joinpath(@__DIR__, "out"))
+glm_speed_callback = GlmSpeedCallback(glm_scale = 1.0, cfl = cfl)
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback, alive_callback,
+                        save_solution,
+                        save_restart,
+                        stepsize_callback,
+                        glm_speed_callback)
+
+###############################################################################
+
+# stage_callbacks = ()
+stage_callbacks = (SubcellLimiterIDPCorrection(), BoundsCheckCallback())
+
+sol = Trixi.solve(ode, Trixi.SimpleSSPRK33(stage_callbacks = stage_callbacks);
+                  dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  save_everystep = false, callback = callbacks);
+
+#= sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false); #SSPRK43(stage_limiter!); # stage_limiter!, 
+            dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep=false, callback=callbacks); =#

From d83e7bdcedaafd2a8399668e040d4a0905b4fe8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Rueda-Ram=C3=ADrez?= <aruedara@uni-koeln.de>
Date: Tue, 23 Sep 2025 12:29:03 +0200
Subject: [PATCH 17/18] Added copies of 2D pressure functions for 3D MHD

---
 src/equations/ideal_glm_mhd_2d.jl |  7 ++-----
 src/equations/ideal_glm_mhd_3d.jl | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/equations/ideal_glm_mhd_2d.jl b/src/equations/ideal_glm_mhd_2d.jl
index 17bc77d0bad..35dd1023b70 100644
--- a/src/equations/ideal_glm_mhd_2d.jl
+++ b/src/equations/ideal_glm_mhd_2d.jl
@@ -1645,9 +1645,7 @@ end
     return v
 end
 
-@inline function pressure(u,
-                          equations::Union{IdealGlmMhdEquations2D,
-                                           IdealGlmMhdEquations3D})
+@inline function pressure(u, equations::IdealGlmMhdEquations2D)
     rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
     p = (equations.gamma - 1) * (rho_e - 0.5f0 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho
          -
@@ -1660,8 +1658,7 @@ end
 # Transformation from conservative variables u to d(p)/d(u)
 @inline function gradient_conservative(::typeof(pressure),
                                        u,
-                                       equations::Union{IdealGlmMhdEquations2D,
-                                                        IdealGlmMhdEquations3D})
+                                       equations::IdealGlmMhdEquations2D)
     rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
 
     v1 = rho_v1 / rho
diff --git a/src/equations/ideal_glm_mhd_3d.jl b/src/equations/ideal_glm_mhd_3d.jl
index 13bb339aae6..ce6a9d42423 100644
--- a/src/equations/ideal_glm_mhd_3d.jl
+++ b/src/equations/ideal_glm_mhd_3d.jl
@@ -1433,6 +1433,31 @@ end
     return v
 end
 
+@inline function pressure(u, equations::IdealGlmMhdEquations3D)
+    rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
+    p = (equations.gamma - 1) * (rho_e - 0.5f0 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho
+         -
+         0.5f0 * (B1^2 + B2^2 + B3^2)
+         -
+         0.5f0 * psi^2)
+    return p
+end
+
+# Transformation from conservative variables u to d(p)/d(u)
+@inline function gradient_conservative(::typeof(pressure),
+                                       u,
+                                       equations::IdealGlmMhdEquations3D)
+    rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
+
+    v1 = rho_v1 / rho
+    v2 = rho_v2 / rho
+    v3 = rho_v3 / rho
+    v_square = v1^2 + v2^2 + v3^2
+
+    return (equations.gamma - 1) *
+           SVector(0.5f0 * v_square, -v1, -v2, -v3, 1, -B1, -B2, -B3, -psi)
+end
+
 @inline function density_pressure(u, equations::IdealGlmMhdEquations3D)
     rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
     p = (equations.gamma - 1) * (rho_e - 0.5f0 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho

From 06ee72c59a62454608d601558935c56c6bc1ee52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9s=20Rueda-Ram=C3=ADrez?= <aruedara@uni-koeln.de>
Date: Wed, 24 Sep 2025 09:36:05 +0200
Subject: [PATCH 18/18] Improved allocations of elixir by using tuple instead
 of vector

---
 .../p4est_3d_dgsem/elixir_mhdmultiion_counterstreaming.jl   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/p4est_3d_dgsem/elixir_mhdmultiion_counterstreaming.jl b/examples/p4est_3d_dgsem/elixir_mhdmultiion_counterstreaming.jl
index aa111421df4..caa1dae212e 100644
--- a/examples/p4est_3d_dgsem/elixir_mhdmultiion_counterstreaming.jl
+++ b/examples/p4est_3d_dgsem/elixir_mhdmultiion_counterstreaming.jl
@@ -211,7 +211,7 @@ basis = LobattoLegendreBasis(3)
 
 limiter_idp = SubcellLimiterIDP(equations, basis;
                                 positivity_variables_cons = ["rho_1", "rho_2"],
-                                positivity_variables_nonlinear = [pressure1, pressure2],
+                                positivity_variables_nonlinear = (pressure1, pressure2),
                                 local_twosided_variables_cons = [], #["rho_1", "rho_2"] 
                                 local_onesided_variables_nonlinear = [],
                                 max_iterations_newton = 40, # Default parameters are not sufficient to fulfill bounds properly.
@@ -275,11 +275,13 @@ save_solution = SaveSolutionCallback(dt = 0.01, # interval = 50, #
                                      #  extra_node_variables = (:limiting_coefficient,)
                                      )
 
-stepsize_callback = StepsizeCallback(cfl = 0.5)
+cfl = 0.5
+stepsize_callback = StepsizeCallback(cfl = cfl)
 
 save_restart = SaveRestartCallback(interval = 100,
                                    save_final_restart = true,
                                    output_directory = joinpath(@__DIR__, "out"))
+
 glm_speed_callback = GlmSpeedCallback(glm_scale = 1.0, cfl = cfl)
 callbacks = CallbackSet(summary_callback,
                         analysis_callback, alive_callback,