diff --git a/src/Trixi.jl b/src/Trixi.jl
index 75d2d739f59..c7216a78969 100644
--- a/src/Trixi.jl
+++ b/src/Trixi.jl
@@ -18,7 +18,9 @@ module Trixi
 using Preferences: @load_preference, set_preferences!
 const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN")
 const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN")
-const _PREFERENCE_POLYESTER = @load_preference("polyester", true)
+# Preferences are stored as TOML, which has no symbol type: the backend is saved as a
+# string and converted to a `Symbol` here so that the `===` checks against symbols work.
+const _PREFERENCE_THREADING = Symbol(@load_preference("backend", "polyester"))
 const _PREFERENCE_LOOPVECTORIZATION = @load_preference("loop_vectorization", true)
 
 # Include other packages that are used in Trixi.jl
diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl
index d3b94f6e611..ba437f67db9 100644
--- a/src/auxiliary/auxiliary.jl
+++ b/src/auxiliary/auxiliary.jl
@@ -206,13 +206,13 @@ and [https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-re
 macro threaded(expr)
     # !!! danger "Heisenbug"
     # Look at the comments for `wrap_array` when considering to change this macro.
-    expr = if _PREFERENCE_POLYESTER
+    expr = @static if _PREFERENCE_THREADING === :polyester
         # Currently using `@batch` from Polyester.jl is more efficient,
         # bypasses the Julia task scheduler and provides parallelization with less overhead.
         quote
             $Trixi.@batch $(expr)
         end
-    else
+    elseif _PREFERENCE_THREADING === :static
         # The following code is a simple version using only `Threads.@threads` from the
         # standard library with an additional check whether only a single thread is used
         # to reduce some overhead (and allocations) for serial execution.
@@ -225,6 +225,26 @@ macro threaded(expr)
                 end
             end
         end
+    elseif _PREFERENCE_THREADING === :dynamic
+        # Same structure as the `:static` version, but using dynamic scheduling.
+        quote
+            let
+                if $Threads.nthreads() == 1
+                    $(expr)
+                else
+                    $Threads.@threads :dynamic $(expr)
+                end
+            end
+        end
+    elseif _PREFERENCE_THREADING === :serial
+        # No threading at all: emit the loop unchanged.
+        quote
+            $(expr)
+        end
+    else
+        # Fail loudly for an unknown backend (e.g., from a hand-edited preferences file).
+        # Without this branch, `expr` would silently become `nothing`.
+        error("Invalid threading backend preference: $(_PREFERENCE_THREADING)")
     end
     # Use `esc(quote ... end)` for nested macro calls as suggested in
     # https://github.com/JuliaLang/julia/issues/23221
diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl
index 241c3ab213a..e4a2d13e441 100644
--- a/src/auxiliary/math.jl
+++ b/src/auxiliary/math.jl
@@ -8,18 +8,31 @@
 const TRIXI_UUID = UUID("a7f1ee26-1774-49b1-8366-f1abc58fbfcb")
 
 """
-    Trixi.set_polyester!(toggle::Bool; force = true)
-
-Toggle the usage of [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl) for multithreading.
-By default, Polyester.jl is enabled, but it can
-be useful for performance comparisons to switch to the Julia core backend.
-
-This does not fully disable Polyester.jl,
-but only its use as part of Trixi.jl's [`@threaded`](@ref) macro.
+    Trixi.set_threading_backend!(backend::Symbol = :polyester; force = true)
+
+Toggle and/or switch backend behavior used in multithreaded loops inside Trixi.jl.
+The selected backend affects the behavior of Trixi.jl's [`@threaded`](@ref) macro, which is used
+throughout the codebase for parallel loops. By default, Polyester.jl is enabled for
+optimal performance, but switching backends can be useful for comparisons or debugging.
+
+# Available backends
+- `:polyester`: Uses the default [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl)
+- `:static`: Uses Julia's built-in static thread scheduling via `Threads.@threads :static`
+- `:dynamic`: Uses Julia's built-in dynamic thread scheduling via `Threads.@threads :dynamic`
+- `:serial`: Disables threading, executing loops serially
+
+The backend is stored as a preference and only takes effect after restarting Julia.
+Throws an `ArgumentError` if `backend` is not one of the options listed above.
 """
-function set_polyester!(toggle::Bool; force = true)
-    set_preferences!(TRIXI_UUID, "polyester" => toggle, force = force)
-    @info "Please restart Julia and reload Trixi.jl for the `polyester` change to take effect"
+function set_threading_backend!(backend::Symbol = :polyester; force = true)
+    valid_backends = (:polyester, :static, :dynamic, :serial)
+    if !(backend in valid_backends)
+        throw(ArgumentError("Invalid threading backend: $(backend). Current options are: $(join(valid_backends, ", "))"))
+    end
+    # TOML (used by Preferences.jl) cannot represent a `Symbol`, so store the name as a
+    # string; it is converted back to a `Symbol` when the preference is loaded.
+    set_preferences!(TRIXI_UUID, "backend" => string(backend), force = force)
+    @info "Please restart Julia and reload Trixi.jl for the `backend` change to take effect"
 end
 
 """
diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl
index 4b3947582ef..f47a050cf2b 100644
--- a/src/callbacks_step/summary.jl
+++ b/src/callbacks_step/summary.jl
@@ -209,7 +209,7 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator;
 
     # technical details
     setup = Pair{String, Any}["#threads" => Threads.nthreads()]
-    if !_PREFERENCE_POLYESTER
+    if _PREFERENCE_THREADING !== :polyester
         push!(setup, "Polyester" => "disabled")
     end
     if !_PREFERENCE_LOOPVECTORIZATION
diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl
index ad211b3c003..a60eed44154 100644
--- a/src/solvers/dg.jl
+++ b/src/solvers/dg.jl
@@ -663,7 +663,7 @@ end
     # since LoopVectorization does not support `ForwardDiff.Dual`s. Hence, we use
     # optimized `PtrArray`s whenever possible and fall back to plain `Array`s
     # otherwise.
-    if _PREFERENCE_POLYESTER && LoopVectorization.check_args(u_ode)
+    if _PREFERENCE_THREADING === :polyester && LoopVectorization.check_args(u_ode)
         # This version using `PtrArray`s from StrideArrays.jl is very fast and
         # does not result in allocations.
         #
@@ -697,7 +697,7 @@ end
                 nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)
     end
     # See comments on the DGSEM version above
-    if _PREFERENCE_POLYESTER && LoopVectorization.check_args(u_ode)
+    if _PREFERENCE_THREADING === :polyester && LoopVectorization.check_args(u_ode)
         # Here, we do not specialize on the number of nodes using `StaticInt` since
         # - it will not be type stable (SBP operators just store it as a runtime value)
         # - FD methods tend to use high node counts