From ae87cfd438d029eb0a93126df2f021cc550f907d Mon Sep 17 00:00:00 2001 From: afilogo Date: Mon, 26 May 2025 09:23:06 -0600 Subject: [PATCH 1/7] Support serial backend Co-authored-by: Valentin Churavy --- src/Trixi.jl | 2 +- src/auxiliary/auxiliary.jl | 8 ++++++-- src/auxiliary/math.jl | 25 ++++++++++++++++--------- src/callbacks_step/summary.jl | 2 +- src/solvers/dg.jl | 4 ++-- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index dda4bc021f5..677221b1a88 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -18,7 +18,7 @@ module Trixi using Preferences: @load_preference, set_preferences! const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN") const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN") -const _PREFERENCE_POLYESTER = @load_preference("polyester", true) +const _PREFERENCE_THREADING = @load_preference("backend", :polyester) const _PREFERENCE_LOOPVECTORIZATION = @load_preference("loop_vectorization", true) const _PREFERENCE_USE_NATIVE_THREADING = @load_preference("native_threading", true) diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index d3b94f6e611..3de7e583bcd 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -206,13 +206,13 @@ and [https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-re macro threaded(expr) # !!! danger "Heisenbug" # Look at the comments for `wrap_array` when considering to change this macro. - expr = if _PREFERENCE_POLYESTER + expr = @static if _PREFERENCE_THREADING === :polyester # Currently using `@batch` from Polyester.jl is more efficient, # bypasses the Julia task scheduler and provides parallelization with less overhead. quote $Trixi.@batch $(expr) end - else + elseif _PREFERENCE_THREADING === :static # The following code is a simple version using only `Threads.@threads` from the # standard library with an additional check whether only a single thread is used # to reduce some overhead (and allocations) for serial execution. @@ -225,6 +225,10 @@ macro threaded(expr) end end end + elseif _PREFERENCE_THREADING === :serial + quote + $(expr) + end end # Use `esc(quote ... end)` for nested macro calls as suggested in # https://github.com/JuliaLang/julia/issues/23221 diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 241c3ab213a..f9ea23c5355 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -8,18 +8,25 @@ const TRIXI_UUID = UUID("a7f1ee26-1774-49b1-8366-f1abc58fbfcb") """ - Trixi.set_polyester!(toggle::Bool; force = true) + Trixi.set_threading_backend!(backend::Symbol; force = true) -Toggle the usage of [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl) for multithreading. -By default, Polyester.jl is enabled, but it can -be useful for performance comparisons to switch to the Julia core backend. +Toggle and/or switch backend behavior used in multithreaded loops inside Trixi.jl. +The selected backend affects the behavior of Trixi.jl's [`@threaded`](@ref) macro, which is used +throughout the codebase for parallel loops. By default, Polyester.jl is enabled for +optimal performance, but switching backends can be useful for comparisons or debugging. -This does not fully disable Polyester.jl, -but only its use as part of Trixi.jl's [`@threaded`](@ref) macro. +# Available backends +- `:polyester`: Uses the default [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl) +- `:static`: Uses Julia's built-in static thread scheduling via `Threads.@threads :static` +- `:serial`: Disables threading, executing loops serially """ -function set_polyester!(toggle::Bool; force = true) - set_preferences!(TRIXI_UUID, "polyester" => toggle, force = force) - @info "Please restart Julia and reload Trixi.jl for the `polyester` change to take effect" +function set_threading_backend!(backend::Symbol = :polyester; force = true) + valid_backends = (:polyester, :static, :serial) + if !(backend in valid_backends) + throw(ArgumentError("Invalid threading backend: $(backend). Current options are: $(join(valid_backends, ", "))")) + end + set_preferences!(TRIXI_UUID, "backend" => backend, force = force) + @info "Please restart Julia and reload Trixi.jl for the `backend` change to take effect" end """ diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl index 4b3947582ef..f47a050cf2b 100644 --- a/src/callbacks_step/summary.jl +++ b/src/callbacks_step/summary.jl @@ -209,7 +209,7 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator; # technical details setup = Pair{String, Any}["#threads" => Threads.nthreads()] - if !_PREFERENCE_POLYESTER + if _PREFERENCE_THREADING !== :polyester push!(setup, "Polyester" => "disabled") end if !_PREFERENCE_LOOPVECTORIZATION diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 3ed10ec2d46..32121df0b6b 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -669,7 +669,7 @@ end # since LoopVectorization does not support `ForwardDiff.Dual`s. Hence, we use # optimized `PtrArray`s whenever possible and fall back to plain `Array`s # otherwise. - if _PREFERENCE_POLYESTER && LoopVectorization.check_args(u_ode) + if _PREFERENCE_THREADING === :polyester && LoopVectorization.check_args(u_ode) # This version using `PtrArray`s from StrideArrays.jl is very fast and # does not result in allocations. # @@ -704,7 +704,7 @@ end nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) end # See comments on the DGSEM version above - if _PREFERENCE_POLYESTER && LoopVectorization.check_args(u_ode) + if _PREFERENCE_THREADING === :polyester && LoopVectorization.check_args(u_ode) # Here, we do not specialize on the number of nodes using `StaticInt` since # - it will not be type stable (SBP operators just store it as a runtime value) # - FD methods tend to use high node counts From 171c26058d554ef1c7c081177bb7aff50de0e5b8 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 18 Jul 2025 10:38:14 +0200 Subject: [PATCH 2/7] Rebase post 2212 and KernelAbstraction support --- src/Trixi.jl | 1 - src/auxiliary/auxiliary.jl | 5 ++++- src/auxiliary/containers.jl | 5 +++-- src/auxiliary/math.jl | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 677221b1a88..948d0f1732c 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -20,7 +20,6 @@ const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN") const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN") const _PREFERENCE_THREADING = @load_preference("backend", :polyester) const _PREFERENCE_LOOPVECTORIZATION = @load_preference("loop_vectorization", true) -const _PREFERENCE_USE_NATIVE_THREADING = @load_preference("native_threading", true) # Include other packages that are used in Trixi.jl # (standard library packages first, other packages next, all of them sorted alphabetically) diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 3de7e583bcd..14b699d958b 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -212,10 +212,13 @@ macro threaded(expr) quote $Trixi.@batch $(expr) end - elseif _PREFERENCE_THREADING === :static + elseif _PREFERENCE_THREADING === :static || + _PREFERENCE_THREADING === :kernelabstractions # The following code is a simple version using only `Threads.@threads` from the # standard library with an additional check whether only a single thread is used # to reduce some overhead (and allocations) for serial execution. + # If we want to execute on KernelAbstractions, we use the static backend here to fallback on, + # for loops that do not yet support GPU execution. quote let if $Threads.nthreads() == 1 diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index aeeddcbe185..73583e94916 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -359,8 +359,9 @@ If the backend is `nothing`, the default multi-threaded CPU backend is used. """ function trixi_backend(x) # TODO: https://github.com/trixi-framework/Trixi.jl/pull/2417 - if (_PREFERENCE_POLYESTER && LoopVectorization.check_args(x)) || - (_PREFERENCE_USE_NATIVE_THREADING && get_backend(x) isa KernelAbstractions.CPU) + if (_PREFERENCE_THREADING === :polyester && LoopVectorization.check_args(x)) || + (_PREFERENCE_THREADING === :kernelabstractions && + get_backend(x) isa KernelAbstractions.CPU) return nothing end return get_backend(x) diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index f9ea23c5355..4c31dc477cb 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -19,9 +19,10 @@ optimal performance, but switching backends can be useful for comparisons or deb - `:polyester`: Uses the default [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl) - `:static`: Uses Julia's built-in static thread scheduling via `Threads.@threads :static` - `:serial`: Disables threading, executing loops serially +- `:kernelabstractions`: Preferentially use the KernelAbstractions kernels written in Trixi.jl, falling back to :static execution. """ function set_threading_backend!(backend::Symbol = :polyester; force = true) - valid_backends = (:polyester, :static, :serial) + valid_backends = (:polyester, :static, :serial, :kernelabstractions) if !(backend in valid_backends) throw(ArgumentError("Invalid threading backend: $(backend). Current options are: $(join(valid_backends, ", "))")) end From 2e9a929506e82628e9456fbffae7d26ec0a9b5dc Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 18 Jul 2025 11:16:32 +0200 Subject: [PATCH 3/7] Update src/auxiliary/containers.jl --- src/auxiliary/containers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index 73583e94916..f88ba3cb456 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -360,7 +360,7 @@ If the backend is `nothing`, the default multi-threaded CPU backend is used. function trixi_backend(x) # TODO: https://github.com/trixi-framework/Trixi.jl/pull/2417 if (_PREFERENCE_THREADING === :polyester && LoopVectorization.check_args(x)) || - (_PREFERENCE_THREADING === :kernelabstractions && + (_PREFERENCE_THREADING !== :kernelabstractions && get_backend(x) isa KernelAbstractions.CPU) return nothing end From 32d7a326ded71d012fb6b7f7d2cf0933d3b05234 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 18 Jul 2025 15:09:38 +0200 Subject: [PATCH 4/7] Apply suggestions from code review Co-authored-by: Hendrik Ranocha --- src/auxiliary/containers.jl | 1 - src/auxiliary/math.jl | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl index f88ba3cb456..5036863ff4b 100644 --- a/src/auxiliary/containers.jl +++ b/src/auxiliary/containers.jl @@ -358,7 +358,6 @@ Return the computational backend for `x`, which is either a KernelAbstractions b If the backend is `nothing`, the default multi-threaded CPU backend is used. """ function trixi_backend(x) - # TODO: https://github.com/trixi-framework/Trixi.jl/pull/2417 if (_PREFERENCE_THREADING === :polyester && LoopVectorization.check_args(x)) || (_PREFERENCE_THREADING !== :kernelabstractions && get_backend(x) isa KernelAbstractions.CPU) diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 4c31dc477cb..c79b750a2e5 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -19,7 +19,8 @@ optimal performance, but switching backends can be useful for comparisons or deb - `:polyester`: Uses the default [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl) - `:static`: Uses Julia's built-in static thread scheduling via `Threads.@threads :static` - `:serial`: Disables threading, executing loops serially -- `:kernelabstractions`: Preferentially use the KernelAbstractions kernels written in Trixi.jl, falling back to :static execution. +- `:kernelabstractions`: Preferentially use the [KernelAbstractions.jl](https://github.com/JuliaGPU/KernelAbstractions.jl) + kernels written in Trixi.jl, falling back to `:static` execution. """ function set_threading_backend!(backend::Symbol = :polyester; force = true) valid_backends = (:polyester, :static, :serial, :kernelabstractions) From 0f412a3487906f5ca81331993cbf286883961938 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 18 Jul 2025 15:13:58 +0200 Subject: [PATCH 5/7] add news and bump version --- NEWS.md | 11 +++++++++++ Project.toml | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c96b522da94..64be9c5779d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,17 @@ Trixi.jl follows the interpretation of used in the Julia ecosystem. Notable changes will be documented in this file for human readability. + +## Changes when updating to v0.13 from v0.12.x + +#### Changed + +- The `polyester` preference got merged with the `native_threading` preference and the `Trixi.set_polyester` function got renamed to `Trixi.set_threading_backend` ([#2476]). + +#### Deprecated + +#### Removed + ## Changes in the v0.12 lifecycle #### Added diff --git a/Project.toml b/Project.toml index aaa85ea3bb9..305e84aa051 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan ", "Andrés Rueda-Ramírez "] -version = "0.12.8-DEV" +version = "0.13.0-DEV" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" From 7ffb561c3b04a95cd9086303254d3cd8aeb0f85c Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 18 Jul 2025 20:36:05 +0200 Subject: [PATCH 6/7] Update NEWS.md --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 64be9c5779d..247b459fef5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,7 +10,7 @@ for human readability. #### Changed -- The `polyester` preference got merged with the `native_threading` preference and the `Trixi.set_polyester` function got renamed to `Trixi.set_threading_backend` ([#2476]). +- The `polyester` preference got merged with the `native_threading` preference and the `Trixi.set_polyester!` function got renamed to `Trixi.set_threading_backend!` ([#2476]). #### Deprecated From 4a11b04ea383f3e8c9d2d510536047f0c20e0d0f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 18 Jul 2025 21:53:02 +0200 Subject: [PATCH 7/7] Improve summary for threading backend --- src/callbacks_step/summary.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl index f47a050cf2b..3194cad0e6f 100644 --- a/src/callbacks_step/summary.jl +++ b/src/callbacks_step/summary.jl @@ -209,9 +209,7 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator; # technical details setup = Pair{String, Any}["#threads" => Threads.nthreads()] - if _PREFERENCE_THREADING !== :polyester - push!(setup, "Polyester" => "disabled") - end + push!(setup, "threading backend" => string(_PREFERENCE_THREADING)) if !_PREFERENCE_LOOPVECTORIZATION push!(setup, "LoopVectorization" => "disabled") end