diff --git a/Project.toml b/Project.toml
index f1a22171eb1..a6ed162504a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -54,6 +54,7 @@ TrixiBase = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [weakdeps]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Convex = "f65535da-76fb-5f13-bab9-19810c17039a"
 ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199"
@@ -61,12 +62,14 @@ Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
 NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
 
 [extensions]
+TrixiAMDGPUExt = "AMDGPU"
 TrixiCUDAExt = "CUDA"
 TrixiConvexECOSExt = ["Convex", "ECOS"]
 TrixiMakieExt = "Makie"
 TrixiNLsolveExt = "NLsolve"
 
 [compat]
+AMDGPU = "1.3.5"
 Accessors = "0.1.36"
 Adapt = "4"
 CUDA = "5.8"
diff --git a/ext/TrixiAMDGPUExt.jl b/ext/TrixiAMDGPUExt.jl
new file mode 100644
index 00000000000..f3f37769fcd
--- /dev/null
+++ b/ext/TrixiAMDGPUExt.jl
@@ -0,0 +1,30 @@
+# Package extension for adding AMDGPU-based features to Trixi.jl
+module TrixiAMDGPUExt
+
+import AMDGPU: ROCArray
+import Trixi
+
+# Map every `ROCArray` type (whatever its parameters) to the bare `ROCArray`
+# type constructor.
+function Trixi.storage_type(::Type{<:ROCArray})
+    return ROCArray
+end
+
+# Return an array of dimensions `size` that stands in for `vector`.
+#
+# - Empty `vector`: allocate a new array with `similar(vector, size)`.
+# - Otherwise: wrap the memory at `pointer(vector)` as an array of type `to`
+#   via `unsafe_wrap` instead of allocating.
+#
+# NOTE(review): the empty case presumably exists because an empty array has no
+# usable pointer to wrap, and `lock = false` is forwarded to AMDGPU's
+# `unsafe_wrap` method -- confirm both against the AMDGPU.jl documentation.
+function Trixi.unsafe_wrap_or_alloc(to::Type{<:ROCArray}, vector, size)
+    if isempty(vector)
+        return similar(vector, size)
+    else
+        return unsafe_wrap(to, pointer(vector), size, lock = false)
+    end
+end
+
+end
diff --git a/test/Project.toml b/test/Project.toml
index b085cf23860..d2d835746e1 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,5 +1,6 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
@@ -33,6 +34,7 @@ TrixiTest = "0a316866-cbd0-4425-8bcb-08103b2c1f26"
 [compat]
 Adapt = "4"
 ADTypes = "1.11"
+AMDGPU = "1.3.5"
 Aqua = "0.8"
 CairoMakie = "0.12, 0.13, 0.14, 0.15"
 Convex = "0.16"
diff --git a/test/runtests.jl 
b/test/runtests.jl index 8f35e1fb58d..d32eac318bb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -118,4 +118,13 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @warn "Unable to run CUDA tests on this machine" end end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "AMDGPU" + import AMDGPU + if AMDGPU.functional() + include("test_amdgpu.jl") + else + @warn "Unable to run AMDGPU tests on this machine" + end + end end diff --git a/test/test_amdgpu.jl b/test/test_amdgpu.jl new file mode 100644 index 00000000000..6bbc9e41bec --- /dev/null +++ b/test/test_amdgpu.jl @@ -0,0 +1,79 @@ +module TestAMDGPU + +using Test +using Trixi + +include("test_trixi.jl") + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") + +@trixi_testset "elixir_advection_basic_gpu.jl native" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! 
+ l2=8.311947673061856e-6, + linf=6.627000273229378e-5,) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + @test real(ode.p.solver) == Float64 + @test real(ode.p.solver.basis) == Float64 + @test real(ode.p.solver.mortar) == Float64 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa Array + @test ode.p.solver.basis.derivative_matrix isa Array + + @test Trixi.storage_type(ode.p.cache.elements) === Array + @test Trixi.storage_type(ode.p.cache.interfaces) === Array + @test Trixi.storage_type(ode.p.cache.boundaries) === Array + @test Trixi.storage_type(ode.p.cache.mortars) === Array +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32 / AMDGPU" begin + # Using AMDGPU inside the testset since otherwise the bindings are hidden by the anonymous modules + using AMDGPU + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=nothing, # TODO: GPU. [Float32(8.311947673061856e-6)], + linf=nothing, # TODO: GPU. [Float32(6.627000273229378e-5)], + RealT=Float32, + real_type=Float32, + storage_type=ROCArray, + sol=nothing,) # TODO: GPU. 
Remove this once we can run the simulation on the GPU + # # Ensure that we do not have excessive memory allocations + # # (e.g., from type instabilities) + # let + # t = sol.t[end] + # u_ode = sol.u[end] + # du_ode = similar(u_ode) + # @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + # end + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa ROCArray + @test ode.p.solver.basis.derivative_matrix isa ROCArray + + @test Trixi.storage_type(ode.p.cache.elements) === ROCArray + @test Trixi.storage_type(ode.p.cache.interfaces) === ROCArray + @test Trixi.storage_type(ode.p.cache.boundaries) === ROCArray + @test Trixi.storage_type(ode.p.cache.mortars) === ROCArray +end + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn isdir(outdir) && rm(outdir, recursive = true) + +end # module