diff --git a/Project.toml b/Project.toml
index f1a22171eb1..a6ed162504a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -54,6 +54,7 @@ TrixiBase = "9a0f1c46-06d5-4909-a5a3-ce25d3fa3284"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [weakdeps]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Convex = "f65535da-76fb-5f13-bab9-19810c17039a"
 ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199"
@@ -61,12 +62,14 @@ Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
 NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
 
 [extensions]
+TrixiAMDGPUExt = "AMDGPU"
 TrixiCUDAExt = "CUDA"
 TrixiConvexECOSExt = ["Convex", "ECOS"]
 TrixiMakieExt = "Makie"
 TrixiNLsolveExt = "NLsolve"
 
 [compat]
+AMDGPU = "1.3.5"
 Accessors = "0.1.36"
 Adapt = "4"
 CUDA = "5.8"
diff --git a/ext/TrixiAMDGPUExt.jl b/ext/TrixiAMDGPUExt.jl
new file mode 100644
index 00000000000..f3f37769fcd
--- /dev/null
+++ b/ext/TrixiAMDGPUExt.jl
@@ -0,0 +1,19 @@
+# Package extension for adding AMDGPU-based features to Trixi.jl
+module TrixiAMDGPUExt
+
+import AMDGPU: ROCArray
+import Trixi
+
+# Storage type reported to Trixi.jl for AMD GPU arrays: any type matching
+# `Type{<:ROCArray}` is mapped to the unparameterized `ROCArray`.
+Trixi.storage_type(::Type{<:ROCArray}) = ROCArray
+
+# Return an array of type `to` with dimensions `size` that wraps the memory of
+# `vector` via `unsafe_wrap`, or a `similar` array if `vector` is empty.
+function Trixi.unsafe_wrap_or_alloc(to::Type{<:ROCArray}, vector, size)
+    # Empty input: allocate with `similar` rather than wrapping a pointer
+    iszero(length(vector)) && return similar(vector, size)
+    return unsafe_wrap(to, pointer(vector), size, lock = false)
+end
+
+end
diff --git a/test/Project.toml b/test/Project.toml
index b085cf23860..d2d835746e1 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,5 +1,6 @@
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
@@ -33,6 +34,7 @@ TrixiTest = "0a316866-cbd0-4425-8bcb-08103b2c1f26"
 [compat]
 Adapt = "4"
 ADTypes = "1.11"
+AMDGPU = "1.3.5"
 Aqua = "0.8"
 CairoMakie = "0.12, 0.13, 0.14, 0.15"
 Convex = "0.16"
diff --git a/test/runtests.jl b/test/runtests.jl
index 8f35e1fb58d..d32eac318bb 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -118,4 +118,13 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3)
             @warn "Unable to run CUDA tests on this machine"
         end
     end
+
+    @time if TRIXI_TEST == "all" || TRIXI_TEST == "AMDGPU"
+        import AMDGPU
+        if AMDGPU.functional() # only include the tests if AMDGPU.jl reports it is usable
+            include("test_amdgpu.jl")
+        else
+            @warn "Unable to run AMDGPU tests on this machine"
+        end
+    end
 end
diff --git a/test/test_amdgpu.jl b/test/test_amdgpu.jl
new file mode 100644
index 00000000000..6bbc9e41bec
--- /dev/null
+++ b/test/test_amdgpu.jl
@@ -0,0 +1,79 @@
+module TestAMDGPU
+
+using Test
+using Trixi
+
+include("test_trixi.jl")
+
+# Start with a clean environment: remove a leftover Trixi.jl output directory, if any
+outdir = "out"
+isdir(outdir) && rm(outdir, recursive = true)
+
+EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") # elixirs run by the tests below
+
+@trixi_testset "elixir_advection_basic_gpu.jl native" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"),
+                        # Expected errors are exactly the same as with TreeMesh!
+                        l2=8.311947673061856e-6,
+                        linf=6.627000273229378e-5,)
+    # Ensure that a single `rhs!` call does not allocate excessively
+    # (fewer than 1000 bytes; more would hint at, e.g., type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    end
+    @test real(ode.p.solver) == Float64
+    @test real(ode.p.solver.basis) == Float64
+    @test real(ode.p.solver.mortar) == Float64
+    # TODO: remake ignores the mesh itself as well
+    @test real(ode.p.mesh) == Float64
+
+    @test ode.u0 isa Array # no `storage_type` is passed above => plain `Array`s expected
+    @test ode.p.solver.basis.derivative_matrix isa Array
+
+    @test Trixi.storage_type(ode.p.cache.elements) === Array
+    @test Trixi.storage_type(ode.p.cache.interfaces) === Array
+    @test Trixi.storage_type(ode.p.cache.boundaries) === Array
+    @test Trixi.storage_type(ode.p.cache.mortars) === Array
+end
+
+@trixi_testset "elixir_advection_basic_gpu.jl Float32 / AMDGPU" begin
+    # Using AMDGPU inside the testset since otherwise the bindings are hidden by the anonymous modules
+    using AMDGPU
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"),
+                        # Expected errors are exactly the same as with TreeMesh!
+                        l2=nothing,   # TODO: GPU. [Float32(8.311947673061856e-6)],
+                        linf=nothing, # TODO: GPU. [Float32(6.627000273229378e-5)],
+                        RealT=Float32,
+                        real_type=Float32,
+                        storage_type=ROCArray,
+                        sol=nothing,) # TODO: GPU. Remove this once we can run the simulation on the GPU
+    # TODO: GPU. Re-enable once `sol` is no longer overridden with `nothing` above:
+    # # ensure that we do not have excessive memory allocations (e.g., type instabilities)
+    # let
+    #     t = sol.t[end]
+    #     u_ode = sol.u[end]
+    #     du_ode = similar(u_ode)
+    #     @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    # end
+    @test real(ode.p.solver) == Float32
+    @test real(ode.p.solver.basis) == Float32
+    @test real(ode.p.solver.mortar) == Float32
+    # TODO: remake ignores the mesh itself as well, hence still Float64 here
+    @test real(ode.p.mesh) == Float64
+
+    @test ode.u0 isa ROCArray
+    @test ode.p.solver.basis.derivative_matrix isa ROCArray
+
+    @test Trixi.storage_type(ode.p.cache.elements) === ROCArray
+    @test Trixi.storage_type(ode.p.cache.interfaces) === ROCArray
+    @test Trixi.storage_type(ode.p.cache.boundaries) === ROCArray
+    @test Trixi.storage_type(ode.p.cache.mortars) === ROCArray
+end
+
+# Clean up afterwards: delete the Trixi.jl output directory without emitting warnings
+@test_nowarn isdir(outdir) && rm(outdir, recursive = true)
+
+end # module