Merge pull request #198 from ChrisRackauckas-Claude/fix-gpu-complex-union

ChrisRackauckas · web-flow · commit c8d5d9eaabed · 2025-12-16T05:16:52.000-01:00
Fix GPU expv! to support complex t via shared implementation
diff --git a/src/krylov_phiv.jl b/src/krylov_phiv.jl
@@ -129,10 +129,9 @@ function expv!(w::AbstractVector{Complex{Tw}}, t::Complex{Tt}, Ks::KrylovSubspac
     lmul!(beta, mul!(w, @view(V[:, 1:m]), compatible_multiplicative_operand(V, expHe))) # exp(A) ≈ norm(b) * V * exp(H)e
 end
 
-function ExponentialUtilities.expv!(w::GPUArraysCore.AbstractGPUVector{Tw},
-        t::Real, Ks::KrylovSubspace{T, U};
-        cache = nothing,
-        expmethod = ExpMethodHigham2005Base()) where {Tw, T, U}
+# Internal GPU implementation shared by Real and Complex t methods
+function _expv_gpu_impl!(w::GPUArraysCore.AbstractGPUVector, t, Ks::KrylovSubspace{T, U},
+        cache, expmethod) where {T, U}
     m, beta, V, H = Ks.m, Ks.beta, getV(Ks), getH(Ks)
     @assert length(w)==size(V, 1) "Dimension mismatch"
     if isnothing(cache)
@@ -150,18 +149,31 @@ function ExponentialUtilities.expv!(w::GPUArraysCore.AbstractGPUVector{Tw},
     if ishermitian(cache)
         # Optimize the case for symtridiagonal H
         F = eigen!(SymTridiagonal(cache))
-        expHe = F.vectors * (exp.(lmul!(t, F.values)) .* @view(F.vectors[1, :]))
+        expHe = F.vectors * (exp.(t * F.values) .* @view(F.vectors[1, :]))
     else
-        #lmul!(t, cache)
-        #expH = exponential!(cache, expmethod)
-        #expHe = @view(expH[:, 1])
         expH = exponential!(t * cache, expmethod)
         expHe = @view(expH[:, 1])
     end
 
     lmul!(beta, mul!(w, @view(V[:, 1:m]), Adapt.adapt(parameterless_type(w), expHe))) # exp(A) ≈ norm(b) * V * exp(H)e
 end
 
+# GPU expv! for Real t: keyword front end that forwards to the shared _expv_gpu_impl!
+function ExponentialUtilities.expv!(w::GPUArraysCore.AbstractGPUVector{Tw},
+        t::Real, Ks::KrylovSubspace{T, U};
+        cache = nothing, # passed through unchanged; the impl branches on isnothing(cache)
+        expmethod = ExpMethodHigham2005Base()) where {Tw, T, U}
+    _expv_gpu_impl!(w, t, Ks, cache, expmethod) # keywords become positional args; its value is returned
+end
+
+# GPU expv! for Complex t: matches only a complex-eltype GPU vector w; forwards to _expv_gpu_impl!
+function ExponentialUtilities.expv!(w::GPUArraysCore.AbstractGPUVector{Complex{Tw}},
+        t::Complex{Tt}, Ks::KrylovSubspace{T, U};
+        cache = nothing, # passed through unchanged; the impl branches on isnothing(cache)
+        expmethod = ExpMethodHigham2005Base()) where {Tw, Tt, T, U}
+    _expv_gpu_impl!(w, t, Ks, cache, expmethod) # keywords become positional args; its value is returned
+end
+
 compatible_multiplicative_operand(::AbstractArray, source::AbstractArray) = source
 
 ############################
diff --git a/test/gpu/gputests.jl b/test/gpu/gputests.jl
@@ -2,6 +2,7 @@ using LinearAlgebra
 using SparseArrays
 using CUDA
 using CUDA.CUSPARSE
+using ExponentialUtilities
 using ExponentialUtilities: inplace_add!,
                             exponential!, ExpMethodHigham2005, expv,
                             expv_timestep
@@ -55,3 +56,20 @@ E2 = Array(expv(t, A_gpu, b_gpu))
 E1 = expv_timestep(ts, A, b)
 E2 = Array(expv_timestep(ts, A_gpu, b_gpu))
 @test E1 ≈ E2
+
+@testset "GPU expv! with complex t" begin # checks the GPU result of expv! against the CPU result for complex t
+    T = ComplexF64
+    v0 = randn(T, 4) # NOTE(review): no RNG seed is visible in this hunk — inputs may differ between runs
+    cuv0 = cu(v0) # NOTE(review): `cu` may narrow the element type to single precision — confirm
+    A = randn(T, 4, 4)
+    cuA = cu(A)
+
+    Ks = ExponentialUtilities.arnoldi(A, v0; tol = 1e-7, ishermitian = false, opnorm = 1.0) # subspace from the CPU inputs
+    cuKs = ExponentialUtilities.arnoldi(cuA, cuv0; tol = 1e-7, ishermitian = false,
+        opnorm = 1.0) # same keyword settings, built from the `cu`-converted inputs
+
+    dt = 0.01im # purely imaginary literal, so expv! is called with a Complex t
+    ExponentialUtilities.expv!(v0, dt, Ks) # reference call on the plain arrays; v0 is the output argument
+    ExponentialUtilities.expv!(cuv0, dt, cuKs) # call under test; cuv0 is the output argument
+    @test v0 ≈ collect(cuv0) # approximate comparison after collecting cuv0 into a plain array
+end