diff --git a/ext/QuantumToolboxCUDAExt.jl b/ext/QuantumToolboxCUDAExt.jl index e676b4ea4..0dbff19bb 100644 --- a/ext/QuantumToolboxCUDAExt.jl +++ b/ext/QuantumToolboxCUDAExt.jl @@ -5,7 +5,7 @@ using QuantumToolbox: makeVal, getVal import QuantumToolbox: _sparse_similar, _convert_eltype_wordsize import CUDA: cu, CuArray, allowscalar import CUDA.CUSPARSE: CuSparseVector, CuSparseMatrixCSC, CuSparseMatrixCSR, AbstractCuSparseArray -import SparseArrays: SparseVector, SparseMatrixCSC, sparse +import SparseArrays: SparseVector, SparseMatrixCSC, sparse, spzeros import CUDA.Adapt: adapt allowscalar(false) @@ -102,7 +102,24 @@ QuantumToolbox.to_dense(A::MT) where {MT<:AbstractCuSparseArray} = CuArray(A) QuantumToolbox.to_dense(::Type{T1}, A::CuArray{T2}) where {T1<:Number,T2<:Number} = CuArray{T1}(A) QuantumToolbox.to_dense(::Type{T}, A::AbstractCuSparseArray) where {T<:Number} = CuArray{T}(A) -QuantumToolbox._sparse_similar(A::CuSparseMatrixCSC, args...) = sparse(args..., fmt = :csc) -QuantumToolbox._sparse_similar(A::CuSparseMatrixCSR, args...) = sparse(args..., fmt = :csr) - +QuantumToolbox._sparse_similar(A::CuSparseMatrixCSC, args...) = CuSparseMatrixCSC(sparse(args...)) +QuantumToolbox._sparse_similar(A::CuSparseMatrixCSR, args...) = CuSparseMatrixCSC(sparse(args...)) +QuantumToolbox._sparse_similar( + A::CuSparseMatrixCSC, + I::AbstractVector, + J::AbstractVector, + V::AbstractVector, + m::Int, + n::Int, +) = CuSparseMatrixCSC(sparse(I, J, V, m, n)) +QuantumToolbox._sparse_similar(A::CuSparseMatrixCSC, m::Int, n::Int) = CuSparseMatrixCSC(spzeros(eltype(A), m, n)) +QuantumToolbox._sparse_similar( + A::CuSparseMatrixCSR, + I::AbstractVector, + J::AbstractVector, + V::AbstractVector, + m::Int, + n::Int, +) = CuSparseMatrixCSR(sparse(I, J, V, m, n)) +QuantumToolbox._sparse_similar(A::CuSparseMatrixCSR, m::Int, n::Int) = CuSparseMatrixCSR(spzeros(eltype(A), m, n)) end diff --git a/src/steadystate.jl b/src/steadystate.jl index d17ad4783..4f3b19e3a 100644 --- a/src/steadystate.jl +++ b/src/steadystate.jl @@ -355,64 +355,54 @@ function _steadystate_fourier( tol::R = 1e-8, kwargs..., ) where {R<:Real} - T1 = eltype(L_0) - T2 = eltype(L_p) - T3 = eltype(L_m) - T = promote_type(T1, T2, T3) - - L_0_mat = get_data(L_0) - L_p_mat = get_data(L_p) - L_m_mat = get_data(L_m) - - N = size(L_0_mat, 1) + L0_mat = get_data(L_0) + Lp_mat = get_data(L_p) + Lm_mat = get_data(L_m) + T = promote_type(eltype(L0_mat), eltype(Lp_mat), eltype(Lm_mat)) + N = size(L0_mat, 1) Ns = isqrt(N) n_fourier = 2 * n_max + 1 n_list = (-n_max):n_max - - weight = 1 - Mn = sparse(ones(Ns), [Ns * (j - 1) + j for j in 1:Ns], fill(weight, Ns), N, N) - L = L_0_mat + Mn - - M = spzeros(T, n_fourier * N, n_fourier * N) - M += kron(spdiagm(1 => ones(n_fourier - 1)), L_m_mat) - M += kron(spdiagm(-1 => ones(n_fourier - 1)), L_p_mat) - for i in 1:n_fourier - n = n_list[i] - M += kron(sparse([i], [i], one(T), n_fourier, n_fourier), L - 1im * ωd * n * I) - end - + weight = one(T) + diag_indices = Ns * (0:(Ns-1)) .+ (1:Ns) + Mn = _sparse_similar(L0_mat, ones(Int, Ns), diag_indices, fill(weight, Ns), N, N) + L = L0_mat + Mn + Kp = _sparse_similar(L0_mat, spdiagm(-1 => ones(T, n_fourier - 1))) + Km = _sparse_similar(L0_mat, spdiagm(1 => ones(T, n_fourier - 1))) + M = kron(Kp, Lm_mat) + kron(Km, Lp_mat) + n_vals = -1im * ωd * T.(n_list) + I_N = _sparse_similar(L0_mat, sparse(I, N, N)) + I_F = _sparse_similar(L0_mat, spdiagm(0 => ones(T, n_fourier))) + D_F = _sparse_similar(L0_mat, spdiagm(0 => n_vals)) + block_diag = kron(I_F, L) + kron(D_F, I_N) + M += block_diag v0 = zeros(T, n_fourier * N) - v0[n_max*N+1] = weight - - (haskey(kwargs, :Pl) || haskey(kwargs, :Pr)) && error("The use of preconditioners must be defined in the solver.") + allowed_setindex!(v0, weight, n_max * N + 1) + (haskey(kwargs, :Pl) || haskey(kwargs, :Pr)) && + error("The use of preconditioners must be defined in the solver.") if !isnothing(solver.Pl) - kwargs = merge((; kwargs...), (Pl = solver.Pl(M),)) + kwargs = (; kwargs..., Pl = solver.Pl(M)) elseif isa(M, SparseMatrixCSC) - kwargs = merge((; kwargs...), (Pl = ilu(M, τ = 0.01),)) + kwargs = (; kwargs..., Pl = ilu(M, τ = 0.01)) end - !isnothing(solver.Pr) && (kwargs = merge((; kwargs...), (Pr = solver.Pr(M),))) - !haskey(kwargs, :abstol) && (kwargs = merge((; kwargs...), (abstol = tol,))) - !haskey(kwargs, :reltol) && (kwargs = merge((; kwargs...), (reltol = tol,))) - + if !isnothing(solver.Pr) + kwargs = (; kwargs..., Pr = solver.Pr(M)) + end + kwargs = merge((; abstol = tol, reltol = tol), kwargs) prob = LinearProblem(M, v0) ρtot = solve(prob, solver.alg; kwargs...).u - offset1 = n_max * N offset2 = (n_max + 1) * N - ρ0 = reshape(ρtot[(offset1+1):offset2], Ns, Ns) - ρ0_tr = tr(ρ0) - ρ0 = ρ0 / ρ0_tr + ρ0 = Matrix(reshape(view(ρtot, (offset1 + 1):offset2), Ns, Ns)) + ρ0 ./= tr(ρ0) ρ0 = QuantumObject((ρ0 + ρ0') / 2, type = Operator(), dims = L_0.dimensions) - ρtot = ρtot / ρ0_tr - - ρ_list = [ρ0] - for i in 0:(n_max-1) - ρi_m = reshape(ρtot[(offset1-(i+1)*N+1):(offset1-i*N)], Ns, Ns) - ρi_m = QuantumObject(ρi_m, type = Operator(), dims = L_0.dimensions) - push!(ρ_list, ρi_m) - end - - return ρ_list + idx_ranges = [(offset1 - (i + 1) * N + 1):(offset1 - i * N) for i in 0:(n_max - 1)] + ρ_components = map(idx_range -> + QuantumObject(Matrix(reshape(view(ρtot, idx_range), Ns, Ns)), + type = Operator(), + dims = L_0.dimensions), + idx_ranges) + return vcat([ρ0], ρ_components) end function _steadystate_fourier( diff --git a/src/utilities.jl b/src/utilities.jl index ebe9481f4..7b879dc68 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -134,6 +134,7 @@ _dense_similar(A::AbstractArray, args...) = similar(A, args...) _dense_similar(A::AbstractSparseMatrix, args...) = similar(nonzeros(A), args...) _sparse_similar(A::AbstractArray, args...) = sparse(args...) +_sparse_similar(A::AbstractArray, m::Int, n::Int) = spzeros(eltype(A), m, n) _Ginibre_ensemble(n::Int, rank::Int = n) = randn(ComplexF64, n, rank) / sqrt(n) diff --git a/test/ext-test/gpu/cuda_ext.jl b/test/ext-test/gpu/cuda_ext.jl index e6399e42a..276c02619 100644 --- a/test/ext-test/gpu/cuda_ext.jl +++ b/test/ext-test/gpu/cuda_ext.jl @@ -154,6 +154,27 @@ end @test ρ_ss_cpu.data ≈ Array(ρ_ss_gpu_csr.data) atol = 1e-8 * length(ρ_ss_cpu) end +@testset "CUDA steadystate_fourier" begin + N = 2 + ωd = 1.0 + n_max = 2 + H0 = cu(sigmaz()) + Hp = cu(sigmax()) + Hm = cu(sigmax()) + c_ops = [cu(sqrt(0.1) * sigmam())] + ρ_list1 = steadystate_fourier(H0, Hp, Hm, ωd, c_ops; solver = SteadyStateLinearSolver(), n_max = n_max) + ρ0 = steadystate_fourier( + H0, + Hp, + Hm, + ωd, + c_ops; + solver = SSFloquetEffectiveLiouvillian(SteadyStateDirectSolver()), + n_max = n_max, + ) + @test isapprox(ρ0, ρ_list1[1]; atol = 1e-6) +end + @testset "CUDA ptrace" begin g = fock(2, 1) e = fock(2, 0)