Skip to content

Commit e3b2c09

Browse files
Improve GPU support for steadystate
1 parent 7d58e46 commit e3b2c09

File tree

4 files changed

+42
-11
lines changed

4 files changed

+42
-11
lines changed

ext/QuantumToolboxCUDAExt.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ module QuantumToolboxCUDAExt
22

33
using QuantumToolbox
44
using QuantumToolbox: makeVal, getVal
5+
import QuantumToolbox: _sparse_similar
56
import CUDA: cu, CuArray, allowscalar
67
import CUDA.CUSPARSE: CuSparseVector, CuSparseMatrixCSC, CuSparseMatrixCSR, AbstractCuSparseArray
78
import SparseArrays: SparseVector, SparseMatrixCSC
@@ -106,4 +107,7 @@ QuantumToolbox.to_dense(A::MT) where {MT<:AbstractCuSparseArray} = CuArray(A)
106107
QuantumToolbox.to_dense(::Type{T1}, A::CuArray{T2}) where {T1<:Number,T2<:Number} = CuArray{T1}(A)
107108
QuantumToolbox.to_dense(::Type{T}, A::AbstractCuSparseArray) where {T<:Number} = CuArray{T}(A)
108109

110+
QuantumToolbox._sparse_similar(A::CuSparseMatrixCSC, args...) = sparse(args..., fmt = :csc)
111+
QuantumToolbox._sparse_similar(A::CuSparseMatrixCSR, args...) = sparse(args..., fmt = :csr)
112+
109113
end

src/steadystate.jl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -108,18 +108,18 @@ function _steadystate(L::QuantumObject{SuperOperatorQuantumObject}, solver::Stea
108108
N = prod(L.dimensions)
109109
weight = norm(L_tmp, 1) / length(L_tmp)
110110

111-
v0 = _get_dense_similar(L_tmp, N^2)
111+
v0 = _dense_similar(L_tmp, N^2)
112112
fill!(v0, 0)
113113
allowed_setindex!(v0, weight, 1) # Because scalar indexing is not allowed on GPU arrays
114114

115115
idx_range = collect(1:N)
116-
rows = _get_dense_similar(L_tmp, N)
117-
cols = _get_dense_similar(L_tmp, N)
118-
vals = _get_dense_similar(L_tmp, N)
116+
rows = _dense_similar(L_tmp, N)
117+
cols = _dense_similar(L_tmp, N)
118+
vals = _dense_similar(L_tmp, N)
119119
fill!(rows, 1)
120120
copyto!(cols, N .* (idx_range .- 1) .+ idx_range)
121121
fill!(vals, weight)
122-
Tn = sparse(rows, cols, vals, N^2, N^2)
122+
Tn = _sparse_similar(L_tmp, rows, cols, vals, N^2, N^2)
123123
L_tmp = L_tmp + Tn
124124

125125
(haskey(kwargs, :Pl) || haskey(kwargs, :Pr)) && error("The use of preconditioners must be defined in the solver.")
@@ -155,14 +155,14 @@ function _steadystate(L::QuantumObject{SuperOperatorQuantumObject}, solver::Stea
155155
N = prod(L.dimensions)
156156
weight = norm(L_tmp, 1) / length(L_tmp)
157157

158-
v0 = _get_dense_similar(L_tmp, N^2)
158+
v0 = _dense_similar(L_tmp, N^2)
159159
fill!(v0, 0)
160160
allowed_setindex!(v0, weight, 1) # Because scalar indexing is not allowed on GPU arrays
161161

162162
idx_range = collect(1:N)
163-
rows = _get_dense_similar(L_tmp, N)
164-
cols = _get_dense_similar(L_tmp, N)
165-
vals = _get_dense_similar(L_tmp, N)
163+
rows = _dense_similar(L_tmp, N)
164+
cols = _dense_similar(L_tmp, N)
165+
vals = _dense_similar(L_tmp, N)
166166
fill!(rows, 1)
167167
copyto!(cols, N .* (idx_range .- 1) .+ idx_range)
168168
fill!(vals, weight)

src/utilities.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,10 @@ end
130130

131131
get_typename_wrapper(A) = Base.typename(typeof(A)).wrapper
132132

133-
_get_dense_similar(A::AbstractArray, args...) = similar(A, args...)
134-
_get_dense_similar(A::AbstractSparseMatrix, args...) = similar(nonzeros(A), args...)
133+
_dense_similar(A::AbstractArray, args...) = similar(A, args...)
134+
_dense_similar(A::AbstractSparseMatrix, args...) = similar(nonzeros(A), args...)
135+
136+
_sparse_similar(A::AbstractArray, args...) = sparse(args...)
135137

136138
_Ginibre_ensemble(n::Int, rank::Int = n) = randn(ComplexF64, n, rank) / sqrt(n)
137139

test/ext-test/gpu/cuda_ext.jl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,31 @@
125125
@test all([isapprox(sol_cpu.expect[i], sol_gpu32.expect[i]; atol = 1e-6) for i in 1:length(tlist)])
126126
end
127127

128+
@testset "CUDA steadystate" begin
129+
N = 50
130+
Δ = 0.01
131+
F = 0.1
132+
γ = 0.1
133+
nth = 2
134+
135+
a = destroy(N)
136+
H = Δ * a' * a + F * (a + a')
137+
c_ops = [sqrt* (nth + 1)) * a, sqrt* nth) * a']
138+
139+
ρ_ss_cpu = steadystate(H, c_ops)
140+
141+
H_gpu_csc = cu(H)
142+
c_ops_gpu_csc = [cu(c_op) for c_op in c_ops]
143+
ρ_ss_gpu_csc = steadystate(H_gpu_csc, c_ops_gpu_csc, solver = SteadyStateLinearSolver())
144+
145+
H_gpu_csr = CuSparseMatrixCSR(H_gpu_csc)
146+
c_ops_gpu_csr = [CuSparseMatrixCSR(c_op) for c_op in c_ops_gpu_csc]
147+
ρ_ss_gpu_csr = steadystate(H_gpu_csr, c_ops_gpu_csr, solver = SteadyStateLinearSolver())
148+
149+
@test ρ_ss_cpu.data Array(ρ_ss_gpu_csc.data) atol = 1e-8*length(ρ_ss_cpu)
150+
@test ρ_ss_cpu.data Array(ρ_ss_gpu_csr.data) atol = 1e-8*length(ρ_ss_cpu)
151+
end
152+
128153
@testset "CUDA ptrace" begin
129154
g = fock(2, 1)
130155
e = fock(2, 0)

0 commit comments

Comments
 (0)