add CUDA steadystate_fourier test

Fe-r-oz · Fe-r-oz · commit 82e6c270c984 · 2025-05-28T13:58:20.000+05:00
diff --git a/ext/QuantumToolboxCUDAExt.jl b/ext/QuantumToolboxCUDAExt.jl
@@ -2,12 +2,14 @@ module QuantumToolboxCUDAExt
 
 using QuantumToolbox
 using QuantumToolbox: makeVal, getVal
-import QuantumToolbox: _sparse_similar, _convert_eltype_wordsize
-import CUDA: cu, CuArray, allowscalar
+import QuantumToolbox: _sparse_similar, _convert_eltype_wordsize, _safe_setindex!
+import CUDA: cu, CuArray, allowscalar, @allowscalar, has_cuda
 import CUDA.CUSPARSE: CuSparseVector, CuSparseMatrixCSC, CuSparseMatrixCSR, AbstractCuSparseArray
 import SparseArrays: SparseVector, SparseMatrixCSC, sparse
 import CUDA.Adapt: adapt
 
+export _safe_setindex!
+
 allowscalar(false)
 
 @doc raw"""
@@ -108,10 +110,17 @@ function QuantumToolbox._sparse_similar(A::CuSparseMatrixCSC, rows, cols, vals,
     cpu_sparse = sparse(rows, cols, vals, m, n)
     return CuSparseMatrixCSC(cpu_sparse)
 end
-
 function QuantumToolbox._sparse_similar(A::CuSparseMatrixCSR, rows, cols, vals, m::Int64, n::Int64)
     cpu_sparse = sparse(rows, cols, vals, m, n)
     return CuSparseMatrixCSR(cpu_sparse)
 end
 
+function _safe_setindex!(A, val, idx)  # write `val` into `A[idx]`; this module calls `allowscalar(false)` at load
+    if has_cuda() && isa(A, CuArray)  # GPU dense array: a plain scalar write is disallowed here
+        @allowscalar A[idx] = val  # permit scalar indexing for this single assignment only
+    else  # any other array type (e.g. a CPU array) takes the ordinary path
+        A[idx] = val
+    end
+end
+
 end
diff --git a/src/steadystate.jl b/src/steadystate.jl
@@ -369,8 +369,7 @@ function _steadystate_fourier(
     n_fourier = 2 * n_max + 1
     n_list = (-n_max):n_max
 
-    # Create arrays with proper GPU/CPU backend
-    weight = one(T)
+    weight = 1
     rows = _dense_similar(L_0_mat, Ns)
     cols = _dense_similar(L_0_mat, Ns)
     vals = _dense_similar(L_0_mat, Ns)
@@ -382,7 +381,6 @@ function _steadystate_fourier(
     Mn = _sparse_similar(L_0_mat, rows, cols, vals, N, N)
     L = L_0_mat + Mn
 
-    # Initialize the big matrix M with proper backend
     M = _sparse_similar(L_0_mat, n_fourier * N, n_fourier * N)
 
     # Add superdiagonal blocks (L_m)
@@ -391,7 +389,7 @@ function _steadystate_fourier(
         cols_block = _dense_similar(L_0_mat, N)
         fill!(rows_block, i)
         fill!(cols_block, i+1)
-        block = _sparse_similar(L_0_mat, rows_block, cols_block, ones(T, N), n_fourier, n_fourier)
+        block = _sparse_similar(L_0_mat, rows_block, cols_block, ones(N), n_fourier, n_fourier)
         M += kron(block, L_m_mat)
     end
 
@@ -401,7 +399,7 @@ function _steadystate_fourier(
         cols_block = _dense_similar(L_0_mat, N)
         fill!(rows_block, i+1)
         fill!(cols_block, i)
-        block = _sparse_similar(L_0_mat, rows_block, cols_block, ones(T, N), n_fourier, n_fourier)
+        block = _sparse_similar(L_0_mat, rows_block, cols_block, ones(N), n_fourier, n_fourier)
         M += kron(block, L_p_mat)
     end
 
@@ -413,16 +411,14 @@ function _steadystate_fourier(
         cols_block = _dense_similar(L_0_mat, N)
         fill!(rows_block, i)
         fill!(cols_block, i)
-        block = _sparse_similar(L_0_mat, rows_block, cols_block, ones(T, N), n_fourier, n_fourier)
+        block = _sparse_similar(L_0_mat, rows_block, cols_block, ones(N), n_fourier, n_fourier)
         M += kron(block, block_diag)
     end
 
-    # Initialize solution vector with proper backend
     v0 = _dense_similar(L_0_mat, n_fourier * N)
-    fill!(v0, zero(T))
-    allowed_setindex!(v0, weight, n_max * N + 1)
+    fill!(v0, 0)
+    _safe_setindex!(v0, weight, n_max * N + 1)
 
-    # Prepare preconditioners if needed
     if !isnothing(solver.Pl)
         kwargs = merge((; kwargs...), (Pl = solver.Pl(M),))
     elseif isa(M, SparseMatrixCSC)
@@ -431,11 +427,9 @@ function _steadystate_fourier(
     !isnothing(solver.Pr) && (kwargs = merge((; kwargs...), (Pr = solver.Pr(M),)))
     kwargs = merge((abstol = tol, reltol = tol), kwargs)
 
-    # Solve the linear system
     prob = LinearProblem(M, v0)
     ρtot = solve(prob, solver.alg; kwargs...).u
 
-    # Extract results
     offset1 = n_max * N
     offset2 = (n_max + 1) * N
     ρ0 = reshape(ρtot[(offset1+1):offset2], Ns, Ns)
diff --git a/src/utilities.jl b/src/utilities.jl
@@ -135,6 +135,7 @@ _dense_similar(A::AbstractSparseMatrix, args...) = similar(nonzeros(A), args...)
 
 _sparse_similar(A::AbstractArray, args...) = sparse(args...)
 _sparse_similar(A::AbstractArray, m::Int, n::Int) = spzeros(eltype(A), m, n)
+_safe_setindex!(A::Array, val, idx) = setindex!(A, val, idx) # CPU method so callers work without the CUDA extension loaded
 
 _Ginibre_ensemble(n::Int, rank::Int = n) = randn(ComplexF64, n, rank) / sqrt(n)
 
diff --git a/test/ext-test/gpu/cuda_ext.jl b/test/ext-test/gpu/cuda_ext.jl
@@ -154,6 +154,27 @@ end
     @test ρ_ss_cpu.data ≈ Array(ρ_ss_gpu_csr.data) atol = 1e-8 * length(ρ_ss_cpu)
 end
 
+@testset "CUDA steadystate_fourier" begin  # compares two steadystate_fourier solvers on cu-converted operators
+    N = 2  # NOTE(review): `N` is not referenced anywhere else in this testset
+    ωd = 1.0  # passed as the fourth positional argument to both calls below
+    n_max = 2  # forwarded as the `n_max` keyword to both calls
+    H0 = cu(sigmaz())
+    Hp = cu(sigmax())
+    Hm = cu(sigmax())
+    c_ops = [cu(sqrt(0.1) * sigmam())]
+    ρ_list1 = steadystate_fourier(H0, Hp, Hm, ωd, c_ops; solver = SteadyStateLinearSolver(), n_max = n_max)
+    ρ0 = steadystate_fourier(  # same inputs, different solver
+        H0,
+        Hp,
+        Hm,
+        ωd,
+        c_ops;
+        solver = SSFloquetEffectiveLiouvillian(SteadyStateDirectSolver()),
+        n_max = n_max,
+    )
+    @test isapprox(ρ0, ρ_list1[1]; atol = 1e-6)  # first entry of the linear-solver result must match ρ0
+end
+
 @testset "CUDA ptrace" begin
     g = fock(2, 1)
     e = fock(2, 0)