diff --git a/ext/SparseMatrixColoringsCUDAExt.jl b/ext/SparseMatrixColoringsCUDAExt.jl index 2750964..c01aa25 100644 --- a/ext/SparseMatrixColoringsCUDAExt.jl +++ b/ext/SparseMatrixColoringsCUDAExt.jl @@ -48,12 +48,14 @@ function SMC.StarSetColoringResult( ag::SMC.AdjacencyGraph{T}, color::Vector{<:Integer}, star_set::SMC.StarSet{<:Integer}, + decompression_uplo::Symbol, ) where {T<:Integer} + @assert decompression_uplo == :F group = SMC.group_by_color(T, color) - compressed_indices = SMC.star_csc_indices(ag, color, star_set) + compressed_indices = SMC.star_csc_indices(ag, color, star_set, decompression_uplo) additional_info = (; compressed_indices_gpu_csc=CuVector(compressed_indices)) return SMC.StarSetColoringResult( - A, ag, color, group, compressed_indices, additional_info + A, ag, color, group, compressed_indices, decompression_uplo, additional_info ) end @@ -86,12 +88,14 @@ function SMC.StarSetColoringResult( ag::SMC.AdjacencyGraph{T}, color::Vector{<:Integer}, star_set::SMC.StarSet{<:Integer}, + decompression_uplo::Symbol, ) where {T<:Integer} + @assert decompression_uplo == :F group = SMC.group_by_color(T, color) - compressed_indices = SMC.star_csc_indices(ag, color, star_set) + compressed_indices = SMC.star_csc_indices(ag, color, star_set, decompression_uplo) additional_info = (; compressed_indices_gpu_csr=CuVector(compressed_indices)) return SMC.StarSetColoringResult( - A, ag, color, group, compressed_indices, additional_info + A, ag, color, group, compressed_indices, decompression_uplo, additional_info ) end diff --git a/src/decompression.jl b/src/decompression.jl index 1528c88..4512165 100644 --- a/src/decompression.jl +++ b/src/decompression.jl @@ -448,6 +448,7 @@ end function decompress!( A::AbstractMatrix, B::AbstractMatrix, result::StarSetColoringResult, uplo::Symbol=:F ) + @assert result.decompression_uplo == :F (; ag, compressed_indices) = result (; S) = ag check_compatible_pattern(A, ag, uplo) @@ -472,6 +473,7 @@ function decompress_single_color!( result::StarSetColoringResult, uplo::Symbol=:F, ) + @assert result.decompression_uplo == :F (; ag, compressed_indices, group) = result (; S) = ag check_compatible_pattern(A, ag, uplo) @@ -509,11 +511,12 @@ function decompress!( (; S) = ag nzA = nonzeros(A) check_compatible_pattern(A, ag, uplo) - if uplo == :F + if result.decompression_uplo == uplo for k in eachindex(nzA, compressed_indices) nzA[k] = B[compressed_indices[k]] end else + @assert result.decompression_uplo == :F rvS = rowvals(S) l = 0 # assume A has the same pattern as the triangle for j in axes(S, 2) @@ -534,6 +537,7 @@ end function decompress!( A::AbstractMatrix, B::AbstractMatrix, result::TreeSetColoringResult, uplo::Symbol=:F ) + @assert result.decompression_uplo == :F (; ag, color, reverse_bfs_orders, tree_edge_indices, nt, diagonal_indices, buffer) = result (; S) = ag diff --git a/src/interface.jl b/src/interface.jl index 936b974..d5fdba8 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -286,7 +286,7 @@ function _coloring( end color, star_set = argmin(maximum ∘ first, color_and_star_set_by_order) if speed_setting isa WithResult - return StarSetColoringResult(A, ag, color, star_set) + return StarSetColoringResult(A, ag, color, star_set, :F) else return color end @@ -307,7 +307,7 @@ function _coloring( end color, tree_set = argmin(maximum ∘ first, color_and_tree_set_by_order) if speed_setting isa WithResult - return TreeSetColoringResult(A, ag, color, tree_set, R) + return TreeSetColoringResult(A, ag, color, tree_set, R, :F) else return color end @@ -345,7 +345,7 @@ function _coloring( t -> maximum(t[3]) + maximum(t[4]), outputs_by_order ) # can't use ncolors without computing the full result if speed_setting isa WithResult - symmetric_result = StarSetColoringResult(A_and_Aᵀ, ag, color, star_set) + symmetric_result = StarSetColoringResult(A_and_Aᵀ, ag, color, star_set, :L) return BicoloringResult( A, ag, @@ -390,7 +390,7 @@ function _coloring( t -> maximum(t[3]) + maximum(t[4]), outputs_by_order ) # can't use ncolors without computing the full result if speed_setting isa WithResult - symmetric_result = TreeSetColoringResult(A_and_Aᵀ, ag, color, tree_set, R) + symmetric_result = TreeSetColoringResult(A_and_Aᵀ, ag, color, tree_set, R, :L) return BicoloringResult( A, ag, diff --git a/src/result.jl b/src/result.jl index c74ceaa..d26221f 100644 --- a/src/result.jl +++ b/src/result.jl @@ -309,6 +309,7 @@ struct StarSetColoringResult{ color::CT group::GT compressed_indices::VT + decompression_uplo::Symbol additional_info::A end @@ -317,43 +318,58 @@ function StarSetColoringResult( ag::AdjacencyGraph{T}, color::Vector{<:Integer}, star_set::StarSet{<:Integer}, + decompression_uplo::Symbol, ) where {T<:Integer} group = group_by_color(T, color) - compressed_indices = star_csc_indices(ag, color, star_set) - return StarSetColoringResult(A, ag, color, group, compressed_indices, nothing) + compressed_indices = star_csc_indices(ag, color, star_set, decompression_uplo) + return StarSetColoringResult( + A, ag, color, group, compressed_indices, decompression_uplo, nothing + ) end function star_csc_indices( - ag::AdjacencyGraph{T}, color::Vector{<:Integer}, star_set + ag::AdjacencyGraph{T}, + color::Vector{<:Integer}, + star_set::StarSet{<:Integer}, + decompression_uplo::Symbol, ) where {T} (; star, hub) = star_set S = pattern(ag) edge_to_index = edge_indices(ag) n = S.n rvS = rowvals(S) - compressed_indices = zeros(T, nnz(S)) # needs to be independent from the storage in the graph, in case the graph gets reused + nb_indices = nnz(S) + if decompression_uplo != :F + nb_indices = nb_edges(ag) + ag.nb_self_loops + end + compressed_indices = zeros(T, nb_indices) # needs to be independent from the storage in the graph, in case the graph gets reused + l = 0 for j in axes(S, 2) for k in nzrange(S, j) i = rvS[k] if i == j # diagonal coefficients + l += 1 c = color[i] - compressed_indices[k] = (c - 1) * n + i + compressed_indices[l] = (c - 1) * n + i else - # off-diagonal coefficients - index_ij = edge_to_index[k] - s = star[index_ij] - h = abs(hub[s]) - - # Assign the non-hub vertex (spoke) to the correct position in spokes - if i == h - # i is the hub and j is the spoke - c = color[i] - compressed_indices[k] = (c - 1) * n + j - else # j == h - # j is the hub and i is the spoke - c = color[j] - compressed_indices[k] = (c - 1) * n + i + if in_triangle(i, j, decompression_uplo) + # off-diagonal coefficients + l += 1 + index_ij = edge_to_index[k] + s = star[index_ij] + h = abs(hub[s]) + + # Assign the non-hub vertex (spoke) to the correct position in spokes + if i == h + # i is the hub and j is the spoke + c = color[i] + compressed_indices[l] = (c - 1) * n + j + else # j == h + # j is the hub and i is the spoke + c = color[j] + compressed_indices[l] = (c - 1) * n + i + end end end end @@ -391,6 +407,7 @@ struct TreeSetColoringResult{ lower_triangle_offsets::Vector{T} upper_triangle_offsets::Vector{T} buffer::Vector{R} + decompression_uplo::Symbol end function TreeSetColoringResult( @@ -399,6 +416,7 @@ function TreeSetColoringResult( color::Vector{<:Integer}, tree_set::TreeSet{<:Integer}, decompression_eltype::Type{R}, + decompression_uplo::Symbol, ) where {T<:Integer,R} (; reverse_bfs_orders, tree_edge_indices, nt) = tree_set (; S, nb_self_loops) = ag @@ -408,7 +426,7 @@ function TreeSetColoringResult( # Vector for the decompression of the diagonal coefficients diagonal_indices = Vector{T}(undef, nb_self_loops) - diagonal_nzind = Vector{T}(undef, nb_self_loops) + diagonal_nzind = (decompression_uplo == :F) ? Vector{T}(undef, nb_self_loops) : T[] if !augmented_graph(ag) l = 0 @@ -418,7 +436,9 @@ function TreeSetColoringResult( if i == j l += 1 diagonal_indices[l] = i - diagonal_nzind[l] = k + if decompression_uplo == :F + diagonal_nzind[l] = k + end end end end @@ -426,8 +446,8 @@ function TreeSetColoringResult( # Vectors for the decompression of the off-diagonal coefficients nedges = nb_edges(ag) - lower_triangle_offsets = Vector{T}(undef, nedges) - upper_triangle_offsets = Vector{T}(undef, nedges) + lower_triangle_offsets = decompression_uplo == :U ? T[] : Vector{T}(undef, nedges) + upper_triangle_offsets = decompression_uplo == :L ? T[] : Vector{T}(undef, nedges) # Index in lower_triangle_offsets and upper_triangle_offsets index_offsets = 0 @@ -451,21 +471,29 @@ function TreeSetColoringResult( if in_triangle(i, j, :L) # uplo = :L or uplo = :F # S[i,j] is stored at index_ij = (S.colptr[j+1] - offset_L) in S.nzval - lower_triangle_offsets[index_offsets] = length(col_j) - searchsortedfirst(col_j, i) + 1 + if decompression_uplo != :U + lower_triangle_offsets[index_offsets] = length(col_j) - searchsortedfirst(col_j, i) + 1 + end # uplo = :U or uplo = :F # S[j,i] is stored at index_ji = (S.colptr[i] + offset_U) in S.nzval - upper_triangle_offsets[index_offsets] = searchsortedfirst(col_i, j)::Int - 1 + if decompression_uplo != :L + upper_triangle_offsets[index_offsets] = searchsortedfirst(col_i, j)::Int - 1 + end # S[i,j] is in the upper triangular part of S else # uplo = :U or uplo = :F # S[i,j] is stored at index_ij = (S.colptr[j] + offset_U) in S.nzval - upper_triangle_offsets[index_offsets] = searchsortedfirst(col_j, i)::Int - 1 + if decompression_uplo != :L + upper_triangle_offsets[index_offsets] = searchsortedfirst(col_j, i)::Int - 1 + end # uplo = :L or uplo = :F # S[j,i] is stored at index_ji = (S.colptr[i+1] - offset_L) in S.nzval - lower_triangle_offsets[index_offsets] = length(col_i) - searchsortedfirst(col_i, j) + 1 + if decompression_uplo != :U + lower_triangle_offsets[index_offsets] = length(col_i) - searchsortedfirst(col_i, j) + 1 + end end #! format: on end @@ -488,6 +516,7 @@ function TreeSetColoringResult( lower_triangle_offsets, upper_triangle_offsets, buffer, + decompression_uplo, ) end