Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions benchmarks/matrix_benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,68 @@ function benchmark_sparse_dense_add!(

return nothing
end

"""
benchmark_kron!(SUITE, array_constructor, array_type_name; N=100, T=Float64)

Benchmark Kronecker product (kron) for CSC, CSR, and COO formats.

# Arguments
- `SUITE`: The BenchmarkGroup to add benchmarks to
- `array_constructor`: Function to construct arrays (e.g., `Array`, `JLArray`)
- `array_type_name`: String name for the array type (for display)

# Keyword Arguments
- `N`: Size of the matrices (default: 100)
- `T`: Element type (default: Float64)
"""
function benchmark_kron!(
SUITE,
array_constructor,
array_type_name;
N = 100,
T = Float64,
)
# Create sparse matrices with 1% density (smaller matrices since kron grows quadratically)
sm_a_std = sprand(T, N, N, 0.01)
sm_b_std = sprand(T, N, N, 0.01)

# Convert to different formats
sm_a_csc = DeviceSparseMatrixCSC(sm_a_std)
sm_b_csc = DeviceSparseMatrixCSC(sm_b_std)

sm_a_csr = DeviceSparseMatrixCSR(sm_a_std)
sm_b_csr = DeviceSparseMatrixCSR(sm_b_std)

sm_a_coo = DeviceSparseMatrixCOO(sm_a_std)
sm_b_coo = DeviceSparseMatrixCOO(sm_b_std)

# Adapt to device
dsm_a_csc = adapt(array_constructor, sm_a_csc)
dsm_b_csc = adapt(array_constructor, sm_b_csc)

dsm_a_csr = adapt(array_constructor, sm_a_csr)
dsm_b_csr = adapt(array_constructor, sm_b_csr)

dsm_a_coo = adapt(array_constructor, sm_a_coo)
dsm_b_coo = adapt(array_constructor, sm_b_coo)

# Level 3: Format (CSC, CSR, COO - will be plotted together)
SUITE["Kronecker Product"][array_type_name]["CSC"] = @benchmarkable begin
kron($dsm_a_csc, $dsm_b_csc)
_synchronize_backend($dsm_a_csc)
end

SUITE["Kronecker Product"][array_type_name]["CSR"] = @benchmarkable begin
kron($dsm_a_csr, $dsm_b_csr)
_synchronize_backend($dsm_a_csr)
end

SUITE["Kronecker Product"][array_type_name]["COO"] = @benchmarkable begin
kron($dsm_a_coo, $dsm_b_coo)
_synchronize_backend($dsm_a_coo)
end

return nothing
end

16 changes: 9 additions & 7 deletions benchmarks/runbenchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,19 @@ benchmark_matrix_vector_mul!(SUITE, Array, "Array")
benchmark_matrix_matrix_mul!(SUITE, Array, "Array")
benchmark_three_arg_dot!(SUITE, Array, "Array")
benchmark_sparse_dense_add!(SUITE, Array, "Array")
benchmark_kron!(SUITE, Array, "Array")
benchmark_conversions!(SUITE, Array, "Array")

# Run benchmarks for JLArrays
println("Running benchmarks for JLArrays...")
benchmark_vector_sum!(SUITE, jl, "JLArray")
benchmark_vector_sparse_dense_dot!(SUITE, jl, "JLArray")
benchmark_matrix_vector_mul!(SUITE, jl, "JLArray")
benchmark_matrix_matrix_mul!(SUITE, jl, "JLArray")
benchmark_three_arg_dot!(SUITE, jl, "JLArray")
benchmark_sparse_dense_add!(SUITE, jl, "JLArray")
benchmark_conversions!(SUITE, jl, "JLArray")
benchmark_vector_sum!(SUITE, JLArray, "JLArray")
benchmark_vector_sparse_dense_dot!(SUITE, JLArray, "JLArray")
benchmark_matrix_vector_mul!(SUITE, JLArray, "JLArray")
benchmark_matrix_matrix_mul!(SUITE, JLArray, "JLArray")
benchmark_three_arg_dot!(SUITE, JLArray, "JLArray")
benchmark_sparse_dense_add!(SUITE, JLArray, "JLArray")
benchmark_kron!(SUITE, JLArray, "JLArray")
benchmark_conversions!(SUITE, JLArray, "JLArray")

# Tune and run benchmarks
println("\nTuning benchmarks...")
Expand Down
2 changes: 1 addition & 1 deletion src/DeviceSparseArrays.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module DeviceSparseArrays

using LinearAlgebra
import LinearAlgebra: wrap, copymutable_oftype, __normalize!
import LinearAlgebra: wrap, copymutable_oftype, __normalize!, kron
using SparseArrays
import SparseArrays: SparseVector, SparseMatrixCSC
import SparseArrays: getcolptr, getrowval, getnzval, nonzeroinds
Expand Down
8 changes: 4 additions & 4 deletions src/conversions/conversion_kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,21 @@ end
keys,
@Const(rowind),
@Const(colind),
@Const(n),
@Const(m), # Number of rows - needed for proper column-major lexicographic ordering
)
i = @index(Global)
keys[i] = colind[i] * n + rowind[i]
keys[i] = colind[i] * m + rowind[i]
end

# Kernel for creating sort keys for COO → CSR conversion
@kernel inbounds=true function kernel_make_csr_keys!(
keys,
@Const(rowind),
@Const(colind),
@Const(m),
@Const(n),
)
i = @index(Global)
keys[i] = rowind[i] * m + colind[i]
keys[i] = rowind[i] * n + colind[i]
end

# Kernel for counting entries per column (for COO → CSC)
Expand Down
86 changes: 2 additions & 84 deletions src/conversions/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,6 @@
# CSC ↔ COO Conversions
# ============================================================================

"""
DeviceSparseMatrixCOO(A::DeviceSparseMatrixCSC)

Convert a Compressed Sparse Column (CSC) matrix to Coordinate (COO) format.

The conversion preserves all matrix data and maintains backend compatibility.
The result will be on the same backend (CPU/GPU) as the input matrix.

# Examples
```julia
using DeviceSparseArrays, SparseArrays

# Create a CSC matrix
A_sparse = sparse([1, 2, 3], [1, 2, 3], [1.0, 2.0, 3.0], 3, 3)
A_csc = DeviceSparseMatrixCSC(A_sparse)

# Convert to COO format
A_coo = DeviceSparseMatrixCOO(A_csc)
```
"""
function DeviceSparseMatrixCOO(A::DeviceSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
m, n = size(A)
nnz_count = nnz(A)
Expand All @@ -43,27 +23,6 @@ function DeviceSparseMatrixCOO(A::DeviceSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
return DeviceSparseMatrixCOO(m, n, rowind, colind, nzval)
end

"""
DeviceSparseMatrixCSC(A::DeviceSparseMatrixCOO)

Convert a Coordinate (COO) matrix to Compressed Sparse Column (CSC) format.

The conversion sorts the COO entries by column (then by row within each column)
and builds the column pointer structure. The result maintains backend compatibility
with the input matrix.

# Examples
```julia
using DeviceSparseArrays, SparseArrays

# Create a COO matrix
A_sparse = sparse([1, 2, 3], [1, 2, 3], [1.0, 2.0, 3.0], 3, 3)
A_coo = DeviceSparseMatrixCOO(A_sparse)

# Convert to CSC format
A_csc = DeviceSparseMatrixCSC(A_coo)
```
"""
function DeviceSparseMatrixCSC(A::DeviceSparseMatrixCOO{Tv,Ti}) where {Tv,Ti}
m, n = size(A)
nnz_count = nnz(A)
Expand All @@ -75,7 +34,7 @@ function DeviceSparseMatrixCSC(A::DeviceSparseMatrixCOO{Tv,Ti}) where {Tv,Ti}

# Create keys on device
kernel! = kernel_make_csc_keys!(backend)
kernel!(keys, A.rowind, A.colind, n; ndrange = (nnz_count,))
kernel!(keys, A.rowind, A.colind, m; ndrange = (nnz_count,))

perm = AcceleratedKernels.sortperm(keys)

Expand Down Expand Up @@ -103,26 +62,6 @@ end
# CSR ↔ COO Conversions
# ============================================================================

"""
DeviceSparseMatrixCOO(A::DeviceSparseMatrixCSR)

Convert a Compressed Sparse Row (CSR) matrix to Coordinate (COO) format.

The conversion preserves all matrix data and maintains backend compatibility.
The result will be on the same backend (CPU/GPU) as the input matrix.

# Examples
```julia
using DeviceSparseArrays, SparseArrays

# Create a CSR matrix
A_sparse = sparse([1, 2, 3], [1, 2, 3], [1.0, 2.0, 3.0], 3, 3)
A_csr = DeviceSparseMatrixCSR(A_sparse)

# Convert to COO format
A_coo = DeviceSparseMatrixCOO(A_csr)
```
"""
function DeviceSparseMatrixCOO(A::DeviceSparseMatrixCSR{Tv,Ti}) where {Tv,Ti}
m, n = size(A)
nnz_count = nnz(A)
Expand All @@ -141,27 +80,6 @@ function DeviceSparseMatrixCOO(A::DeviceSparseMatrixCSR{Tv,Ti}) where {Tv,Ti}
return DeviceSparseMatrixCOO(m, n, rowind, colind, nzval)
end

"""
DeviceSparseMatrixCSR(A::DeviceSparseMatrixCOO)

Convert a Coordinate (COO) matrix to Compressed Sparse Row (CSR) format.

The conversion sorts the COO entries by row (then by column within each row)
and builds the row pointer structure. The result maintains backend compatibility
with the input matrix.

# Examples
```julia
using DeviceSparseArrays, SparseArrays

# Create a COO matrix
A_sparse = sparse([1, 2, 3], [1, 2, 3], [1.0, 2.0, 3.0], 3, 3)
A_coo = DeviceSparseMatrixCOO(A_sparse)

# Convert to CSR format
A_csr = DeviceSparseMatrixCSR(A_coo)
```
"""
function DeviceSparseMatrixCSR(A::DeviceSparseMatrixCOO{Tv,Ti}) where {Tv,Ti}
m, n = size(A)
nnz_count = nnz(A)
Expand All @@ -173,7 +91,7 @@ function DeviceSparseMatrixCSR(A::DeviceSparseMatrixCOO{Tv,Ti}) where {Tv,Ti}

# Create keys on device
kernel! = kernel_make_csr_keys!(backend)
kernel!(keys, A.rowind, A.colind, m; ndrange = (nnz_count,))
kernel!(keys, A.rowind, A.colind, n; ndrange = (nnz_count,))

# Sort - use AcceleratedKernels
perm = AcceleratedKernels.sortperm(keys)
Expand Down
69 changes: 69 additions & 0 deletions src/matrix_coo/matrix_coo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,72 @@ function _add_sparse_to_dense!(C::DenseMatrix, A::DeviceSparseMatrixCOO)

return C
end

"""
kron(A::DeviceSparseMatrixCOO, B::DeviceSparseMatrixCOO)

Compute the Kronecker product of two sparse matrices in COO format.

The Kronecker product of two matrices `A` (size m×n) and `B` (size p×q)
is an (m*p)×(n*q) matrix formed by multiplying each element of `A` by the
entire matrix `B`.

# Examples
```jldoctest
julia> using DeviceSparseArrays, SparseArrays

julia> A = DeviceSparseMatrixCOO(sparse([1, 2], [1, 2], [1.0, 2.0], 2, 2));

julia> B = DeviceSparseMatrixCOO(sparse([1, 2], [1, 2], [3.0, 4.0], 2, 2));

julia> C = kron(A, B);

julia> size(C)
(4, 4)

julia> nnz(C)
4
```
"""
function LinearAlgebra.kron(
A::DeviceSparseMatrixCOO{Tv1,Ti1},
B::DeviceSparseMatrixCOO{Tv2,Ti2},
) where {Tv1,Ti1,Tv2,Ti2}
# Result dimensions
m_C = size(A, 1) * size(B, 1)
n_C = size(A, 2) * size(B, 2)
nnz_C = nnz(A) * nnz(B)

# Determine result types
Tv = promote_type(Tv1, Tv2)
Ti = promote_type(Ti1, Ti2)

# Check backend compatibility
backend_A = get_backend(A)
backend_B = get_backend(B)
backend_A == backend_B || throw(ArgumentError("Both arrays must have the same backend"))

# Allocate output arrays
rowind_C = similar(A.rowind, Ti, nnz_C)
colind_C = similar(A.colind, Ti, nnz_C)
nzval_C = similar(A.nzval, Tv, nnz_C)

# Launch kernel
kernel! = kernel_kron_coo!(backend_A)
kernel!(
A.rowind,
A.colind,
A.nzval,
B.rowind,
B.colind,
B.nzval,
rowind_C,
colind_C,
nzval_C,
size(B, 1),
size(B, 2);
ndrange = nnz_C,
)

return DeviceSparseMatrixCOO(m_C, n_C, rowind_C, colind_C, nzval_C)
end
44 changes: 43 additions & 1 deletion src/matrix_coo/matrix_coo_kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -137,5 +137,47 @@ end
)
i = @index(Global)

@inbounds C[rowind[i], colind[i]] += nzval[i]
C[rowind[i], colind[i]] += nzval[i]
end

# Kernel for computing Kronecker product in COO format
@kernel inbounds=true function kernel_kron_coo!(
@Const(rowind_A),
@Const(colind_A),
@Const(nzval_A),
@Const(rowind_B),
@Const(colind_B),
@Const(nzval_B),
rowind_C,
colind_C,
nzval_C,
@Const(m_B::Int),
@Const(n_B::Int),
)
idx = @index(Global, Linear)

nnz_A = length(nzval_A)
nnz_B = length(nzval_B)

if idx <= nnz_A * nnz_B
# Compute which element from A and B we're combining
idx_A = div(idx - 1, nnz_B) + 1
idx_B = mod(idx - 1, nnz_B) + 1

# Get the row and column indices from A and B
i_A = rowind_A[idx_A]
j_A = colind_A[idx_A]
val_A = nzval_A[idx_A]

i_B = rowind_B[idx_B]
j_B = colind_B[idx_B]
val_B = nzval_B[idx_B]

# Compute the row and column indices in C
# C[i,j] = A[i_A, j_A] * B[i_B, j_B]
# where i = (i_A - 1) * m_B + i_B and j = (j_A - 1) * n_B + j_B
rowind_C[idx] = (i_A - 1) * m_B + i_B
colind_C[idx] = (j_A - 1) * n_B + j_B
nzval_C[idx] = val_A * val_B
end
end
Loading