Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
ScopedValues = "7e506255-f358-4e82-b7e4-beb19740aa63"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[weakdeps]
Expand All @@ -33,5 +34,6 @@ Random = "1"
Reexport = "1"
ScopedValues = "1"
Serialization = "1"
SparseArrays = "1"
Statistics = "1"
julia = "1.10"
58 changes: 58 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,61 @@ This package is the counterpart of Julia's `AbstractArray` interface, but for GP
types: It provides functionality and tooling to speed-up development of new GPU array types.
**This package is not intended for end users!** Instead, you should use one of the packages
that builds on GPUArrays.jl, such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), or [Metal.jl](https://github.com/JuliaGPU/Metal.jl).

## Interface methods

To support a new GPU backend, you will need to implement various interface methods for your backend's array types.
Some (CPU-based) examples can be seen in the testing library `JLArrays` (located in the `lib` directory of this package).

### Dense array support

### Sparse array support (optional)

`GPUArrays.jl` provides **device-side** array types for `CSC`, `CSR`, `COO`, and `BSR` matrices, as well as sparse vectors.
It also provides abstract types for these layouts that you can create concrete child types of in order to benefit from the
backend-agnostic wrappers. In particular, `GPUArrays.jl` provides out-of-the-box support for broadcasting and `mapreduce` over
GPU sparse arrays.

For **host-side** types, your custom sparse types should implement:

- `dense_array_type` - the corresponding dense array type. For example, for a `CuSparseVector` or any `CuSparseMatrix` format (CSC, CSR, COO, BSR), the `dense_array_type` is `CuArray`
- `sparse_array_type` - the **untyped** sparse array type corresponding to a given parametrized type. A `CuSparseVector{Tv, Ti}` would have a `sparse_array_type` of `CuVector` -- note the lack of type parameters!
- `csc_type(::Type{T})` - the compressed sparse column type for your backend. A `CuSparseMatrixCSR` would have a `csc_type` of `CuSparseMatrixCSC`.
- `csr_type(::Type{T})` - the compressed sparse row type for your backend. A `CuSparseMatrixCSC` would have a `csr_type` of `CuSparseMatrixCSR`.
- `coo_type(::Type{T})` - the coordinate sparse matrix type for your backend. A `CuSparseMatrixCSC` would have a `coo_type` of `CuSparseMatrixCOO`.

Additionally, you need to teach `GPUArrays.jl` how to translate your backend's specific types onto the device. `GPUArrays.jl` provides the device-side types:

- `GPUSparseDeviceVector`
- `GPUSparseDeviceMatrixCSC`
- `GPUSparseDeviceMatrixCSR`
- `GPUSparseDeviceMatrixBSR`
- `GPUSparseDeviceMatrixCOO`

You will need to create a method of `Adapt.adapt_structure` for each format your backend supports. **Note** that if your backend supports separate address spaces,
as CUDA and ROCm do, you need to provide a parameter to these device-side arrays to indicate in which address space the underlying pointers live. An example of adapting
an array to the device-side struct:

```julia
function GPUArrays.GPUSparseDeviceVector(iPtr::MyDeviceVector{Ti, A},
nzVal::MyDeviceVector{Tv, A},
len::Int,
nnz::Ti) where {Ti, Tv, A}
GPUArrays.GPUSparseDeviceVector{Tv, Ti, MyDeviceVector{Ti, A}, MyDeviceVector{Tv, A}, A}(iPtr, nzVal, len, nnz)
end

function Adapt.adapt_structure(to::MyAdaptor, x::MySparseVector)
return GPUArrays.GPUSparseDeviceVector(
adapt(to, x.iPtr),
adapt(to, x.nzVal),
length(x), x.nnz
)
end
```

You'll also need to tell `KernelAbstractions.jl` (and, through it, `GPUArrays.jl` and `GPUCompiler.jl`) which backend owns your sparse arrays by extending `get_backend()`:

```julia
KA.get_backend(::MySparseVector) = MyBackend()
```
4 changes: 4 additions & 0 deletions lib/JLArrays/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@ version = "0.3.0"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[compat]
Adapt = "2.0, 3.0, 4.0"
GPUArrays = "11.1"
KernelAbstractions = "0.9, 0.10"
LinearAlgebra = "1"
Random = "1"
SparseArrays = "1"
julia = "1.8"
225 changes: 224 additions & 1 deletion lib/JLArrays/src/JLArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@

module JLArrays

export JLArray, JLVector, JLMatrix, jl, JLBackend
export JLArray, JLVector, JLMatrix, jl, JLBackend, JLSparseVector, JLSparseMatrixCSC, JLSparseMatrixCSR

using GPUArrays

using Adapt
using SparseArrays, LinearAlgebra

import GPUArrays: dense_array_type

import KernelAbstractions
import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config
Expand All @@ -19,6 +22,11 @@ import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, Dyn
import KernelAbstractions: POCL
end

# Minimal stand-in for a GPU address-space enumeration. JLArrays is a CPU
# reference backend, so there is only a single (generic) address space; the
# constant is passed as the AS type parameter of the device-side sparse types.
module AS

const Generic = 0

end

#
# Device functionality
Expand Down Expand Up @@ -115,7 +123,141 @@ mutable struct JLArray{T, N} <: AbstractGPUArray{T, N}
end
end

# Host-side sparse vector for the JLArrays reference backend, mirroring
# SparseArrays.SparseVector but holding its buffers in JLArrays.
mutable struct JLSparseVector{Tv, Ti} <: GPUArrays.AbstractGPUSparseVector{Tv, Ti}
    iPtr::JLArray{Ti, 1}   # indices of stored (structurally nonzero) entries
    nzVal::JLArray{Tv, 1}  # stored values, aligned with iPtr
    len::Int               # logical length of the vector
    nnz::Ti                # number of stored entries (== length(nzVal))

    function JLSparseVector{Tv, Ti}(iPtr::JLArray{<:Integer, 1}, nzVal::JLArray{Tv, 1},
                                    len::Integer) where {Tv, Ti <: Integer}
        # nnz is derived from the value buffer; iPtr is converted to eltype Ti
        # by the implicit field conversion performed by `new`.
        new{Tv, Ti}(iPtr, nzVal, len, length(nzVal))
    end
end
SparseArrays.nnz(x::JLSparseVector) = x.nnz
SparseArrays.nonzeroinds(x::JLSparseVector) = x.iPtr
SparseArrays.nonzeros(x::JLSparseVector) = x.nzVal

# Host-side CSC (compressed sparse column) matrix for the JLArrays backend;
# same storage layout as SparseArrays.SparseMatrixCSC, with JLArray buffers.
mutable struct JLSparseMatrixCSC{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSC{Tv, Ti}
    colPtr::JLArray{Ti, 1}  # column j's entries live at colPtr[j]:(colPtr[j+1]-1)
    rowVal::JLArray{Ti, 1}  # row index of each stored entry
    nzVal::JLArray{Tv, 1}   # stored values
    dims::NTuple{2,Int}     # (nrows, ncols)
    nnz::Ti                 # number of stored entries (== length(nzVal))

    function JLSparseMatrixCSC{Tv, Ti}(colPtr::JLArray{<:Integer, 1}, rowVal::JLArray{<:Integer, 1},
                                       nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
        new{Tv, Ti}(colPtr, rowVal, nzVal, dims, length(nzVal))
    end
end
# Convenience constructor inferring {Tv, Ti} from the buffer eltypes.
function JLSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
    return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, dims)
end
# Download to a CPU SparseMatrixCSC (identical layout, so a direct field copy).
SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSC) = SparseMatrixCSC(size(x)..., Array(x.colPtr), Array(x.rowVal), Array(x.nzVal))

JLSparseMatrixCSC(A::JLSparseMatrixCSC) = A  # identity conversion (no copy)

# Scalar indexing of a CSC matrix: binary-search column j's row indices for i.
# Returns zero(Tv) for entries outside the sparsity pattern.
function Base.getindex(A::JLSparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti}
    @boundscheck checkbounds(A, i, j)
    r1 = Int(@inbounds A.colPtr[j])
    r2 = Int(@inbounds A.colPtr[j+1]-1)
    (r1 > r2) && return zero(Tv)  # column j has no stored entries
    # the search over the view is relative to r1, so shift back to an absolute index
    r1 = searchsortedfirst(view(A.rowVal, r1:r2), i) + r1 - 1
    ((r1 > r2) || (A.rowVal[r1] != i)) ? zero(Tv) : A.nzVal[r1]
end

# Host-side CSR (compressed sparse row) matrix for the JLArrays backend.
mutable struct JLSparseMatrixCSR{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSR{Tv, Ti}
    rowPtr::JLArray{Ti, 1}  # row i's entries live at rowPtr[i]:(rowPtr[i+1]-1)
    colVal::JLArray{Ti, 1}  # column index of each stored entry
    nzVal::JLArray{Tv, 1}   # stored values
    dims::NTuple{2,Int}     # (nrows, ncols)
    nnz::Ti                 # number of stored entries (== length(nzVal))

    function JLSparseMatrixCSR{Tv, Ti}(rowPtr::JLArray{<:Integer, 1}, colVal::JLArray{<:Integer, 1},
                                       nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti<:Integer}
        new{Tv, Ti}(rowPtr, colVal, nzVal, dims, length(nzVal))
    end
end
# Convenience constructor inferring {Tv, Ti} from the buffer eltypes.
function JLSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
    return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, dims)
end
# Download to a CPU SparseMatrixCSC. A CSR matrix carries the same buffers as
# the CSC representation of its transpose, so build that and transpose back.
function SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSR)
    x_transpose = SparseMatrixCSC(size(x, 2), size(x, 1), Array(x.rowPtr), Array(x.colVal), Array(x.nzVal))
    return SparseMatrixCSC(transpose(x_transpose))
end

# Upload a lazily transposed/adjointed CPU CSC: go through the CSR constructor
# (which reuses the parent's buffers directly), then convert to CSC.
JLSparseMatrixCSC(Mat::Union{Transpose{Tv, <:SparseMatrixCSC}, Adjoint{Tv, <:SparseMatrixCSC}}) where {Tv} = JLSparseMatrixCSC(JLSparseMatrixCSR(Mat))

# Per-dimension size of a CSR matrix, following Base's convention that
# dimensions beyond the second are singleton.
function Base.size(g::JLSparseMatrixCSR, d::Integer)
    d >= 1 || throw(ArgumentError("dimension must be ≥ 1, got $d"))
    return d <= 2 ? g.dims[d] : 1
end

# transpose(CSC) has exactly the CSR layout: the parent's colptr becomes rowPtr
# and its rowval becomes colVal. Index buffers are converted to Cint.
JLSparseMatrixCSR(Mat::Transpose{Tv, <:SparseMatrixCSC}) where {Tv} =
    JLSparseMatrixCSR(JLVector{Cint}(parent(Mat).colptr), JLVector{Cint}(parent(Mat).rowval),
                      JLVector(parent(Mat).nzval), size(Mat))
# the adjoint additionally conjugates the stored values
JLSparseMatrixCSR(Mat::Adjoint{Tv, <:SparseMatrixCSC}) where {Tv} =
    JLSparseMatrixCSR(JLVector{Cint}(parent(Mat).colptr), JLVector{Cint}(parent(Mat).rowval),
                      JLVector(conj.(parent(Mat).nzval)), size(Mat))

JLSparseMatrixCSR(A::JLSparseMatrixCSR) = A  # identity conversion (no copy)

# Scalar indexing of a CSR matrix: binary-search row i0's column indices for i1.
# Returns zero(Tv) for entries outside the sparsity pattern.
function Base.getindex(A::JLSparseMatrixCSR{Tv, Ti}, i0::Integer, i1::Integer) where {Tv, Ti}
    @boundscheck checkbounds(A, i0, i1)
    c1 = Int(A.rowPtr[i0])
    c2 = Int(A.rowPtr[i0+1]-1)
    (c1 > c2) && return zero(Tv)  # row i0 has no stored entries
    # bounded searchsortedfirst over colVal[c1:c2] without allocating a view
    c1 = searchsortedfirst(A.colVal, i1, c1, c2, Base.Order.Forward)
    (c1 > c2 || A.colVal[c1] != i1) && return zero(Tv)
    nonzeros(A)[c1]
end

# GPUArrays host-interface traits for the dense JLArray type.
GPUArrays.storage(a::JLArray) = a.data
GPUArrays.dense_array_type(::JLArray{T, N}) where {T, N} = JLArray{T, N}
GPUArrays.dense_array_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, N}
GPUArrays.dense_vector_type(::JLArray{T, N}) where {T, N} = JLArray{T, 1}
GPUArrays.dense_vector_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, 1}

# sparse_array_type: the *untyped* sparse container behind each parametrized
# JLArrays sparse type (cf. the README's host-side interface contract).
GPUArrays.sparse_array_type(::JLSparseMatrixCSC) = JLSparseMatrixCSC
GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSC
GPUArrays.sparse_array_type(::JLSparseMatrixCSR) = JLSparseMatrixCSR
GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSR
GPUArrays.sparse_array_type(::JLSparseVector) = JLSparseVector
GPUArrays.sparse_array_type(::Type{<:JLSparseVector}) = JLSparseVector

# dense_array_type: every JLArrays sparse format densifies to JLArray.
GPUArrays.dense_array_type(::JLSparseVector) = JLArray
GPUArrays.dense_array_type(::Type{<:JLSparseVector}) = JLArray
GPUArrays.dense_array_type(::JLSparseMatrixCSC) = JLArray
GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSC}) = JLArray
GPUArrays.dense_array_type(::JLSparseMatrixCSR) = JLArray
GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSR}) = JLArray

# Format-conversion traits: which concrete type holds the CSC/CSR form of each
# matrix format. The interface contract (see the README) documents these on
# types (`csc_type(::Type{T})`), so define both instance and Type methods.
GPUArrays.csc_type(::JLSparseMatrixCSR) = JLSparseMatrixCSC
GPUArrays.csc_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSC
GPUArrays.csr_type(::JLSparseMatrixCSC) = JLSparseMatrixCSR
GPUArrays.csr_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSR

# similar without new dims: reuse (copy) the sparsity pattern, fresh values.
Base.similar(Mat::JLSparseMatrixCSR) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), size(Mat))
Base.similar(Mat::JLSparseMatrixCSR, T::Type) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat), T), size(Mat))

# similar with new dims: a placeholder matrix with no stored entries.
# NOTE(review): the pointer vector is a single zero(Int32) rather than the
# canonical (ncols+1)- resp. (nrows+1)-length ones-vector of an empty CSC/CSR;
# presumably callers rebuild the pattern before indexing — confirm against
# GPUArrays' generic code that calls this.
Base.similar(Mat::JLSparseMatrixCSC, T::Type, N::Int, M::Int) = JLSparseMatrixCSC(JLVector([zero(Int32)]), JLVector{Int32}(undef, 0), JLVector{T}(undef, 0), (N, M))
Base.similar(Mat::JLSparseMatrixCSR, T::Type, N::Int, M::Int) = JLSparseMatrixCSR(JLVector([zero(Int32)]), JLVector{Int32}(undef, 0), JLVector{T}(undef, 0), (N, M))

# element-type-preserving and tuple-dims forwarding variants
Base.similar(Mat::JLSparseMatrixCSC{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M)
Base.similar(Mat::JLSparseMatrixCSR{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M)

Base.similar(Mat::JLSparseMatrixCSC, T::Type, dims::Tuple{Int, Int}) = similar(Mat, T, dims...)
Base.similar(Mat::JLSparseMatrixCSR, T::Type, dims::Tuple{Int, Int}) = similar(Mat, T, dims...)

Base.similar(Mat::JLSparseMatrixCSC, dims::Tuple{Int, Int}) = similar(Mat, dims...)
Base.similar(Mat::JLSparseMatrixCSR, dims::Tuple{Int, Int}) = similar(Mat, dims...)

# Densification: round-trip through the CPU sparse types, then wrap in JLArray.
# NOTE(review): relies on a SparseVector(::JLSparseVector) constructor that is
# not visible in this hunk — confirm it exists alongside the SparseMatrixCSC
# conversions defined above.
JLArray(x::JLSparseVector) = JLArray(collect(SparseVector(x)))
JLArray(x::JLSparseMatrixCSC) = JLArray(collect(SparseMatrixCSC(x)))
JLArray(x::JLSparseMatrixCSR) = JLArray(collect(SparseMatrixCSC(x)))

# conversion of untyped data to a typed Array
function typed_data(x::JLArray{T}) where {T}
Expand Down Expand Up @@ -217,6 +359,79 @@ JLArray{T}(xs::AbstractArray{S,N}) where {T,N,S} = JLArray{T,N}(xs)
(::Type{JLArray{T,N} where T})(x::AbstractArray{S,N}) where {S,N} = JLArray{S,N}(x)
JLArray(A::AbstractArray{T,N}) where {T,N} = JLArray{T,N}(A)

# Upload a CPU SparseVector: allocate JLArray buffers and copy the index and
# value arrays over, converting eltypes where needed.
function JLSparseVector(xs::SparseVector{Tv, Ti}) where {Ti, Tv}
    inds = JLVector{Ti}(undef, length(xs.nzind))
    vals = JLVector{Tv}(undef, length(xs.nzval))
    copyto!(inds, convert(Vector{Ti}, xs.nzind))
    copyto!(vals, convert(Vector{Tv}, xs.nzval))
    return JLSparseVector{Tv, Ti}(inds, vals, length(xs))
end
Base.length(x::JLSparseVector) = x.len
Base.size(x::JLSparseVector) = (x.len,)

# Upload a CPU SparseMatrixCSC: allocate JLArray buffers and copy each field.
function JLSparseMatrixCSC(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv}
    colPtr = JLVector{Ti}(undef, length(xs.colptr))
    rowVal = JLVector{Ti}(undef, length(xs.rowval))
    nzVal = JLVector{Tv}(undef, length(xs.nzval))
    copyto!(colPtr, convert(Vector{Ti}, xs.colptr))
    copyto!(rowVal, convert(Vector{Ti}, xs.rowval))
    copyto!(nzVal, convert(Vector{Tv}, xs.nzval))
    return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, (xs.m, xs.n))
end
# a sparse vector uploads as a one-column CSC matrix
JLSparseMatrixCSC(xs::SparseVector) = JLSparseMatrixCSC(SparseMatrixCSC(xs))
Base.length(x::JLSparseMatrixCSC) = prod(x.dims)
Base.size(x::JLSparseMatrixCSC) = x.dims

# Upload a CPU SparseMatrixCSC as CSR: the CSC buffers of the transpose are
# exactly the CSR buffers of the original matrix.
function JLSparseMatrixCSR(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv}
    csr_xs = SparseMatrixCSC(transpose(xs))
    rowPtr = JLVector{Ti}(undef, length(csr_xs.colptr))
    colVal = JLVector{Ti}(undef, length(csr_xs.rowval))
    nzVal = JLVector{Tv}(undef, length(csr_xs.nzval))
    copyto!(rowPtr, convert(Vector{Ti}, csr_xs.colptr))
    copyto!(colVal, convert(Vector{Ti}, csr_xs.rowval))
    copyto!(nzVal, convert(Vector{Tv}, csr_xs.nzval))
    # dims are those of the original (untransposed) matrix
    return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, (xs.m, xs.n))
end
JLSparseMatrixCSR(xs::SparseVector{Tv, Ti}) where {Ti, Tv} = JLSparseMatrixCSR(SparseMatrixCSC(xs))
# format conversions between the JLArrays sparse types, via a CPU round-trip
function JLSparseMatrixCSR(xs::JLSparseMatrixCSC{Tv, Ti}) where {Ti, Tv}
    return JLSparseMatrixCSR(SparseMatrixCSC(xs))
end
function JLSparseMatrixCSC(xs::JLSparseMatrixCSR{Tv, Ti}) where {Ti, Tv}
    return JLSparseMatrixCSC(SparseMatrixCSC(xs))
end
# In-place copy between CSR matrices of equal shape; the destination's buffers
# are resized to adopt the source's sparsity pattern.
function Base.copyto!(dst::JLSparseMatrixCSR, src::JLSparseMatrixCSR)
    if size(dst) != size(src)
        throw(ArgumentError("Inconsistent Sparse Matrix size"))
    end
    resize!(dst.rowPtr, length(src.rowPtr))
    resize!(dst.colVal, length(src.colVal))
    resize!(SparseArrays.nonzeros(dst), length(SparseArrays.nonzeros(src)))
    copyto!(dst.rowPtr, src.rowPtr)
    copyto!(dst.colVal, src.colVal)
    copyto!(SparseArrays.nonzeros(dst), SparseArrays.nonzeros(src))
    dst.nnz = src.nnz
    dst
end
Base.length(x::JLSparseMatrixCSR) = prod(x.dims)
Base.size(x::JLSparseMatrixCSR) = x.dims

# Materialized adjoint/transpose. A CSR matrix's buffers, read as colPtr/rowVal
# of a CSC matrix with reversed dims, give exactly the transpose (and vice
# versa); only the nonzeros need conjugating for the adjoint. The final
# constructor call converts the reinterpreted matrix back to the input format.
function GPUArrays._spadjoint(A::JLSparseMatrixCSR)
    Aᴴ = JLSparseMatrixCSC(A.rowPtr, A.colVal, conj(A.nzVal), reverse(size(A)))
    JLSparseMatrixCSR(Aᴴ)
end
function GPUArrays._sptranspose(A::JLSparseMatrixCSR)
    Aᵀ = JLSparseMatrixCSC(A.rowPtr, A.colVal, A.nzVal, reverse(size(A)))
    JLSparseMatrixCSR(Aᵀ)
end
# FIX: the CSC methods were previously unqualified, defining new local
# functions instead of extending GPUArrays' generics like the CSR methods do.
function GPUArrays._spadjoint(A::JLSparseMatrixCSC)
    Aᴴ = JLSparseMatrixCSR(A.colPtr, A.rowVal, conj(A.nzVal), reverse(size(A)))
    JLSparseMatrixCSC(Aᴴ)
end
function GPUArrays._sptranspose(A::JLSparseMatrixCSC)
    Aᵀ = JLSparseMatrixCSR(A.colPtr, A.rowVal, A.nzVal, reverse(size(A)))
    JLSparseMatrixCSC(Aᵀ)
end

# idempotency: constructing a JLArray from an identically-typed JLArray is a no-op
JLArray{T,N}(xs::JLArray{T,N}) where {T,N} = xs

Expand Down Expand Up @@ -358,9 +573,17 @@ function GPUArrays.mapreducedim!(f, op, R::AnyJLArray, A::Union{AbstractArray,Br
R
end

# Device-side adaptation: translate the host sparse types into GPUArrays'
# device-side sparse structs, adapting each buffer. JLArrays has a single
# address space, hence the AS.Generic type parameter.
Adapt.adapt_structure(to::Adaptor, x::JLSparseMatrixCSC{Tv,Ti}) where {Tv,Ti} =
    GPUSparseDeviceMatrixCSC{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.colPtr), adapt(to, x.rowVal), adapt(to, x.nzVal), x.dims, x.nnz)
Adapt.adapt_structure(to::Adaptor, x::JLSparseMatrixCSR{Tv,Ti}) where {Tv,Ti} =
    GPUSparseDeviceMatrixCSR{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.rowPtr), adapt(to, x.colVal), adapt(to, x.nzVal), x.dims, x.nnz)
Adapt.adapt_structure(to::Adaptor, x::JLSparseVector{Tv,Ti}) where {Tv,Ti} =
    GPUSparseDeviceVector{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.iPtr), adapt(to, x.nzVal), x.len, x.nnz)

## KernelAbstractions interface

# All JLArrays types (dense and sparse) belong to the JLBackend.
KernelAbstractions.get_backend(::JLArray) = JLBackend()
KernelAbstractions.get_backend(::Union{JLSparseMatrixCSC, JLSparseMatrixCSR, JLSparseVector}) = JLBackend()

function KernelAbstractions.mkcontext(kernel::Kernel{JLBackend}, I, _ndrange, iterspace, ::Dynamic) where Dynamic
return KernelAbstractions.CompilerMetadata{KernelAbstractions.ndrange(kernel), Dynamic}(I, _ndrange, iterspace)
Expand Down
2 changes: 2 additions & 0 deletions src/GPUArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ using KernelAbstractions

# device functionality
include("device/abstractarray.jl")
include("device/sparse.jl")

# host abstractions
include("host/abstractarray.jl")
Expand All @@ -34,6 +35,7 @@ include("host/random.jl")
include("host/quirks.jl")
include("host/uniformscaling.jl")
include("host/statistics.jl")
include("host/sparse.jl")
include("host/alloc_cache.jl")


Expand Down
Loading
Loading