Skip to content

Commit e7fd923

Browse files
committed
Rename GPUArray to AbstractGPUArray.
1 parent 684b507 commit e7fd923

File tree

14 files changed

+223
-107
lines changed

14 files changed

+223
-107
lines changed

src/array.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
# reference implementation of the GPUArray interfaces
1+
# reference implementation of the GPUArrays interfaces
22

33
export JLArray
44

5-
struct JLArray{T, N} <: GPUArray{T, N}
5+
struct JLArray{T, N} <: AbstractGPUArray{T, N}
66
data::Array{T, N}
77
dims::Dims{N}
88

src/device/gpu.jl

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# gpu-specific functionality
2+
3+
export global_size, synchronize_threads
4+
5+
6+
## synchronization
7+
8+
"""
9+
synchronize_threads(state)
10+
11+
in CUDA terms `__synchronize`
12+
in OpenCL terms: `barrier(CLK_LOCAL_MEM_FENCE)`
13+
"""
14+
function synchronize_threads(state)
15+
error("Not implemented") # COV_EXCL_LINE
16+
end
17+
18+
19+
## device memory
20+
21+
const shmem_counter = Ref{Int}(0)
22+
23+
"""
24+
Creates a local static memory shared inside one block.
25+
Equivalent to `__local` of OpenCL or `__shared__ (<variable>)` of CUDA.
26+
"""
27+
macro LocalMemory(state, T, N)
28+
id = (shmem_counter[] += 1)
29+
quote
30+
lémem = LocalMemory($(esc(state)), $(esc(T)), Val($(esc(N))), Val($id))
31+
AbstractDeviceArray(lémem, $(esc(N)))
32+
end
33+
end
34+
35+
export @LocalMemory
36+
37+
"""
38+
Creates a block local array pointer with `T` being the element type
39+
and `N` the length. Both T and N need to be static! C is a counter for
40+
approriately get the correct Local mem id in CUDAnative.
41+
This is an internal method which needs to be overloaded by the GPU Array backends
42+
"""
43+
function LocalMemory(state, ::Type{T}, ::Val{N}, ::Val{C}) where {N, T, C}
44+
error("Not implemented") # COV_EXCL_LINE
45+
end

src/device/indexing.jl

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# indexing
2+
3+
export global_size, synchronize_threads, linear_index
4+
5+
6+
# thread indexing functions
7+
for sym in (:x, :y, :z)
8+
for f in (:blockidx, :blockdim, :threadidx, :griddim)
9+
fname = Symbol(string(f, '_', sym))
10+
@eval $fname(state)::Int = error("Not implemented") # COV_EXCL_LINE
11+
@eval export $fname
12+
end
13+
end
14+
15+
"""
16+
global_size(state)
17+
18+
Global size == blockdim * griddim == total number of kernel execution
19+
"""
20+
@inline function global_size(state)
21+
# TODO nd version
22+
griddim_x(state) * blockdim_x(state)
23+
end
24+
25+
"""
26+
linear_index(state)
27+
28+
linear index corresponding to each kernel launch (in OpenCL equal to get_global_id).
29+
30+
"""
31+
@inline function linear_index(state)
32+
(blockidx_x(state) - 1) * blockdim_x(state) + threadidx_x(state)
33+
end
34+
35+
"""
36+
linearidx(A, statesym = :state)
37+
38+
Macro form of `linear_index`, which calls return when out of bounds.
39+
So it can be used like this:
40+
41+
```julia
42+
function kernel(state, A)
43+
idx = @linear_index A state
44+
# from here on it's save to index into A with idx
45+
@inbounds begin
46+
A[idx] = ...
47+
end
48+
end
49+
```
50+
"""
51+
macro linearidx(A, statesym = :state)
52+
quote
53+
x1 = $(esc(A))
54+
i1 = linear_index($(esc(statesym)))
55+
i1 > length(x1) && return
56+
i1
57+
end
58+
end
59+
60+
"""
61+
cartesianidx(A, statesym = :state)
62+
63+
Like [`@linearidx(A, statesym = :state)`](@ref), but returns an N-dimensional `NTuple{ndim(A), Int}` as index
64+
"""
65+
macro cartesianidx(A, statesym = :state)
66+
quote
67+
x = $(esc(A))
68+
i2 = @linearidx(x, $(esc(statesym)))
69+
gpu_ind2sub(x, i2)
70+
end
71+
end

src/host/abstractarray.jl

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
1-
# core definition of the GPUArray type
1+
# core definition of the AbstractGPUArray type
22

3-
export GPUArray
3+
export AbstractGPUArray
44

5-
abstract type GPUArray{T, N} <: DenseArray{T, N} end
5+
abstract type AbstractGPUArray{T, N} <: DenseArray{T, N} end
66

77
# Sampler type that acts like a texture/image and allows interpolated access
88
abstract type Sampler{T, N} <: DenseArray{T, N} end
99

10-
const GPUVector{T} = GPUArray{T, 1}
11-
const GPUMatrix{T} = GPUArray{T, 2}
12-
const GPUVecOrMat{T} = Union{GPUArray{T, 1}, GPUArray{T, 2}}
10+
const GPUVector{T} = AbstractGPUArray{T, 1}
11+
const GPUMatrix{T} = AbstractGPUArray{T, 2}
12+
const GPUVecOrMat{T} = Union{AbstractGPUArray{T, 1}, AbstractGPUArray{T, 2}}
1313

1414
# input/output
1515

1616
## serialization
1717

1818
import Serialization: AbstractSerializer, serialize, deserialize, serialize_type
1919

20-
function serialize(s::AbstractSerializer, t::T) where T <: GPUArray
20+
function serialize(s::AbstractSerializer, t::T) where T <: AbstractGPUArray
2121
serialize_type(s, T)
2222
serialize(s, Array(t))
2323
end
24-
function deserialize(s::AbstractSerializer, ::Type{T}) where T <: GPUArray
24+
function deserialize(s::AbstractSerializer, ::Type{T}) where T <: AbstractGPUArray
2525
A = deserialize(s)
2626
T(A)
2727
end
@@ -56,15 +56,15 @@ convert_to_cpu(xs) = adapt(Array, xs)
5656
for (W, ctor) in (:AT => (A,mut)->mut(A), Adapt.wrappers...)
5757
@eval begin
5858
# display
59-
Base.print_array(io::IO, X::$W where {AT <: GPUArray}) =
59+
Base.print_array(io::IO, X::$W where {AT <: AbstractGPUArray}) =
6060
Base.print_array(io, $ctor(X, convert_to_cpu))
6161

6262
# show
63-
Base._show_nonempty(io::IO, X::$W where {AT <: GPUArray}, prefix::String) =
63+
Base._show_nonempty(io::IO, X::$W where {AT <: AbstractGPUArray}, prefix::String) =
6464
Base._show_nonempty(io, $ctor(X, convert_to_cpu), prefix)
65-
Base._show_empty(io::IO, X::$W where {AT <: GPUArray}) =
65+
Base._show_empty(io::IO, X::$W where {AT <: AbstractGPUArray}) =
6666
Base._show_empty(io, $ctor(X, convert_to_cpu))
67-
Base.show_vector(io::IO, v::$W where {AT <: GPUArray}, args...) =
67+
Base.show_vector(io::IO, v::$W where {AT <: AbstractGPUArray}, args...) =
6868
Base.show_vector(io, $ctor(v, convert_to_cpu), args...)
6969
end
7070
end
@@ -75,7 +75,7 @@ collect_to_cpu(xs::AbstractArray) = collect(convert_to_cpu(xs))
7575

7676
for (W, ctor) in (:AT => (A,mut)->mut(A), Adapt.wrappers...)
7777
@eval begin
78-
Base.collect(X::$W where {AT <: GPUArray}) = collect_to_cpu(X)
78+
Base.collect(X::$W where {AT <: AbstractGPUArray}) = collect_to_cpu(X)
7979
end
8080
end
8181

@@ -86,18 +86,18 @@ end
8686

8787
# convert to something we can get a pointer to
8888
materialize(x::AbstractArray) = Array(x)
89-
materialize(x::GPUArray) = x
89+
materialize(x::AbstractGPUArray) = x
9090
materialize(x::Array) = x
9191

92-
# TODO: do we want to support `copyto(..., WrappedArray{GPUArray})`
92+
# TODO: do we want to support `copyto(..., WrappedArray{AbstractGPUArray})`
9393
# if so (does not work due to lack of copy constructors):
9494
#for (W, ctor) in (:AT => (A,mut)->mut(A), Adapt.wrappers...)
9595
# @eval begin
96-
# materialize(X::$W) where {AT <: GPUArray} = AT(X)
96+
# materialize(X::$W) where {AT <: AbstractGPUArray} = AT(X)
9797
# end
9898
#end
9999

100-
for (D, S) in ((GPUArray, AbstractArray), (Array, GPUArray), (GPUArray, GPUArray))
100+
for (D, S) in ((AbstractGPUArray, AbstractArray), (Array, AbstractGPUArray), (AbstractGPUArray, AbstractGPUArray))
101101
@eval begin
102102
function Base.copyto!(dest::$D{T, N}, rdest::NTuple{N, UnitRange},
103103
src::$S{T, N}, ssrc::NTuple{N, UnitRange}) where {T, N}
@@ -128,7 +128,7 @@ end
128128

129129
## generalized blocks of heterogeneous memory
130130

131-
Base.copyto!(dest::GPUArray, src::GPUArray) =
131+
Base.copyto!(dest::AbstractGPUArray, src::AbstractGPUArray) =
132132
copyto!(dest, CartesianIndices(dest), src, CartesianIndices(src))
133133

134134
function copy_kernel!(state, dest, dest_offsets, src, src_offsets, shape, shape_dest, shape_source, length)
@@ -143,8 +143,8 @@ function copy_kernel!(state, dest, dest_offsets, src, src_offsets, shape, shape_
143143
return
144144
end
145145

146-
function Base.copyto!(dest::GPUArray{T, N}, destcrange::CartesianIndices{N},
147-
src::GPUArray{U, N}, srccrange::CartesianIndices{N}) where {T, U, N}
146+
function Base.copyto!(dest::AbstractGPUArray{T, N}, destcrange::CartesianIndices{N},
147+
src::AbstractGPUArray{U, N}, srccrange::CartesianIndices{N}) where {T, U, N}
148148
shape = size(destcrange)
149149
if shape != size(srccrange)
150150
throw(DimensionMismatch("Ranges don't match their size. Found: $shape, $(size(srccrange))"))
@@ -159,7 +159,7 @@ function Base.copyto!(dest::GPUArray{T, N}, destcrange::CartesianIndices{N},
159159
dest
160160
end
161161

162-
function Base.copyto!(dest::GPUArray{T, N}, destcrange::CartesianIndices{N},
162+
function Base.copyto!(dest::AbstractGPUArray{T, N}, destcrange::CartesianIndices{N},
163163
src::AbstractArray{T, N}, srccrange::CartesianIndices{N}) where {T, N}
164164
# Is this efficient? Maybe!
165165
# TODO: compare to a pure intrinsic copyto implementation!
@@ -172,7 +172,7 @@ function Base.copyto!(dest::GPUArray{T, N}, destcrange::CartesianIndices{N},
172172
end
173173

174174
function Base.copyto!(dest::AbstractArray{T, N}, destcrange::CartesianIndices{N},
175-
src::GPUArray{T, N}, srccrange::CartesianIndices{N}) where {T, N}
175+
src::AbstractGPUArray{T, N}, srccrange::CartesianIndices{N}) where {T, N}
176176
# Is this efficient? Maybe!
177177
dest_gpu = similar(src, size(destcrange))
178178
nrange = CartesianIndices(size(dest_gpu))
@@ -183,9 +183,9 @@ end
183183

184184
## other
185185

186-
Base.copy(x::GPUArray) = identity.(x)
186+
Base.copy(x::AbstractGPUArray) = identity.(x)
187187

188-
Base.deepcopy(x::GPUArray) = copy(x)
188+
Base.deepcopy(x::AbstractGPUArray) = copy(x)
189189

190190

191191
# reinterpret
@@ -221,20 +221,20 @@ This makes it easier to do checks just on the high level.
221221
"""
222222
function unsafe_reinterpret end
223223

224-
function reinterpret(::Type{T}, a::GPUArray{S,1}) where T where S
224+
function reinterpret(::Type{T}, a::AbstractGPUArray{S,1}) where T where S
225225
nel = (length(a)*sizeof(S)) ÷ sizeof(T)
226226
# TODO: maybe check that remainder is zero?
227227
return reinterpret(T, a, (nel,))
228228
end
229229

230-
function reinterpret(::Type{T}, a::GPUArray{S}) where T where S
230+
function reinterpret(::Type{T}, a::AbstractGPUArray{S}) where T where S
231231
if sizeof(S) != sizeof(T)
232232
throw(ArgumentError("result shape not specified"))
233233
end
234234
reinterpret(T, a, size(a))
235235
end
236236

237-
function reinterpret(::Type{T}, a::GPUArray{S}, dims::NTuple{N, Integer}) where T where S where N
237+
function reinterpret(::Type{T}, a::AbstractGPUArray{S}, dims::NTuple{N, Integer}) where T where S where N
238238
if !isbitstype(T)
239239
throw(ArgumentError("cannot reinterpret Array{$(S)} to ::Type{Array{$(T)}}, type $(T) is not a bits type"))
240240
end
@@ -248,13 +248,13 @@ function reinterpret(::Type{T}, a::GPUArray{S}, dims::NTuple{N, Integer}) where
248248
unsafe_reinterpret(T, a, dims)
249249
end
250250

251-
function Base._reshape(A::GPUArray{T}, dims::Dims) where T
251+
function Base._reshape(A::AbstractGPUArray{T}, dims::Dims) where T
252252
n = length(A)
253253
prod(dims) == n || throw(DimensionMismatch("parent has $n elements, which is incompatible with size $dims"))
254254
return unsafe_reinterpret(T, A, dims)
255255
end
256256
#ambig
257-
function Base._reshape(A::GPUArray{T, 1}, dims::Tuple{Integer}) where T
257+
function Base._reshape(A::AbstractGPUArray{T, 1}, dims::Tuple{Integer}) where T
258258
n = Base._length(A)
259259
prod(dims) == n || throw(DimensionMismatch("parent has $n elements, which is incompatible with size $dims"))
260260
return unsafe_reinterpret(T, A, dims)
@@ -266,4 +266,4 @@ end
266266
# TODO: filter!
267267

268268
# revert of JuliaLang/julia#31929
269-
Base.filter(f, As::GPUArray) = As[map(f, As)::GPUArray{Bool}]
269+
Base.filter(f, As::AbstractGPUArray) = As[map(f, As)::AbstractGPUArray{Bool}]

src/host/base.jl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,22 @@
22

33
allequal(x) = true
44
allequal(x, y, z...) = x == y && allequal(y, z...)
5-
function Base.map!(f, y::GPUArray, xs::GPUArray...)
5+
function Base.map!(f, y::AbstractGPUArray, xs::AbstractGPUArray...)
66
@assert allequal(size.((y, xs...))...)
77
return y .= f.(xs...)
88
end
9-
function Base.map(f, y::GPUArray, xs::GPUArray...)
9+
function Base.map(f, y::AbstractGPUArray, xs::AbstractGPUArray...)
1010
@assert allequal(size.((y, xs...))...)
1111
return f.(y, xs...)
1212
end
1313

1414
# Break ambiguities with base
15-
Base.map!(f, y::GPUArray) =
16-
invoke(map!, Tuple{Any,GPUArray,Vararg{GPUArray}}, f, y)
17-
Base.map!(f, y::GPUArray, x::GPUArray) =
18-
invoke(map!, Tuple{Any,GPUArray, Vararg{GPUArray}}, f, y, x)
19-
Base.map!(f, y::GPUArray, x1::GPUArray, x2::GPUArray) =
20-
invoke(map!, Tuple{Any,GPUArray, Vararg{GPUArray}}, f, y, x1, x2)
15+
Base.map!(f, y::AbstractGPUArray) =
16+
invoke(map!, Tuple{Any,AbstractGPUArray,Vararg{AbstractGPUArray}}, f, y)
17+
Base.map!(f, y::AbstractGPUArray, x::AbstractGPUArray) =
18+
invoke(map!, Tuple{Any,AbstractGPUArray, Vararg{AbstractGPUArray}}, f, y, x)
19+
Base.map!(f, y::AbstractGPUArray, x1::AbstractGPUArray, x2::AbstractGPUArray) =
20+
invoke(map!, Tuple{Any,AbstractGPUArray, Vararg{AbstractGPUArray}}, f, y, x1, x2)
2121

2222

2323
# Base functions that are sadly not fit for the the GPU yet (they only work for Int64)

src/host/broadcast.jl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,34 +11,34 @@ import Base.Broadcast: BroadcastStyle, Broadcasted, ArrayStyle
1111
# TODO: investigate if we should define out own `GPUArrayStyle{N} <: AbstractArrayStyle{N}`
1212
#
1313
# NOTE: this uses the specific `T` that was used e.g. `JLArray` or `CLArray` for ArrayStyle,
14-
# instead of using `ArrayStyle{GPUArray}`, due to the fact how `similar` works.
15-
BroadcastStyle(::Type{T}) where {T<:GPUArray} = ArrayStyle{T}()
14+
# instead of using `ArrayStyle{AbstractGPUArray}`, due to the fact how `similar` works.
15+
BroadcastStyle(::Type{T}) where {T<:AbstractGPUArray} = ArrayStyle{T}()
1616

1717
# Wrapper types otherwise forget that they are GPU compatible
1818
#
19-
# NOTE: Don't directly use ArrayStyle{GPUArray} here since that would mean that `CuArrays`
19+
# NOTE: Don't directly use ArrayStyle{AbstractGPUArray} here since that would mean that `CuArrays`
2020
# customization no longer take effect.
2121
for (W, ctor) in Adapt.wrappers
2222
@eval begin
23-
BroadcastStyle(::Type{<:$W}) where {AT<:GPUArray} = BroadcastStyle(AT)
24-
backend(::Type{<:$W}) where {AT<:GPUArray} = backend(AT)
23+
BroadcastStyle(::Type{<:$W}) where {AT<:AbstractGPUArray} = BroadcastStyle(AT)
24+
backend(::Type{<:$W}) where {AT<:AbstractGPUArray} = backend(AT)
2525
end
2626
end
2727

2828
# This Union is a hack. Ideally Base would have a Transpose <: WrappedArray <: AbstractArray
29-
# and we could define our methods in terms of Union{GPUArray, WrappedArray{<:Any, <:GPUArray}}
29+
# and we could define our methods in terms of Union{AbstractGPUArray, WrappedArray{<:Any, <:AbstractGPUArray}}
3030
@eval const GPUDestArray =
31-
Union{GPUArray, $((:($W where {AT <: GPUArray}) for (W, _) in Adapt.wrappers)...)}
31+
Union{AbstractGPUArray, $((:($W where {AT <: AbstractGPUArray}) for (W, _) in Adapt.wrappers)...)}
3232

3333
# We purposefully only specialize `copyto!`, dependent packages need to make sure that they
3434
# can handle:
3535
# - `bc::Broadcast.Broadcasted{Style}`
3636
# - `ex::Broadcast.Extruded`
37-
# - `LinearAlgebra.Transpose{,<:GPUArray}` and `LinearAlgebra.Adjoint{,<:GPUArray}`, etc
37+
# - `LinearAlgebra.Transpose{,<:AbstractGPUArray}` and `LinearAlgebra.Adjoint{,<:AbstractGPUArray}`, etc
3838
# as arguments to a kernel and that they do the right conversion.
3939
#
4040
# This Broadcast can be further customize by:
41-
# - `Broadcast.preprocess(dest::GPUArray, bc::Broadcasted{Nothing})` which allows for a
41+
# - `Broadcast.preprocess(dest::AbstractGPUArray, bc::Broadcasted{Nothing})` which allows for a
4242
# complete transformation based on the output type just at the end of the pipeline.
4343
# - `Broadcast.broadcasted(::Style, f)` selection of an implementation of `f` compatible
4444
# with `Style`

0 commit comments

Comments
 (0)