
Commit 2b00163

Merge pull request #136 from JuliaGPU/tb/one

Rework for 1.0

2 parents: 1049323 + 9b97847

18 files changed: +169 −188 lines

LICENSE.md

Lines changed: 22 additions & 39 deletions
@@ -1,40 +1,23 @@
-The GPUArrays.jl package is licensed under the MIT "Expat" License:
+The MIT License (MIT)
 
-> Copyright (c) 2016: Simon Danisch.
->
->
-> Permission is hereby granted, free of charge, to any person obtaining a copy
->
-> of this software and associated documentation files (the "Software"), to deal
->
-> in the Software without restriction, including without limitation the rights
->
-> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
->
-> copies of the Software, and to permit persons to whom the Software is
->
-> furnished to do so, subject to the following conditions:
->
->
->
-> The above copyright notice and this permission notice shall be included in all
->
-> copies or substantial portions of the Software.
->
->
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
->
-> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
->
-> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
->
-> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
->
-> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
->
-> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
->
-> SOFTWARE.
->
->
+Copyright © 2016 Simon Danisch
+
+Copyright © 2018 JuliaGPU developers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

src/GPUArrays.jl

Lines changed: 7 additions & 7 deletions
@@ -1,20 +1,18 @@
-__precompile__(true)
 module GPUArrays
 
 using Serialization
 using Random
 using LinearAlgebra
 using Printf
-import Base: copyto!
 
 import Random: rand, rand!
 using LinearAlgebra.BLAS
-using FFTW
-import FFTW: *, plan_ifft!, plan_fft!, plan_fft, plan_ifft, size, plan_bfft, plan_bfft!
 import Base: pointer, similar, size, convert
 using Base: @propagate_inbounds, @pure, RefValue
 using Base.Cartesian
-using Random
+
+using FFTW
+import FFTW: *, plan_ifft!, plan_fft!, plan_fft, plan_ifft, size, plan_bfft, plan_bfft!
 
 include("abstractarray.jl")
 include("abstract_gpu_interface.jl")
@@ -30,10 +28,12 @@ include("linalg.jl")
 include("mapreduce.jl")
 include("vectors.jl")
 include("convolution.jl")
-include("testsuite/testsuite.jl")
-include("jlbackend.jl")
 include("random.jl")
 
+include("array.jl")
+
+include("testsuite.jl")
+
 export GPUArray, gpu_call, thread_blocks_heuristic, global_size, synchronize_threads
 export linear_index, @linearidx, @cartesianidx, convolution!, device, synchronize
 export JLArray
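As orientation for the exported kernel interface, here is a minimal vector-add sketch. The state-first kernel convention and the `gpu_call(f, A, args)` entry point are taken from `_gpu_call` in src/array.jl below; the explicit bounds check and launch sizing are added assumptions, not something this diff prescribes:

using GPUArrays

# kernels receive the back-end `state` as their first argument
function vadd_kernel(state, c, a, b)
    i = linear_index(state)              # this thread's 1-based linear index
    i <= length(c) && (c[i] = a[i] + b[i])
    return
end

a = JLArray{Float32}(rand(Float32, 16))
b = JLArray{Float32}(rand(Float32, 16))
c = similar(a)
gpu_call(vadd_kernel, c, (c, a, b))      # launch sized after `c`
Array(c) ≈ Array(a) .+ Array(b)          # true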

src/jlbackend.jl renamed to src/array.jl

Lines changed: 28 additions & 14 deletions
@@ -1,5 +1,8 @@
-# Very simple Julia backend which is just for testing the implementation
-# and can be used as a reference implementation
+# Very simple Julia back-end which is just for testing the implementation and can be used as
+# a reference implementation
+
+
+## construction
 
 struct JLArray{T, N} <: GPUArray{T, N}
     data::Array{T, N}
@@ -12,6 +15,22 @@ end
 
 JLArray(data::AbstractArray{T, N}, size::Dims{N}) where {T,N} = JLArray{T,N}(data, size)
 
+(::Type{<: JLArray{T}})(x::AbstractArray) where T = JLArray(convert(Array{T}, x), size(x))
+
+function JLArray{T, N}(size::NTuple{N, Integer}) where {T, N}
+    JLArray{T, N}(Array{T, N}(undef, size), size)
+end
+
+
+## getters
+
+size(x::JLArray) = x.size
+
+pointer(x::JLArray) = pointer(x.data)
+
+
+## I/O
+
 Base.show(io::IO, x::JLArray) = show(io, collect(x))
 Base.show(io::IO, x::LinearAlgebra.Adjoint{<:Any,<:JLArray}) = show(io, LinearAlgebra.adjoint(collect(x.parent)))
 Base.show(io::IO, x::LinearAlgebra.Transpose{<:Any,<:JLArray}) = show(io, LinearAlgebra.transpose(collect(x.parent)))
@@ -20,15 +39,16 @@ Base.show(io::IO, ::MIME"text/plain", x::JLArray) = show(io, MIME"text/plain"(), collect(x))
 Base.show(io::IO, ::MIME"text/plain", x::LinearAlgebra.Adjoint{<:Any,<:JLArray}) = show(io, MIME"text/plain"(), LinearAlgebra.adjoint(collect(x.parent)))
 Base.show(io::IO, ::MIME"text/plain", x::LinearAlgebra.Transpose{<:Any,<:JLArray}) = show(io, MIME"text/plain"(), LinearAlgebra.transpose(collect(x.parent)))
 
+
+## other
+
 """
 Thread group local memory
 """
 struct LocalMem{N, T}
     x::NTuple{N, Vector{T}}
 end
 
-size(x::JLArray) = x.size
-pointer(x::JLArray) = pointer(x.data)
 to_device(state, x::JLArray) = x.data
 to_device(state, x::Tuple) = to_device.(Ref(state), x)
 to_device(state, x::RefValue{<: JLArray}) = RefValue(to_device(state, x[]))
@@ -40,12 +60,6 @@ to_blocks(state, x) = x
 # unpacks local memory for each block
 to_blocks(state, x::LocalMem) = x.x[blockidx_x(state)]
 
-(::Type{<: JLArray{T}})(x::AbstractArray) where T = JLArray(convert(Array{T}, x), size(x))
-
-function JLArray{T, N}(size::NTuple{N, Integer}) where {T, N}
-    JLArray{T, N}(Array{T, N}(undef, size), size)
-end
-
 similar(::Type{<: JLArray}, ::Type{T}, size::Base.Dims{N}) where {T, N} = JLArray{T, N}(size)
 
 function unsafe_reinterpret(::Type{T}, A::JLArray{ET}, size::NTuple{N, Integer}) where {T, ET, N}
@@ -131,7 +145,8 @@ function _gpu_call(f, A::JLArray, args::Tuple, blocks_threads::Tuple{T, T}) where T
     block_args = to_blocks.(Ref(state), device_args)
     for threadidx in CartesianIndices(threads)
         thread_state = JLState(state, threadidx.I)
-        tasks[threadidx] = @async f(thread_state, block_args...)
+        tasks[threadidx] = @async @allowscalar f(thread_state, block_args...)
+        # TODO: @async obfuscates the trace to any exception which happens during f
     end
     for t in tasks
         fetch(t)
@@ -146,7 +161,6 @@ device(x::JLArray) = JLDevice()
 threads(dev::JLDevice) = 256
 blocks(dev::JLDevice) = (256, 256, 256)
 
-
 @inline function synchronize_threads(::JLState)
     #=
     All threads are getting started asynchronously,so a yield will
@@ -168,8 +182,9 @@ end
 blas_module(::JLArray) = LinearAlgebra.BLAS
 blasbuffer(A::JLArray) = A.data
 
+# defining our own plan type is the easiest way to pass around the plans in Base interface
+# without ambiguities
 
-# defining our own plan type is the easiest way to pass around the plans in Base interface without ambiguities
 struct FFTPlan{T}
     p::T
 end
@@ -192,7 +207,6 @@ function plan_ifft(A::JLArray; kw_args...)
     FFTPlan(plan_ifft(A.data; kw_args...))
 end
 
-
 function *(plan::FFTPlan, A::JLArray)
     x = plan.p * A.data
     JLArray(x)
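A short usage sketch of the renamed back-end, restricted to constructors and methods visible in this diff; the `plan_fft` method is assumed to follow the `plan_ifft` pattern shown above:

using GPUArrays

a = JLArray{Float32}(rand(Float32, 4, 4))  # convert-and-wrap constructor from `## construction`
size(a)                                    # (4, 4), via the `size` getter
collect(a)                                 # back to a plain Array

b = JLArray{ComplexF32}(rand(ComplexF32, 8))
p = plan_fft(b)                            # an `FFTPlan` wrapping the FFTW plan for `b.data`
c = p * b                                  # unwraps, applies the plan, rewraps as a JLArray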

src/indexing.jl

Lines changed: 18 additions & 7 deletions
@@ -1,10 +1,20 @@
-const _allowslow = Ref(true)
+const _allowscalar = Ref(true)
 
-allowslow(flag = true) = (_allowslow[] = flag)
+allowscalar(flag = true) = (_allowscalar[] = flag)
 
-function assertslow(op = "Operation")
-    # _allowslow[] || error("$op is disabled")
-    return
+function assertscalar(op = "Operation")
+    _allowscalar[] || error("$op is disabled")
+    return
+end
+
+macro allowscalar(ex)
+    quote
+        local prev = _allowscalar[]
+        _allowscalar[] = true
+        local ret = $(esc(ex))
+        _allowscalar[] = prev
+        ret
+    end
 end
 
 Base.IndexStyle(::Type{<:GPUArray}) = Base.IndexLinear()
@@ -16,7 +26,7 @@ function _getindex(xs::GPUArray{T}, i::Integer) where T
 end
 
 function Base.getindex(xs::GPUArray{T}, i::Integer) where T
-    assertslow("getindex")
+    ndims(xs) > 0 && assertscalar("scalar getindex")
     _getindex(xs, i)
 end
 
@@ -27,12 +37,13 @@ function _setindex!(xs::GPUArray{T}, v::T, i::Integer) where T
 end
 
 function Base.setindex!(xs::GPUArray{T}, v::T, i::Integer) where T
-    assertslow("setindex!")
+    assertscalar("scalar setindex!")
     _setindex!(xs, v, i)
 end
 
 Base.setindex!(xs::GPUArray, v, i::Integer) = xs[i] = convert(eltype(xs), v)
 
+
 # Vector indexing
 
 to_index(a, x) = x
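In practice the new scalar-indexing switch works as sketched below; neither `allowscalar` nor `@allowscalar` is exported by this diff, hence the qualified names:

using GPUArrays

a = JLArray{Float32}(rand(Float32, 10))

GPUArrays.allowscalar(false)      # scalar getindex/setindex! now throw
# a[1]                            # would error: "scalar getindex is disabled"

x = GPUArrays.@allowscalar a[1]   # re-enabled for this expression only, then restored

GPUArrays.allowscalar(true)       # back to the permissive default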

src/testsuite.jl

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+# Abstract test suite that can be used for all packages inheriting from GPUArray
+
+module TestSuite
+
+using GPUArrays
+using GPUArrays: mapidx, gpu_sub2ind
+
+using LinearAlgebra
+using Random
+using Test
+
+using FFTW
+using FillArrays
+using StaticArrays
+
+toarray(T, x::Tuple{X, Vararg{Int}}) where X = fill(first(x), Base.tail(x))
+toarray(::Type{T}, x::NTuple{N, Int}) where {T <: Bool, N} = rand(T, x)
+toarray(::Type{T}, x::NTuple{N, Int}) where {T <: Integer, N} = rand(T(1):T(10), x)
+toarray(T, x::NTuple{N, Int}) where N = rand(T, x)
+toarray(T, x) = x
+togpu(T, x::AbstractArray) = T(x)
+togpu(T, x) = x
+
+"""
+Calls function `f` on input arrays generated by `sizes` as Base.Array and converted to
+`Typ`. Compares the result of `f` and tests if they agree. `sizes` can be the shape of the
+array, a value or a tuple `(val, shape...)` which will create a `fill(val, shape...)`.
+"""
+function against_base(f, Typ, sizes...)
+    jl_arrays = toarray.(eltype(Typ), sizes)
+    gpu_arrays = togpu.(Typ, jl_arrays)
+    res_jl = f(jl_arrays...)
+    res_gpu = f(gpu_arrays...)
+    @test res_jl ≈ Array(res_gpu)
+end
+
+
+include("testsuite/blas.jl")
+include("testsuite/broadcasting.jl")
+include("testsuite/construction.jl")
+include("testsuite/fft.jl")
+include("testsuite/gpuinterface.jl")
+include("testsuite/linalg.jl")
+include("testsuite/mapreduce.jl")
+include("testsuite/base.jl")
+include("testsuite/indexing.jl")
+include("testsuite/vector.jl")
+include("testsuite/random.jl")
+
+function supported_eltypes()
+    (Float32, Float64, Int32, Int64, ComplexF32, ComplexF64)
+end
+
+export against_base, run_tests, supported_eltypes
+
+end
+
+
+"""
+Runs the entire GPUArrays test suite on array type `Typ`
+"""
+function test(Typ)
+    TestSuite.test_gpuinterface(Typ)
+    TestSuite.test_base(Typ)
+    TestSuite.test_blas(Typ)
+    TestSuite.test_broadcasting(Typ)
+    TestSuite.test_construction(Typ)
+    TestSuite.test_fft(Typ)
+    TestSuite.test_linalg(Typ)
+    TestSuite.test_mapreduce(Typ)
+    TestSuite.test_indexing(Typ)
+    #TestSuite.test_vectors(Typ)
+    TestSuite.test_random(Typ)
+end
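Downstream, a back-end package would drive the suite roughly as follows; `test` and the `TestSuite` submodule live inside GPUArrays because testsuite.jl is included into the package module (a sketch, not part of the diff):

using GPUArrays

# run the whole suite against an array type, e.g. the JLArray reference back-end
GPUArrays.test(JLArray)

# or check a single operation against Base semantics
GPUArrays.TestSuite.against_base(sum, JLArray{Float32}, (10, 10))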

src/testsuite/base.jl

Lines changed: 1 addition & 5 deletions
@@ -26,7 +26,7 @@ function ntuple_closure(state, result, ::Val{N}, testval) where N
     return
 end
 
-function run_base(Typ)
+function test_base(Typ)
     @testset "base functionality" begin
         @testset "mapidx" begin
             a = rand(ComplexF32, 77)
@@ -68,9 +68,6 @@ function run_base(Typ)
             @test Array(a) == x
         end
 
-        GPUArrays.allowslow(true)
-        # right now in CLArrays we fallback to getindex since on some hardware
-        # somehow the vcat kernel segfaults -.-
         @testset "vcat + hcat" begin
             x = fill(0f0, (10, 10))
             y = rand(Float32, 20, 10)
@@ -84,7 +81,6 @@ function run_base(Typ)
             against_base(hcat, Typ{Float32}, (3, 3), (3, 3))
            against_base(vcat, Typ{Float32}, (3, 3), (3, 3))
         end
-        GPUArrays.allowslow(false)
 
         @testset "reinterpret" begin
             a = rand(ComplexF32, 22)

src/testsuite/blas.jl

Lines changed: 1 addition & 3 deletions
@@ -1,6 +1,4 @@
-using LinearAlgebra
-
-function run_blas(Typ)
+function test_blas(Typ)
     @testset "BLAS" begin
         T = Typ{Float32}
         @testset "matmul" begin
