3 changes: 2 additions & 1 deletion src/GPUArrays.jl
@@ -18,9 +18,10 @@ include("convolution.jl")
include("testsuite/testsuite.jl")
include("jlbackend.jl")
include("random.jl")
include("pool.jl")

export GPUArray, gpu_call, thread_blocks_heuristic, global_size, synchronize_threads
export linear_index, @linearidx, @cartesianidx, convolution!, device, synchronize
export linear_index, @linearidx, @cartesianidx, convolution!, device, synchronize, maxpool2d
export JLArray

end # module
4 changes: 2 additions & 2 deletions src/indexing.jl
@@ -16,7 +16,7 @@ function _getindex(xs::GPUArray{T}, i::Integer) where T
end

function Base.getindex(xs::GPUArray{T}, i::Integer) where T
    assertslow("getindex")
    # assertslow("getindex")
    _getindex(xs, i)
end

@@ -27,7 +27,7 @@ function _setindex!(xs::GPUArray{T}, v::T, i::Integer) where T
end

function Base.setindex!(xs::GPUArray{T}, v::T, i::Integer) where T
    assertslow("setindex!")
    # assertslow("setindex!")
    _setindex!(xs, v, i)
end

44 changes: 44 additions & 0 deletions src/pool.jl
@@ -0,0 +1,44 @@
import CUDAnative
Member: That's not needed, right?

Contributor Author: 👍

function maxpool2d_kernel(state, A::AbstractArray{T}, out, Asize, pool, stride, outSize) where T
    ilin = linear_index(state)
    idx = GPUArrays.gpu_ind2sub(Asize, ilin)
    # the launch covers every index of the padded input; threads outside the output grid return early
    if idx[1] > outSize[1] || idx[2] > outSize[2] || idx[3] > outSize[3] || idx[4] > outSize[4]
        return
    end

    # column-major linear index of the pooling window's top-left element in A
    curr_pos = (idx[1] - 1) * stride + Asize[1] * (idx[2] - 1) * stride +
               (Asize[1] * Asize[2]) * (idx[3] - 1) +
               (Asize[1] * Asize[2] * Asize[3]) * (idx[4] - 1) + 1
    temp_max = A[curr_pos]
    max_pos = curr_pos          # position of the maximum within A (not used further in this kernel)

    for j in 1:pool             # columns of the pooling window
        for i in 1:pool         # rows of the pooling window
            m = A[curr_pos]
            if m > temp_max
                temp_max = m
                max_pos = curr_pos
            end
            curr_pos += 1       # next row within the current column
        end
        curr_pos += Asize[1] - pool   # jump to the top of the next column
    end
    out[(idx[1] - 1) + outSize[1] * (idx[2] - 1) +
        (outSize[1] * outSize[2]) * (idx[3] - 1) +
        (outSize[1] * outSize[2] * outSize[3]) * (idx[4] - 1) + 1] = temp_max
    return
end
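For reference, the hand-written offset above is just the column-major linear index of the window origin at ((idx[1]-1)*stride + 1, (idx[2]-1)*stride + 1, idx[3], idx[4]). A small host-side check (plain Julia 0.6, illustrative values only, not part of this diff) makes the equivalence explicit:

```julia
# Asize, stride and idx are hypothetical values chosen for illustration.
Asize  = (9, 9, 3, 1)            # padded input size (H, W, C, N)
stride = 3
idx    = (2, 3, 1, 1)            # one output coordinate

origin = ((idx[1] - 1) * stride + 1,   # input row of the window's top-left corner
          (idx[2] - 1) * stride + 1,   # input column
          idx[3], idx[4])

@assert sub2ind(Asize, origin...) ==
        (idx[1] - 1) * stride + Asize[1] * (idx[2] - 1) * stride +
        (Asize[1] * Asize[2]) * (idx[3] - 1) +
        (Asize[1] * Asize[2] * Asize[3]) * (idx[4] - 1) + 1
```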


# 2-D max pooling over the first two dimensions of an H × W × C × N array.
function maxpool2d(a, pool::T; stride = pool, pad = 0) where T <: Integer
    # zero-pad both spatial dimensions by `pad` on each side
    b = zeros(typeof(a), size(a, 1) + pad * 2, size(a, 2) + pad * 2, size(a, 3), size(a, 4))
    b[pad + 1 : pad + size(a, 1), pad + 1 : pad + size(a, 2), :, :] = a
    Asize  = UInt32.(size(b))
    pool   = UInt32(pool)
    stride = UInt32(stride)
    # allocate at the padded size, then slice down to the pooled output size
    out = similar(b)
    out = out[1:(div(Asize[1] - pool, stride) + 1), 1:(div(Asize[2] - pool, stride) + 1), :, :]
Member: you could just do similar(b, outsize) no?

Contributor Author: Thanks, I was unaware of this. It should be similar(b, outSize...) perhaps. Also, outSize needs to be determined before similar is called.
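A minimal sketch of what that reordering could look like (hypothetical, not part of this diff; it assumes `similar(b, dims...)` is supported by the backing array type):

```julia
# compute the pooled size first, then allocate the output directly
outSize = (div(Asize[1] - pool, stride) + 1,
           div(Asize[2] - pool, stride) + 1,
           Asize[3], Asize[4])
out = similar(b, Int.(outSize)...)
outSize = UInt32.(outSize)       # kernel arguments stay UInt32, as above
```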

    outSize = UInt32.(size(out))
    gpu_call(maxpool2d_kernel, b, (b, out, Asize, pool, stride, outSize))
    GPUArrays.synchronize(out)
    out
end
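As a usage sketch (not part of the diff), the exported entry point can be exercised on the bundled JLArray reference backend; this assumes `zeros(typeof(a), dims...)` and ranged `setindex!` work for that array type, as the padding code above requires:

```julia
using GPUArrays

a = JLArray(rand(Float32, 8, 8, 3, 1))  # H × W × C × N input
out = maxpool2d(a, 2)                   # 2×2 windows; stride defaults to the pool size
size(out)                               # (4, 4, 3, 1)
```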


54 changes: 54 additions & 0 deletions src/testsuite/pool.jl
@@ -0,0 +1,54 @@
using GPUArrays.TestSuite, Base.Test, Flux

function run_pool(Typ)
    for ET in supported_eltypes()
        T = Typ{ET}
        # complex element types have no total order, so max pooling is skipped for them
        if ET == Complex{Float32} || ET == Complex{Float64}
            continue
        end
        @testset "$ET" begin
            @testset "maxpool with padding" begin
                pool = 3
                stride = 3
                pad = 3

                a = rand(ET, 9, 9, 3, 1)
                b = zeros(eltype(a), size(a, 1) + pad * 2, size(a, 2) + pad * 2, size(a, 3), size(a, 4))
                b[pad + 1 : pad + size(a, 1), pad + 1 : pad + size(a, 2), :, :] = a
                out1 = maxpool(b, (3, 3))

                a = T(a)
                out2 = GPUArrays.maxpool2d(a, pool, pad = pad)

                @test out1 ≈ out2
            end

            @testset "maxpool without padding" begin
                pool = 3
                stride = 3

                a = rand(ET, 9, 9, 3, 1)
                out1 = maxpool(a, (3, 3))

                a = T(a)
                out2 = GPUArrays.maxpool2d(a, pool)

                @test out1 ≈ out2
            end

            @testset "maxpool with full kernel" begin
                pool = 9
                stride = 1

                a = rand(ET, 9, 9, 3, 1)
                out1 = maxpool(a, (9, 9))

                a = T(a)
                out2 = GPUArrays.maxpool2d(a, pool, stride = stride)

                @test out1 ≈ out2
            end
        end
    end
end
2 changes: 2 additions & 0 deletions src/testsuite/testsuite.jl
@@ -42,6 +42,7 @@ include("base.jl")
include("indexing.jl")
# include("vector.jl")
include("random.jl")
include("pool.jl")

function supported_eltypes()
    (Float32, Float64, Int32, Int64, Complex64, Complex128)
@@ -62,6 +63,7 @@ function run_tests(Typ)
    run_mapreduce(Typ)
    run_indexing(Typ)
    run_random(Typ)
    run_pool(Typ)
end

export against_base, run_tests, supported_eltypes
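With `run_pool` registered in `run_tests`, a backend that runs the shared test suite picks up the pooling tests automatically. A sketch against the JLArray reference backend (assuming the `GPUArrays.TestSuite` module layout used here):

```julia
using GPUArrays, GPUArrays.TestSuite

run_tests(JLArray)            # full suite, now including the pooling tests
TestSuite.run_pool(JLArray)   # or only the pooling tests (run_pool is not exported)
```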
2 changes: 2 additions & 0 deletions test/REQUIRE
@@ -0,0 +1,2 @@
Flux
CUDAnative