Skip to content

Commit dde8f86

Browse files
committed
max pool
1 parent ec91274 commit dde8f86

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed

src/GPUArrays.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ include("convolution.jl")
1818
include("testsuite/testsuite.jl")
1919
include("jlbackend.jl")
2020
include("random.jl")
21+
include("pool.jl")
2122

2223
export GPUArray, gpu_call, thread_blocks_heuristic, global_size, synchronize_threads
23-
export linear_index, @linearidx, @cartesianidx, convolution!, device, synchronize
24+
export linear_index, @linearidx, @cartesianidx, convolution!, device, synchronize, maxpool2d
2425
export JLArray
2526

2627
end # module

src/pool.jl

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import CUDAnative
2+
3+
"""
    maxpool2d_kernel(state, A, out, Asize, pool, stride_, outSize)

Kernel that writes one max-pooled element of `out` per thread.

`A` and `out` are addressed as flat, first-dimension-fastest 4-D buffers whose
extents are given by `Asize` and `outSize`. Pooling is applied over the first
two dimensions only; the third and fourth coordinates are carried through
unchanged.

# Arguments
- `state`: kernel state handed to `linear_index`.
- `A`: input buffer, read with linear indices.
- `out`: output buffer, written with linear indices.
- `Asize`: 4-element extents of `A`.
- `pool`: edge length of the square pooling window.
- `stride_`: step between consecutive windows along dimensions 1 and 2.
- `outSize`: 4-element extents of `out`.

Returns nothing; the result is stored in `out`.
"""
function maxpool2d_kernel(state, A::AbstractArray{T}, out, Asize, pool, stride_, outSize) where T
    ilin = linear_index(state)
    # The linear index is decomposed against the *input* shape; threads whose
    # coordinates fall outside the (smaller) output extents have nothing to do.
    idx = GPUArrays.gpu_ind2sub(Asize, ilin)
    if idx[1] > outSize[1] || idx[2] > outSize[2] || idx[3] > outSize[3] || idx[4] > outSize[4]
        return
    end

    # Linear (1-based) index into `A` of the first element of this thread's
    # pooling window. Dimensions 1 and 2 advance by `stride_`, 3 and 4 by one.
    in_plane = Asize[1] * Asize[2]
    window_start = ((idx[1] - 1) * stride_) + Asize[1] * (idx[2] - 1) * stride_ +
        in_plane * (idx[3] - 1) + (in_plane * Asize[3]) * (idx[4] - 1) + 1

    temp_max = A[window_start]
    curr_pos = window_start

    # Walk the pool x pool window: the inner loop steps along dimension 1,
    # the outer loop along dimension 2.
    for col in 1:pool
        for row in 1:pool
            m = A[curr_pos]
            if m > temp_max
                temp_max = m
            end
            curr_pos += 1
        end
        # Jump from the end of this window column to the start of the next one.
        curr_pos += Asize[1] - pool
    end

    # Linear (1-based) index of this thread's element in `out`.
    out_plane = outSize[1] * outSize[2]
    out_pos = (idx[1] - 1) + outSize[1] * (idx[2] - 1) + out_plane * (idx[3] - 1) +
        (out_plane * outSize[3]) * (idx[4] - 1) + 1
    out[out_pos] = temp_max
    return
end
28+
29+
30+
"""
    maxpool2d(a, pool; stride_ = 1)

Max-pool the first two dimensions of the 4-dimensional array `a` with a square
window of edge length `pool`, moving the window by `stride_` elements, and
return the pooled array.

The result has size
`(div(size(a, 1) - pool, stride_) + 1, div(size(a, 2) - pool, stride_) + 1, size(a, 3), size(a, 4))`.

# Arguments
- `a`: 4-dimensional input array; it is passed to `gpu_call` as the launch array.
- `pool`: window edge length, `1 <= pool <= min(size(a, 1), size(a, 2))`.

# Keywords
- `stride_`: step between windows, at least 1 (default `1`).

# Throws
- `ArgumentError` if `a` is not 4-dimensional, if `pool` or `stride_` is smaller
  than 1, or if `pool` exceeds either of the first two extents of `a`.
"""
function maxpool2d(a, pool; stride_ = 1)
    # Validate before converting to UInt32: unsigned subtraction would otherwise
    # wrap around silently and yield a nonsensical output size.
    ndims(a) == 4 ||
        throw(ArgumentError("maxpool2d expects a 4-dimensional array, got $(ndims(a)) dimensions"))
    pool >= 1 || throw(ArgumentError("pool must be at least 1, got $pool"))
    stride_ >= 1 || throw(ArgumentError("stride_ must be at least 1, got $stride_"))
    (pool <= size(a, 1) && pool <= size(a, 2)) ||
        throw(ArgumentError("pool ($pool) exceeds the first two dimensions of the input $(size(a))"))

    # The kernel does its index arithmetic on 32-bit unsigned integers.
    Asize = UInt32.(size(a))
    pool32 = UInt32(pool)
    stride32 = UInt32(stride_)

    # Allocate the output at its final size instead of slicing a full-size copy.
    outdims = (
        Int(div(Asize[1] - pool32, stride32)) + 1,
        Int(div(Asize[2] - pool32, stride32)) + 1,
        Int(Asize[3]),
        Int(Asize[4]),
    )
    out = similar(a, outdims)
    outSize = UInt32.(outdims)

    gpu_call(maxpool2d_kernel, a, (a, out, Asize, pool32, stride32, outSize))
    GPUArrays.synchronize(out)
    return out
end
41+
42+

0 commit comments

Comments
 (0)