Skip to content

Commit 45a8d5d

Browse files
authored
Merge pull request #100 from JuliaGPU/sd/repmat
add repmat #93
2 parents 64b99bb + 798e5a5 commit 45a8d5d

File tree

5 files changed

+51
-12
lines changed

5 files changed

+51
-12
lines changed

REQUIRE

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
julia 0.6
22
StaticArrays
3-
NNlib

src/GPUArrays.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ include("convolution.jl")
1818
include("testsuite/testsuite.jl")
1919
include("jlbackend.jl")
2020
include("random.jl")
21-
include("nnlib.jl")
2221

2322
export GPUArray, gpu_call, thread_blocks_heuristic, global_size, synchronize_threads
2423
export linear_index, @linearidx, @cartesianidx, convolution!, device, synchronize

src/base.jl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,47 @@ function _sub2ind(inds, L, ind, i::IT, I::IT...) where IT
9292
r1 = inds[1]
9393
_sub2ind(Base.tail(inds), L * r1, ind + (i - IT(1)) * L, I...)
9494
end
95+
96+
# `getidx_2d1d(x, i, j)` reads `x` with a two-dimensional index pair whether `x` is a
# vector or a matrix; for a vector the column index `j` is simply ignored.
#
# This is admittedly ugly. The alternative would be to give device arrays a two-index
# `getindex`, but since we never do bounds checking on the device, an out-of-range
# access such as `getindex(a::GPUVector, 10, 10)` would silently go unnoticed.
# The helper exists for an easier implementation of `repmat`, whose kernel indexes
# vectors and matrices through the same `(row, column)` pair.
#
# `Base.@propagate_inbounds` already asks the compiler to inline, so no separate
# `@inline` annotation is needed.
Base.@propagate_inbounds function getidx_2d1d(x::AbstractVector, i, j)
    return x[i]
end

Base.@propagate_inbounds function getidx_2d1d(x::AbstractMatrix, i, j)
    return x[i, j]
end
101+
102+
# Tile `a` `m` times along the first dimension and `n` times along the second.
# The result is allocated with `similar(a, size(a, 1) * m, size(a, 2) * n)` and returned.
function Base.repmat(a::GPUVecOrMat, m::Int, n::Int = 1)
    o, p = size(a, 1), size(a, 2)
    b = similar(a, o * m, p * n)
    # Sizes and repetition counts are passed to the kernel as UInt32, so all index
    # arithmetic inside the kernel is carried out in UInt32.
    args = (b, a, UInt32.((o, p, m, n))...)
    # Nothing to copy when the result is empty (`m == 0`, `n == 0` or an empty `a`).
    # Returning here avoids launching a kernel over zero work items.
    # NOTE(review): a zero-sized launch configuration may not be accepted by every
    # backend -- confirm against `gpu_call`.
    length(b) == 0 && return b
    # `n` is passed as the launch configuration -- presumably one work item per
    # column tile; each work item `j <= n` fills all `m` copies of `a` in its tile column.
    gpu_call(a, args, n) do state, b, a, o, p, m, n
        j = linear_index(state)
        j > n && return  # indices beyond the `n` column tiles have nothing to do
        ui1 = UInt32(1)
        col0 = (j - ui1) * p + ui1  # first destination column of tile column `j`
        @inbounds for i in ui1:m
            row0 = (i - ui1) * o + ui1  # first destination row of the `i`-th row tile
            for r in ui1:p
                for k in ui1:o
                    # `getidx_2d1d` lets a vector `a` be read with a (row, column) pair
                    b[k - ui1 + row0, r - ui1 + col0] = getidx_2d1d(a, k, r)
                end
            end
        end
        return
    end
    return b
end
123+
124+
# Concatenate `m` copies of the vector `a` into a new vector of length `length(a) * m`,
# allocated with `similar(a, ...)`.
function Base.repmat(a::GPUVector, m::Int)
    o = length(a)
    b = similar(a, o * m)
    # Length and repetition count are passed as UInt32, so the kernel's index
    # arithmetic is carried out in UInt32.
    args = (b, a, UInt32(o), UInt32(m))
    # Nothing to copy when the result is empty (`m == 0` or an empty `a`).
    # Returning here avoids launching a kernel over zero work items.
    # NOTE(review): a zero-sized launch configuration may not be accepted by every
    # backend -- confirm against `gpu_call`.
    length(b) == 0 && return b
    # `m` is passed as the launch configuration -- presumably one work item per copy;
    # work item `i <= m` writes the `i`-th copy of `a`.
    gpu_call(a, args, m) do state, b, a, o, m
        i = linear_index(state)
        i > m && return  # indices beyond the `m` copies have nothing to do
        ui1 = UInt32(1)
        c = (i - ui1) * o + ui1  # first destination index of the `i`-th copy
        # The loop variable is deliberately distinct from the work-item index `i`,
        # so `i` is never rebound (and never changes type) inside the kernel.
        @inbounds for k in ui1:o
            b[c + k - ui1] = a[k]
        end
        return
    end
    return b
end

src/nnlib.jl

Lines changed: 0 additions & 10 deletions
This file was deleted.

src/testsuite/base.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,5 +133,12 @@ function run_base(Typ)
133133
against_base((a, b)-> map!(-, a, b), T, (10,), (10,))
134134
against_base((a, b, c, d)-> map!(*, a, b, c, d), T, (10,), (10,), (10,), (10,))
135135
end
136+
137+
@testset "repmat" begin
    # Each entry pairs the repetition counts handed to `repmat` with the dimensions
    # of the input array; every case is compared against Base via `against_base`.
    cases = (
        ((5, 6), (10,)),     # vector tiled in both dimensions
        ((5,), (10,)),       # vector repeated along its only dimension
        ((5,), (5, 4)),      # matrix repeated along the first dimension only
        ((4, 3), (10, 15)),  # matrix tiled in both dimensions
    )
    for (reps, dims) in cases
        against_base(a -> repmat(a, reps...), T, dims)
    end
end
136143
end
137144
end

0 commit comments

Comments
 (0)