Skip to content

Commit 6062180

Browse files
committed
Merge branch 'sd/abstractgpu' of github.com:JuliaGPU/GPUArrays.jl into sd/abstractgpu
2 parents 71e2d8a + 240067a commit 6062180

File tree

1 file changed

+36
-33
lines changed

1 file changed

+36
-33
lines changed

src/base.jl

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,40 +21,43 @@ map!(f, y::GPUArray, x1::GPUArray, x2::GPUArray) =
2121
invoke(map!, Tuple{Any,GPUArray, Vararg{GPUArray}}, f, y, x1, x2)
2222

2323

24-
@generated function nindex(i::T, ls::NTuple{N}) where {T, N}
25-
quote
26-
Base.@_inline_meta
27-
$(foldr(:($T(0), $T(0)), T(1):T(N)) do n, els
28-
:(i ≤ ls[$n] ? ($T($n), i) : (i -= $T(ls[$n]); $els))
29-
end)
30-
end
31-
end
32-
function catindex(dim, I::NTuple{N, T}, shapes) where {T, N}
33-
xi = nindex(I[dim], map(s-> s[dim], shapes))
34-
x = xi[1]; i = xi[2]
35-
x, ntuple(n -> n == dim ? i : I[n], Val{N})
36-
end
37-
38-
function _cat(dim, dest, xs...)
39-
gpu_call(dest, (Cuint(dim), dest, xs)) do state, dim, dest, xs
40-
I = @cartesianidx dest state
41-
nI = catindex(dim, I, size.(xs))
42-
n = nI[1]; I′ = nI[2]
43-
@inbounds dest[I...] = xs[n][I′...]
44-
return
45-
end
46-
return dest
47-
end
48-
49-
function cat_t(dims::Integer, T::Type, x::GPUArray, xs::GPUArray...)
50-
catdims = Base.dims2cat(dims)
51-
shape = Base.cat_shape(catdims, (), size.((x, xs...))...)
52-
dest = Base.cat_similar(x, T, shape)
53-
_cat(dims, dest, x, xs...)
54-
end
24+
# TODO: find out why this segfaults Julia without a stack trace on AMD,
25+
# produces wrong results on Titan X, and passes on GTX 950.
5526

56-
vcat(xs::GPUArray...) = cat(1, xs...)
57-
hcat(xs::GPUArray...) = cat(2, xs...)
27+
# @generated function nindex(i::T, ls::NTuple{N}) where {T, N}
28+
# quote
29+
# Base.@_inline_meta
30+
# $(foldr(:($T(0), $T(0)), T(1):T(N)) do n, els
31+
# :(i ≤ ls[$n] ? ($T($n), i) : (i -= $T(ls[$n]); $els))
32+
# end)
33+
# end
34+
# end
35+
# function catindex(dim, I::NTuple{N, T}, shapes) where {T, N}
36+
# xi = nindex(I[dim], map(s-> s[dim], shapes))
37+
# x = xi[1]; i = xi[2]
38+
# x, ntuple(n -> n == dim ? i : I[n], Val{N})
39+
# end
40+
#
41+
# function _cat(dim, dest, xs...)
42+
# gpu_call(dest, (Cuint(dim), dest, xs)) do state, dim, dest, xs
43+
# I = @cartesianidx dest state
44+
# nI = catindex(dim, I, size.(xs))
45+
# n = nI[1]; I′ = nI[2]
46+
# @inbounds dest[I...] = xs[n][I′...]
47+
# return
48+
# end
49+
# return dest
50+
# end
51+
#
52+
# function cat_t(dims::Integer, T::Type, x::GPUArray, xs::GPUArray...)
53+
# catdims = Base.dims2cat(dims)
54+
# shape = Base.cat_shape(catdims, (), size.((x, xs...))...)
55+
# dest = Base.cat_similar(x, T, shape)
56+
# _cat(dims, dest, x, xs...)
57+
# end
58+
#
59+
# vcat(xs::GPUArray...) = cat(1, xs...)
60+
# hcat(xs::GPUArray...) = cat(2, xs...)
5861

5962

6063
# Base functions that are sadly not fit for the GPU yet (they only work for Int64)

0 commit comments

Comments
 (0)