@@ -21,40 +21,43 @@ map!(f, y::GPUArray, x1::GPUArray, x2::GPUArray) =
21
21
invoke (map!, Tuple{Any,GPUArray, Vararg{GPUArray}}, f, y, x1, x2)
22
22
23
23
24
- @generated function nindex (i:: T , ls:: NTuple{N} ) where {T, N}
25
- quote
26
- Base. @_inline_meta
27
- $ (foldr (:($ T (0 ), $ T (0 )), T (1 ): T (N)) do n, els
28
- :(i ≤ ls[$ n] ? ($ T ($ n), i) : (i -= $ T (ls[$ n]); $ els))
29
- end )
30
- end
31
- end
32
- function catindex (dim, I:: NTuple{N, T} , shapes) where {T, N}
33
- xi = nindex (I[dim], map (s-> s[dim], shapes))
34
- x = xi[1 ]; i = xi[2 ]
35
- x, ntuple (n -> n == dim ? i : I[n], Val{N})
36
- end
37
-
38
- function _cat (dim, dest, xs... )
39
- gpu_call (dest, (Cuint (dim), dest, xs)) do state, dim, dest, xs
40
- I = @cartesianidx dest state
41
- nI = catindex (dim, I, size .(xs))
42
- n = nI[1 ]; I′ = nI[2 ]
43
- @inbounds dest[I... ] = xs[n][I′... ]
44
- return
45
- end
46
- return dest
47
- end
48
-
49
- function cat_t (dims:: Integer , T:: Type , x:: GPUArray , xs:: GPUArray... )
50
- catdims = Base. dims2cat (dims)
51
- shape = Base. cat_shape (catdims, (), size .((x, xs... ))... )
52
- dest = Base. cat_similar (x, T, shape)
53
- _cat (dims, dest, x, xs... )
54
- end
24
+ # TODO: find out why this segfaults Julia without a stack trace on AMD,
25
+ # produces wrong results on Titan X, and yet passes on GTX 950.
55
26
56
- vcat (xs:: GPUArray... ) = cat (1 , xs... )
57
- hcat (xs:: GPUArray... ) = cat (2 , xs... )
27
+ # @generated function nindex(i::T, ls::NTuple{N}) where {T, N}
28
+ # quote
29
+ # Base.@_inline_meta
30
+ # $(foldr(:($T(0), $T(0)), T(1):T(N)) do n, els
31
+ # :(i ≤ ls[$n] ? ($T($n), i) : (i -= $T(ls[$n]); $els))
32
+ # end)
33
+ # end
34
+ # end
35
+ # function catindex(dim, I::NTuple{N, T}, shapes) where {T, N}
36
+ # xi = nindex(I[dim], map(s-> s[dim], shapes))
37
+ # x = xi[1]; i = xi[2]
38
+ # x, ntuple(n -> n == dim ? i : I[n], Val{N})
39
+ # end
40
+ #
41
+ # function _cat(dim, dest, xs...)
42
+ # gpu_call(dest, (Cuint(dim), dest, xs)) do state, dim, dest, xs
43
+ # I = @cartesianidx dest state
44
+ # nI = catindex(dim, I, size.(xs))
45
+ # n = nI[1]; I′ = nI[2]
46
+ # @inbounds dest[I...] = xs[n][I′...]
47
+ # return
48
+ # end
49
+ # return dest
50
+ # end
51
+ #
52
+ # function cat_t(dims::Integer, T::Type, x::GPUArray, xs::GPUArray...)
53
+ # catdims = Base.dims2cat(dims)
54
+ # shape = Base.cat_shape(catdims, (), size.((x, xs...))...)
55
+ # dest = Base.cat_similar(x, T, shape)
56
+ # _cat(dims, dest, x, xs...)
57
+ # end
58
+ #
59
+ # vcat(xs::GPUArray...) = cat(1, xs...)
60
+ # hcat(xs::GPUArray...) = cat(2, xs...)
58
61
59
62
60
63
# Base functions that are sadly not fit for the GPU yet (they only work for Int64)
0 commit comments