Skip to content

Commit f57893e

Browse files
Merge pull request #28 from FluxML/cl/scatter
remove unused kernels
2 parents 92210d6 + 7eb0b43 commit f57893e

File tree

1 file changed: +0 -26 lines changed

1 file changed: +0 -26 lines changed

ext/NNlibCUDA/src/utils.jl

Lines changed: 0 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,3 @@
1-
function NNlib.count_indices(idx::AnyCuArray)
2-
dst_counts = length.(NNlib.reverse_indices(idx))
3-
src_counts = NNlib.gather(cu(dst_counts), idx)
4-
return src_counts
5-
end
6-
71
function divide_kernel!(xs, ys, max_idx)
82
index = threadIdx().x + (blockIdx().x - 1) * blockDim().x
93

@@ -24,26 +18,6 @@ function divide_kernel!(xs, counts, max_idx, max_dims_idx, dims_size)
2418
return nothing
2519
end
2620

27-
function NNlib.divide_by_counts!(xs::AnyCuArray{T}, idx::AnyCuArray, dims) where {T}
28-
counts = CuArray{T}(NNlib.count_indices(idx))
29-
args = if dims == 0
30-
max_idx = length(idx)
31-
xs, counts, max_idx
32-
else
33-
dims_size = size(xs)[1:dims]
34-
max_dims_idx = prod(dims_size)
35-
max_idx = prod(size(xs))
36-
xs, counts, max_idx, max_dims_idx, dims_size
37-
end
38-
39-
kernel = @cuda launch=false divide_kernel!(args...)
40-
config = launch_configuration(kernel.fun; max_threads=256)
41-
threads = min(max_idx, config.threads)
42-
blocks = cld(max_idx, threads)
43-
kernel(args...; threads=threads, blocks=blocks)
44-
return xs
45-
end
46-
4721
function NNlib.reverse_indices(idx::AnyCuArray{<:Any,N}) where N
4822
max_dims = NNlib.maximum_dims(idx)
4923
T = CartesianIndex{N}

0 commit comments

Comments (0)