
Commit cd686cc

Test GPUArrays reverse
[only julia] [only benchmarks]

1 parent 205c238 commit cd686cc

5 files changed: +190 -184 lines changed

perf/Project.toml

Lines changed: 1 addition & 0 deletions
@@ -2,5 +2,6 @@
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

perf/runbenchmarks.jl

Lines changed: 2 additions & 0 deletions
@@ -1,4 +1,6 @@
 # benchmark suite execution and codespeed submission
+using Pkg
+Pkg.add(url="https://github.com/christiangnrd/GPUArrays.jl", rev="reverse")
 
 using CUDA
 
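With the CUDA-specific kernels in src/reverse.jl commented out below, reverse on a CuArray falls through to the generic GPUArrays.jl implementation from the pinned branch. As a quick sanity check before benchmarking, one might compare GPU results against Base's CPU reverse. This is a minimal sketch, assuming a functional CUDA device; the shapes here are illustrative and not part of the commit:

using CUDA, Test

# per-dimension reverse on the GPU should match the CPU reference
for sz in ((7,), (4, 5), (3, 4, 5))
    x = rand(Float32, sz...)
    dx = CuArray(x)
    for d in 1:length(sz)
        @test Array(reverse(dx; dims=d)) == reverse(x; dims=d)
    end
end

# 1-d in-place variant
v = rand(Float32, 100)
dv = CuArray(v)
reverse!(dv)
@test Array(dv) == reverse(v)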

src/reverse.jl

Lines changed: 140 additions & 140 deletions
@@ -1,155 +1,155 @@
 # reversing
 
-# the kernel works by treating the array as 1d. after reversing by dimension x an element at
-# pos [i1, i2, i3, ... , i{x}, ..., i{n}] will be at
-# pos [i1, i2, i3, ... , d{x} - i{x} + 1, ..., i{n}] where d{x} is the size of dimension x
-
-# out-of-place version, copying a single value per thread from input to output
-function _reverse(input::AnyCuArray{T, N}, output::AnyCuArray{T, N};
-                  dims=1:ndims(input)) where {T, N}
-    @assert size(input) == size(output)
-    rev_dims = ntuple((d)-> d in dims && size(input, d) > 1, N)
-    ref = size(input) .+ 1
-    # converts an ND-index in the data array to the linear index
-    lin_idx = LinearIndices(input)
-    # converts a linear index in a reduced array to an ND-index, but using the reduced size
-    nd_idx = CartesianIndices(input)
-
-    ## COV_EXCL_START
-    function kernel(input::AbstractArray{T, N}, output::AbstractArray{T, N}) where {T, N}
-        offset_in = blockDim().x * (blockIdx().x - 1i32)
-        index_in = offset_in + threadIdx().x
-
-        @inbounds if index_in <= length(input)
-            idx = Tuple(nd_idx[index_in])
-            idx = ifelse.(rev_dims, ref .- idx, idx)
-            index_out = lin_idx[idx...]
-            output[index_out] = input[index_in]
-        end
-
-        return
-    end
-    ## COV_EXCL_STOP
-
-    nthreads = 256
-    nblocks = cld(length(input), nthreads)
-
-    @cuda threads=nthreads blocks=nblocks kernel(input, output)
-end
-
-# in-place version, swapping elements on half the number of threads
-function _reverse!(data::AnyCuArray{T, N}; dims=1:ndims(data)) where {T, N}
-    rev_dims = ntuple((d)-> d in dims && size(data, d) > 1, N)
-    half_dim = findlast(rev_dims)
-    if isnothing(half_dim)
-        # no reverse operation needed at all in this case.
-        return
-    end
-    ref = size(data) .+ 1
-    # converts an ND-index in the data array to the linear index
-    lin_idx = LinearIndices(data)
-    reduced_size = ntuple((d)->ifelse(d==half_dim, cld(size(data,d),2), size(data,d)), N)
-    reduced_length = prod(reduced_size)
-    # converts a linear index in a reduced array to an ND-index, but using the reduced size
-    nd_idx = CartesianIndices(reduced_size)
-
-    ## COV_EXCL_START
-    function kernel(data::AbstractArray{T, N}) where {T, N}
-        offset_in = blockDim().x * (blockIdx().x - 1i32)
-
-        index_in = offset_in + threadIdx().x
-
-        @inbounds if index_in <= reduced_length
-            idx = Tuple(nd_idx[index_in])
-            index_in = lin_idx[idx...]
-            idx = ifelse.(rev_dims, ref .- idx, idx)
-            index_out = lin_idx[idx...]
-
-            if index_in < index_out
-                temp = data[index_out]
-                data[index_out] = data[index_in]
-                data[index_in] = temp
-            end
-        end
-
-        return
-    end
-    ## COV_EXCL_STOP
-
-    # NOTE: we launch slightly more than half the number of elements in the array as threads.
-    # The last non-singleton dimension along which to reverse is used to define how the array is split.
-    # Only the middle row in case of an odd array dimension could cause trouble, but this is prevented by
-    # ignoring the threads that cross the mid-point
-
-    nthreads = 256
-    nblocks = cld(prod(reduced_size), nthreads)
-
-    @cuda threads=nthreads blocks=nblocks kernel(data)
-end
+# # the kernel works by treating the array as 1d. after reversing by dimension x an element at
+# # pos [i1, i2, i3, ... , i{x}, ..., i{n}] will be at
+# # pos [i1, i2, i3, ... , d{x} - i{x} + 1, ..., i{n}] where d{x} is the size of dimension x
+
+# # out-of-place version, copying a single value per thread from input to output
+# function _reverse(input::AnyCuArray{T, N}, output::AnyCuArray{T, N};
+#                   dims=1:ndims(input)) where {T, N}
+#     @assert size(input) == size(output)
+#     rev_dims = ntuple((d)-> d in dims && size(input, d) > 1, N)
+#     ref = size(input) .+ 1
+#     # converts an ND-index in the data array to the linear index
+#     lin_idx = LinearIndices(input)
+#     # converts a linear index in a reduced array to an ND-index, but using the reduced size
+#     nd_idx = CartesianIndices(input)
+
+#     ## COV_EXCL_START
+#     function kernel(input::AbstractArray{T, N}, output::AbstractArray{T, N}) where {T, N}
+#         offset_in = blockDim().x * (blockIdx().x - 1i32)
+#         index_in = offset_in + threadIdx().x
+
+#         @inbounds if index_in <= length(input)
+#             idx = Tuple(nd_idx[index_in])
+#             idx = ifelse.(rev_dims, ref .- idx, idx)
+#             index_out = lin_idx[idx...]
+#             output[index_out] = input[index_in]
+#         end
+
+#         return
+#     end
+#     ## COV_EXCL_STOP
+
+#     nthreads = 256
+#     nblocks = cld(length(input), nthreads)
+
+#     @cuda threads=nthreads blocks=nblocks kernel(input, output)
+# end
+
+# # in-place version, swapping elements on half the number of threads
+# function _reverse!(data::AnyCuArray{T, N}; dims=1:ndims(data)) where {T, N}
+#     rev_dims = ntuple((d)-> d in dims && size(data, d) > 1, N)
+#     half_dim = findlast(rev_dims)
+#     if isnothing(half_dim)
+#         # no reverse operation needed at all in this case.
+#         return
+#     end
+#     ref = size(data) .+ 1
+#     # converts an ND-index in the data array to the linear index
+#     lin_idx = LinearIndices(data)
+#     reduced_size = ntuple((d)->ifelse(d==half_dim, cld(size(data,d),2), size(data,d)), N)
+#     reduced_length = prod(reduced_size)
+#     # converts a linear index in a reduced array to an ND-index, but using the reduced size
+#     nd_idx = CartesianIndices(reduced_size)
+
+#     ## COV_EXCL_START
+#     function kernel(data::AbstractArray{T, N}) where {T, N}
+#         offset_in = blockDim().x * (blockIdx().x - 1i32)
+
+#         index_in = offset_in + threadIdx().x
+
+#         @inbounds if index_in <= reduced_length
+#             idx = Tuple(nd_idx[index_in])
+#             index_in = lin_idx[idx...]
+#             idx = ifelse.(rev_dims, ref .- idx, idx)
+#             index_out = lin_idx[idx...]
+
+#             if index_in < index_out
+#                 temp = data[index_out]
+#                 data[index_out] = data[index_in]
+#                 data[index_in] = temp
+#             end
+#         end
+
+#         return
+#     end
+#     ## COV_EXCL_STOP
+
+#     # NOTE: we launch slightly more than half the number of elements in the array as threads.
+#     # The last non-singleton dimension along which to reverse is used to define how the array is split.
+#     # Only the middle row in case of an odd array dimension could cause trouble, but this is prevented by
+#     # ignoring the threads that cross the mid-point
+
+#     nthreads = 256
+#     nblocks = cld(prod(reduced_size), nthreads)
+
+#     @cuda threads=nthreads blocks=nblocks kernel(data)
+# end
 
 
 # n-dimensional API
 
-function Base.reverse!(data::AnyCuArray{T, N}; dims=:) where {T, N}
-    if isa(dims, Colon)
-        dims = 1:ndims(data)
-    end
-    if !applicable(iterate, dims)
-        throw(ArgumentError("dimension $dims is not an iterable"))
-    end
-    if !all(1 .≤ dims .≤ ndims(data))
-        throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(data))"))
-    end
-
-    _reverse!(data; dims=dims)
-
-    return data
-end
-
-# out-of-place
-function Base.reverse(input::AnyCuArray{T, N}; dims=:) where {T, N}
-    if isa(dims, Colon)
-        dims = 1:ndims(input)
-    end
-    if !applicable(iterate, dims)
-        throw(ArgumentError("dimension $dims is not an iterable"))
-    end
-    if !all(1 .≤ dims .≤ ndims(input))
-        throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(input))"))
-    end
-
-    if all(size(input)[[dims...]].==1)
-        # no reverse operation needed at all in this case.
-        return copy(input)
-    else
-        output = similar(input)
-        _reverse(input, output; dims=dims)
-        return output
-    end
-end
+# function Base.reverse!(data::AnyCuArray{T, N}; dims=:) where {T, N}
+#     if isa(dims, Colon)
+#         dims = 1:ndims(data)
+#     end
+#     if !applicable(iterate, dims)
+#         throw(ArgumentError("dimension $dims is not an iterable"))
+#     end
+#     if !all(1 .≤ dims .≤ ndims(data))
+#         throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(data))"))
+#     end
+
+#     _reverse!(data; dims=dims)
+
+#     return data
+# end
+
+# # out-of-place
+# function Base.reverse(input::AnyCuArray{T, N}; dims=:) where {T, N}
+#     if isa(dims, Colon)
+#         dims = 1:ndims(input)
+#     end
+#     if !applicable(iterate, dims)
+#         throw(ArgumentError("dimension $dims is not an iterable"))
+#     end
+#     if !all(1 .≤ dims .≤ ndims(input))
+#         throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(input))"))
+#     end
+
+#     if all(size(input)[[dims...]].==1)
+#         # no reverse operation needed at all in this case.
+#         return copy(input)
+#     else
+#         output = similar(input)
+#         _reverse(input, output; dims=dims)
+#         return output
+#     end
+# end
 
 
 # 1-dimensional API
 
-# in-place
-Base.@propagate_inbounds function Base.reverse!(data::AnyCuVector{T}, start::Integer,
-                                                stop::Integer=length(data)) where {T}
-    _reverse!(view(data, start:stop))
-    return data
-end
+# # in-place
+# Base.@propagate_inbounds function Base.reverse!(data::AnyCuVector{T}, start::Integer,
+#                                                 stop::Integer=length(data)) where {T}
+#     _reverse!(view(data, start:stop))
+#     return data
+# end
 
-Base.reverse!(data::AnyCuVector{T}) where {T} = @inbounds reverse!(data, 1, length(data))
+# Base.reverse!(data::AnyCuVector{T}) where {T} = @inbounds reverse!(data, 1, length(data))
 
-# out-of-place
-Base.@propagate_inbounds function Base.reverse(input::AnyCuVector{T}, start::Integer,
-                                               stop::Integer=length(input)) where {T}
-    output = similar(input)
+# # out-of-place
+# Base.@propagate_inbounds function Base.reverse(input::AnyCuVector{T}, start::Integer,
+#                                                stop::Integer=length(input)) where {T}
+#     output = similar(input)
 
-    start > 1 && copyto!(output, 1, input, 1, start-1)
-    _reverse(view(input, start:stop), view(output, start:stop))
-    stop < length(input) && copyto!(output, stop+1, input, stop+1)
+#     start > 1 && copyto!(output, 1, input, 1, start-1)
+#     _reverse(view(input, start:stop), view(output, start:stop))
+#     stop < length(input) && copyto!(output, stop+1, input, stop+1)
 
-    return output
-end
+#     return output
+# end
 
-Base.reverse(data::AnyCuVector{T}) where {T} = @inbounds reverse(data, 1, length(data))
+# Base.reverse(data::AnyCuVector{T}) where {T} = @inbounds reverse(data, 1, length(data))
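For reference, the index mapping described in the commented-out kernel (pos [i1, ..., i{x}, ..., i{n}] maps to pos [i1, ..., d{x} - i{x} + 1, ..., i{n}]) can be checked on the CPU against Base.reverse. The following is an illustrative sketch, not code from this commit; reversed_index is a hypothetical helper name:

# CPU illustration of the kernel's index arithmetic (hypothetical helper):
# along every reversed dimension x, index i{x} maps to d{x} - i{x} + 1,
# and all other indices stay put.
function reversed_index(idx::NTuple{N,Int}, sz::NTuple{N,Int}, dims) where {N}
    rev_dims = ntuple(d -> d in dims && sz[d] > 1, N)
    ref = sz .+ 1
    return ifelse.(rev_dims, ref .- idx, idx)
end

A = reshape(1:24, 2, 3, 4)
B = reverse(A; dims=2)
for I in CartesianIndices(A)
    J = reversed_index(Tuple(I), size(A), (2,))
    @assert B[J...] == A[I]  # the mapping reproduces Base.reverse
end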

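Similarly, the NOTE in the commented-out in-place kernel describes launching threads for only about half the elements, splitting the array along the last reversed non-singleton dimension. A CPU sketch of that partitioning, again illustrative rather than taken from the commit (reverse_halved! is a hypothetical name):

# Visit only the "reduced" half of the array and swap mirror elements;
# the index_in < index_out guard skips self-mirroring elements on the
# middle slice when the split dimension has odd length.
function reverse_halved!(data::AbstractArray{T,N}, dims) where {T,N}
    rev_dims = ntuple(d -> d in dims && size(data, d) > 1, N)
    half_dim = findlast(rev_dims)
    isnothing(half_dim) && return data
    ref = size(data) .+ 1
    lin = LinearIndices(data)
    reduced = ntuple(d -> d == half_dim ? cld(size(data, d), 2) : size(data, d), N)
    for I in CartesianIndices(reduced)
        idx = Tuple(I)
        index_in = lin[idx...]
        mirror = ifelse.(rev_dims, ref .- idx, idx)
        index_out = lin[mirror...]
        if index_in < index_out
            data[index_in], data[index_out] = data[index_out], data[index_in]
        end
    end
    return data
end

A = collect(reshape(1:15, 3, 5))
@assert reverse_halved!(copy(A), (2,)) == reverse(A; dims=2)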