Commit a0319f4 ("cosmetic changes", parent a101a4c)

14 files changed, +59/-59 lines. The commit replaces the C type alias `Cuint` with the equivalent concrete Julia type `UInt32` throughout (in Julia, `const Cuint = UInt32`, so behavior is unchanged) and fixes the recurring "lunch" -> "launch" typo in docstrings and error messages.

src/abstract_gpu_interface.jl

Lines changed: 6 additions & 6 deletions
@@ -5,7 +5,7 @@ Uses CUDA like names
 for sym in (:x, :y, :z)
     for f in (:blockidx, :blockdim, :threadidx, :griddim)
         fname = Symbol(string(f, '_', sym))
-        @eval $fname(state)::Cuint = error("Not implemented")
+        @eval $fname(state)::UInt32 = error("Not implemented")
         @eval export $fname
     end
 end
@@ -23,7 +23,7 @@ end
 linear index in a GPU kernel (equal to OpenCL.get_global_id)
 """
 @inline function linear_index(state)
-    Cuint((blockidx_x(state) - Cuint(1)) * blockdim_x(state) + threadidx_x(state))
+    UInt32((blockidx_x(state) - UInt32(1)) * blockdim_x(state) + threadidx_x(state))
 end
 @inline function global_size(state)
     griddim_x(state) * blockdim_x(state)
@@ -72,9 +72,9 @@ Calls function `f` on the GPU.
 and supplies queues and contexts.
 Calls kernel with `kernel(state, args...)`, where state is dependant on the backend
 and can be used for e.g getting an index into A with `linear_index(state)`.
-Optionally, lunch configuration can be supplied in the following way:
+Optionally, launch configuration can be supplied in the following way:
 
-1) A single integer, indicating how many work items (total number of threads) you want to lunch.
+1) A single integer, indicating how many work items (total number of threads) you want to launch.
     in this case `linear_index(state)` will be a number in the range 1:configuration
 2) Pass a tuple of integer tuples to define blocks and threads per blocks!
@@ -94,10 +94,10 @@ function gpu_call(f, A::GPUArray, args::Tuple, configuration = length(A))
         end
         map(x-> Int.(x), configuration) # make sure it all has the same int type
     else
-        error("""Please lunch a gpu kernel with a valid configuration.
+        error("""Please launch a gpu kernel with a valid configuration.
             Found: $configurations
             Configuration needs to be:
-            1) A single integer, indicating how many work items (total number of threads) you want to lunch.
+            1) A single integer, indicating how many work items (total number of threads) you want to launch.
                 in this case `linear_index(state)` will be a number in the range 1:configuration
             2) Pass a tuple of integer tuples to define blocks and threads per blocks!
             `linear_index` will be inbetween 1:prod((blocks..., threads...))
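
The docstring above fully determines the calling convention, so a usage sketch follows directly from it. The kernel name and body here are illustrative assumptions; only `gpu_call`, `linear_index`, and the argument layout come from the code above:

    # Hypothetical kernel following the convention kernel(state, args...):
    function add_one_kernel(state, a, n)
        i = linear_index(state)   # UInt32 in 1:configuration, per the docstring
        i > n && return           # guard, in case more threads run than elements
        @inbounds a[i] += 1f0
        return
    end

    # Launch with a single integer configuration (one work item per element):
    # gpu_call(add_one_kernel, A, (A, UInt32(length(A))), length(A))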

src/abstractarray.jl

Lines changed: 4 additions & 4 deletions
@@ -143,12 +143,12 @@ function copy!{T, N}(
         throw(DimensionMismatch("Ranges don't match their size. Found: $shape, $(size(srccrange))"))
     end
     len = length(destcrange)
-    dest_offsets = Cuint.(destcrange.start.I .- 1)
-    src_offsets = Cuint.(srccrange.start.I .- 1)
-    ui_shape = Cuint.(shape)
+    dest_offsets = UInt32.(destcrange.start.I .- 1)
+    src_offsets = UInt32.(srccrange.start.I .- 1)
+    ui_shape = UInt32.(shape)
     gpu_call(
         copy_kernel!, dest,
-        (dest, dest_offsets, src, src_offsets, ui_shape, Cuint.(size(dest)), Cuint.(size(src)), Cuint(len)),
+        (dest, dest_offsets, src, src_offsets, ui_shape, UInt32.(size(dest)), UInt32.(size(src)), UInt32(len)),
         len
     )
     dest
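
The body of `copy_kernel!` is not part of this diff; what the offsets encode is the usual shifted-range copy. A plain-Julia sketch for the 2-d case, with hypothetical names:

    # dest_offsets/src_offsets are the 0-based starts of the two ranges,
    # and shape is the common extent being copied.
    function copy_range_sketch!(dest, dest_offsets, src, src_offsets, shape)
        for j in 1:shape[2], i in 1:shape[1]
            dest[i + dest_offsets[1], j + dest_offsets[2]] =
                src[i + src_offsets[1], j + src_offsets[2]]
        end
        dest
    end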

src/base.jl

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ map!(f, y::GPUArray, x1::GPUArray, x2::GPUArray) =
 # end
 #
 # function _cat(dim, dest, xs...)
-#     gpu_call(dest, (Cuint(dim), dest, xs)) do state, dim, dest, xs
+#     gpu_call(dest, (UInt32(dim), dest, xs)) do state, dim, dest, xs
 #         I = @cartesianidx dest state
 #         nI = catindex(dim, I, size.(xs))
 #         n = nI[1]; I′ = nI[2]

src/broadcast.jl

Lines changed: 15 additions & 15 deletions
@@ -19,12 +19,12 @@ end
 end
 
 function broadcast!(f, A::GPUArray)
-    gpu_call(const_kernel, A, (A, f, Cuint(length(A))))
+    gpu_call(const_kernel, A, (A, f, UInt32(length(A))))
     A
 end
 function broadcast!(f::typeof(identity), A::GPUArray, val::Number)
     valconv = convert(eltype(A), val)
-    gpu_call(const_kernel2, A, (A, valconv, Cuint(length(A))))
+    gpu_call(const_kernel2, A, (A, valconv, UInt32(length(A))))
     A
 end
 @inline function broadcast_t(f, T::Type{Bool}, shape, it, A::GPUArrays.GPUArray, Bs::Vararg{Any,N}) where N
@@ -81,48 +81,48 @@ function _broadcast!(
     A::AT, Bs::BT, ::Type{Val{N}}, unused2 # we don't need those arguments
 ) where {N, K, ID, AT, BT}
 
-    shape = Cuint.(size(out))
+    shape = UInt32.(size(out))
     args = (A, Bs...)
     descriptor_tuple = ntuple(length(args)) do i
         BroadcastDescriptor(args[i], keeps[i], Idefaults[i])
     end
-    gpu_call(broadcast_kernel!, out, (func, out, shape, Cuint(length(out)), descriptor_tuple, A, deref.(Bs)...))
+    gpu_call(broadcast_kernel!, out, (func, out, shape, UInt32(length(out)), descriptor_tuple, A, deref.(Bs)...))
     out
 end
 
 
 
 function Base.foreach(func, over::GPUArray, Bs...)
-    shape = Cuint.(size(over))
+    shape = UInt32.(size(over))
     keeps, Idefaults = map_newindexer(shape, over, Bs)
     args = (over, Bs...)
     descriptor_tuple = ntuple(length(args)) do i
         BroadcastDescriptor(args[i], keeps[i], Idefaults[i])
     end
-    gpu_call(foreach_kernel, over, (func, shape, Cuint.(length(over)), descriptor_tuple, over, deref.(Bs)...))
+    gpu_call(foreach_kernel, over, (func, shape, UInt32.(length(over)), descriptor_tuple, over, deref.(Bs)...))
     return
 end
 
 
-arg_length(x::Tuple) = (Cuint(length(x)),)
-arg_length(x::GPUArray) = Cuint.(size(x))
+arg_length(x::Tuple) = (UInt32(length(x)),)
+arg_length(x::GPUArray) = UInt32.(size(x))
 arg_length(x) = ()
 
 abstract type BroadcastDescriptor{Typ} end
 
 immutable BroadcastDescriptorN{Typ, N} <: BroadcastDescriptor{Typ}
-    size::NTuple{N, Cuint}
-    keep::NTuple{N, Cuint}
-    idefault::NTuple{N, Cuint}
+    size::NTuple{N, UInt32}
+    keep::NTuple{N, UInt32}
+    idefault::NTuple{N, UInt32}
 end
 function BroadcastDescriptor(val::RefValue, keep, idefault)
-    BroadcastDescriptorN{Tuple, 1}((Cuint(1),), (Cuint(0),), (Cuint(1),))
+    BroadcastDescriptorN{Tuple, 1}((UInt32(1),), (UInt32(0),), (UInt32(1),))
 end
 
 function BroadcastDescriptor(val, keep, idefault)
     N = length(keep)
     typ = Broadcast.containertype(val)
-    BroadcastDescriptorN{typ, N}(arg_length(val), Cuint.(keep), Cuint.(idefault))
+    BroadcastDescriptorN{typ, N}(arg_length(val), UInt32.(keep), UInt32.(idefault))
 end
 
 @propagate_inbounds @inline function _broadcast_getindex(
@@ -208,11 +208,11 @@ for N = 0:10
 end
 
 function mapidx{N}(f, A::GPUArray, args::NTuple{N, Any})
-    gpu_call(mapidx_kernel, A, (f, A, Cuint(length(A)), args...))
+    gpu_call(mapidx_kernel, A, (f, A, UInt32(length(A)), args...))
 end
 
 # don't do anything for empty tuples
-@pure newindex(I, ilin, keep::Tuple{}, Idefault::Tuple{}, size::Tuple{}) = Cuint(1)
+@pure newindex(I, ilin, keep::Tuple{}, Idefault::Tuple{}, size::Tuple{}) = UInt32(1)
 
 # optimize for 1D arrays
 @pure function newindex(I::NTuple{1}, ilin, keep::NTuple{1}, Idefault, size)
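
The `keep`/`idefault` tuples stored in each descriptor drive the classic broadcast indexing rule: along dimensions where an argument has extent 1, its index is pinned to the default. A plain-Julia sketch of that rule (the general N-d `newindex` methods fall outside this hunk):

    # keep[d] == 1 passes the output index through; keep[d] == 0 pins it
    # to the default (the argument is broadcast along that dimension).
    newindex_sketch(I::Tuple, keep::Tuple, idefault::Tuple) =
        map((i, k, d) -> k == 1 ? i : d, I, keep, idefault)

    # Broadcasting a 1×4 row into a 3×4 output: dimension 1 is not kept.
    # newindex_sketch((2, 3), (0, 1), (1, 1)) == (1, 3)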

src/construction.jl

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,7 @@ end
 
 function fill!{T, N}(A::GPUArray{T, N}, val)
     valconv = T(val)
-    gpu_call(const_kernel2, A, (A, valconv, Cuint(length(A))))
+    gpu_call(const_kernel2, A, (A, valconv, UInt32(length(A))))
     A
 end
 
@@ -21,7 +21,7 @@ ones(T::Type{<: GPUArray}, dims::NTuple{N, Integer}) where N = fill(T, one(eltyp
 function eyekernel(state, res::AbstractArray{T}, stride) where T
     i = linear_index(state)
     i > stride && return
-    ilin = (stride * (i - Cuint(1))) + i
+    ilin = (stride * (i - UInt32(1))) + i
     @inbounds res[ilin] = one(T)
     return
 end
@@ -30,7 +30,7 @@ eye(T::Type{<: GPUArray}, i1::Integer) = eye(T, (i1, i1))
 eye(T::Type{<: GPUArray}, i1::Integer, i2::Integer) = eye(T, (i1, i2))
 function eye(T::Type{<: GPUArray}, dims::NTuple{2, Integer})
     res = zeros(T, dims)
-    gpu_call(eyekernel, res, (res, Cuint(size(res, 1))), minimum(dims))
+    gpu_call(eyekernel, res, (res, UInt32(size(res, 1))), minimum(dims))
     res
 end
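
The index arithmetic in `eyekernel` is worth spelling out: in a column-major matrix with leading dimension `stride`, the diagonal entry (i, i) sits at linear index stride*(i - 1) + i. For example, i = 2 in a 4×4 matrix gives 4*1 + 2 = 6, the position of (2, 2). A serial sketch of the same loop:

    # Serial equivalent of eyekernel: one iteration per launched work item.
    function eye_sketch(n, m)
        res = zeros(Float32, n, m)
        stride = size(res, 1)
        for i in 1:min(n, m)                  # gpu_call launches minimum(dims) items
            res[stride * (i - 1) + i] = 1f0   # linear index of the (i, i) entry
        end
        res
    end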

src/convolution.jl

Lines changed: 4 additions & 4 deletions
@@ -7,7 +7,7 @@
 #     ::Val{BLOCK_SIZE},
 #     ::Val{LOCAL_WIDTH}
 # ) where {T, BLOCK_SIZE, LOCAL_WIDTH}
-#     ui1 = Cuint(1); ui0 = Cuint(0)
+#     ui1 = UInt32(1); ui0 = UInt32(0)
 #     w = kernel_width
 #     wBy2 = w >> ui1 #w divided by 2
 #     #Goes up to 15x15 filters
@@ -54,8 +54,8 @@ function convolution_kernel(state, A::AbstractArray{T}, out, K, Asize, Ksize) wh
     end
     accum = zero(T)
     kw, kh = Ksize[1], Ksize[2]
-    for ix = Cuint(0):(kw - Cuint(1))
-        for jy = Cuint(0):(kh - Cuint(1))
+    for ix = UInt32(0):(kw - UInt32(1))
+        for jy = UInt32(0):(kh - UInt32(1))
             temp = A[gpu_sub2ind(Asize, idx .+ (ix, jy))]
             accum += temp * K[ix + kw*jy + 1]
         end
@@ -66,7 +66,7 @@ end
 
 
 function convolution!(a, out, k)
-    gpu_call(convolution_kernel, a, (a, out, k, Cuint.(size(a)), Cuint.(size(k))))
+    gpu_call(convolution_kernel, a, (a, out, k, UInt32.(size(a)), UInt32.(size(k))))
     GPUArrays.synchronize(out)
     out
 end
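
For reference, the accumulation in `convolution_kernel` is a "valid"-region cross-correlation: the filter is not flipped, and `K` is addressed by the flat column-major index ix + kw*jy + 1. A serial sketch covering only the interior, since the kernel's bounds handling falls outside this hunk:

    function convolve_sketch(A, K)
        kw, kh = size(K)
        out = zeros(eltype(A), size(A, 1) - kw + 1, size(A, 2) - kh + 1)
        for j in 1:size(out, 2), i in 1:size(out, 1)
            acc = zero(eltype(A))
            for jy in 0:kh - 1, ix in 0:kw - 1
                acc += A[i + ix, j + jy] * K[ix + kw * jy + 1]  # same flat index as the kernel
            end
            out[i, j] = acc
        end
        out
    end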

src/indexing.jl

Lines changed: 2 additions & 2 deletions
@@ -38,7 +38,7 @@ Base.setindex!(xs::GPUArray, v, i::Integer) = xs[i] = convert(eltype(xs), v)
 using Base.Cartesian
 to_index(a, x) = x
 to_index(::A, x::Array{ET}) where {A, ET} = copy!(similar(A, ET, size(x)), x)
-to_index(a, x::UnitRange{<: Integer}) = convert(UnitRange{Cuint}, x)
+to_index(a, x::UnitRange{<: Integer}) = convert(UnitRange{UInt32}, x)
 to_index(a, x::Base.LogicalIndex) = error("Logical indexing not implemented")
 
 @generated function index_kernel(state, dest::AbstractArray, src::AbstractArray, idims, Is)
@@ -59,6 +59,6 @@ function Base._unsafe_getindex!(dest::GPUArray, src::GPUArray, Is::Union{Real, A
         return dest
     end
     idims = map(length, Is)
-    gpu_call(index_kernel, dest, (dest, src, Cuint.(idims), map(x-> to_index(dest, x), Is)))
+    gpu_call(index_kernel, dest, (dest, src, UInt32.(idims), map(x-> to_index(dest, x), Is)))
     return dest
 end
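
`to_index` narrows integer ranges to `UnitRange{UInt32}` so the generated `index_kernel` can gather with 32-bit indices. The gather itself, sketched serially for a single range index:

    # dest[k] = src[I[k]] for every position k of the requested range.
    function gather_sketch(src::Vector, I::UnitRange)
        dest = similar(src, length(I))
        for k in 1:length(I)
            @inbounds dest[k] = src[I[k]]
        end
        dest
    end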

src/jlbackend.jl

Lines changed: 1 addition & 1 deletion
@@ -149,7 +149,7 @@ end
 for (i, sym) in enumerate((:x, :y, :z))
     for f in (:blockidx, :blockdim, :threadidx, :griddim)
         fname = Symbol(string(f, '_', sym))
-        @eval $fname(state::JLState) = Cuint(state.$f[$i])
+        @eval $fname(state::JLState) = UInt32(state.$f[$i])
     end
 end
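For reference, the first iteration of this `@eval` loop (f = :blockidx, sym = :x, i = 1) expands to:

    blockidx_x(state::JLState) = UInt32(state.blockidx[1])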

src/linalg.jl

Lines changed: 7 additions & 7 deletions
@@ -27,7 +27,7 @@ function transpose_blocks!(
     state, odata::AbstractArray{T}, idata, ::Val{SHMEM}, ::Val{TDIM}, ::Val{BLOCK_ROWS}, ::Val{NROW}
 ) where {T, SHMEM, TDIM, BLOCK_ROWS, NROW}
 
-    ui1 = Cuint(1)
+    ui1 = UInt32(1)
     tile = @LocalMemory(state, T, SHMEM)
     bidx_x = blockidx_x(state) - ui1
     bidx_y = blockidx_y(state) - ui1
@@ -38,15 +38,15 @@ function transpose_blocks!(
     y = bidx_y * TDIM + tidx_y + ui1
     dims = size(idata)
 
-    (x <= dims[2] && (y + (BLOCK_ROWS * Cuint(3))) <= dims[1]) || return
+    (x <= dims[2] && (y + (BLOCK_ROWS * UInt32(3))) <= dims[1]) || return
 
-    for j = Cuint(0):Cuint(3)
+    for j = UInt32(0):UInt32(3)
         j0 = j * BLOCK_ROWS
         tile[tidx_x + ui1, tidx_y + j0 + ui1] = idata[y + j0, x]
     end
 
     synchronize_threads(state)
-    for j = Cuint(0):Cuint(3)
+    for j = UInt32(0):UInt32(3)
         j0 = j * BLOCK_ROWS
         odata[x, y + j0] = tile[tidx_x + ui1, tidx_y + j0 + ui1]
     end
@@ -56,9 +56,9 @@ end
 function transpose!{T}(At::GPUArray{T, 2}, A::GPUArray{T, 2})
     if size(A, 1) == size(A, 2) && all(x-> x % 32 == 0, size(A))
         outsize = UInt32.(size(At))
-        TDIM = Cuint(32); BLOCK_ROWS = Cuint(8)
+        TDIM = UInt32(32); BLOCK_ROWS = UInt32(8)
         nrows = TDIM ÷ BLOCK_ROWS
-        shmemdim = (TDIM, (TDIM + Cuint(1)))
+        shmemdim = (TDIM, (TDIM + UInt32(1)))
         static_params = map(x-> Val{x}(), (shmemdim, TDIM, BLOCK_ROWS, nrows))
         args = (At, A, static_params...)
 
@@ -82,7 +82,7 @@ function genperm(I::NTuple{N}, perm::NTuple{N}) where N
 end
 
 function permutedims!(dest::GPUArray, src::GPUArray, perm)
-    perm = Cuint.((perm...,))
+    perm = UInt32.((perm...,))
     gpu_call(dest, (dest, src, perm)) do state, dest, src, perm
         I = @cartesianidx dest state
         @inbounds dest[I...] = src[genperm(I, perm)...]
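
The body of `genperm` is outside this hunk; a plain-Julia sketch of the index permutation it presumably performs, so that `dest[I...] = src[genperm(I, perm)...]` realizes `permutedims`:

    # Destination index I reads from src at the permuted index I[perm].
    genperm_sketch(I::NTuple{N}, perm::NTuple{N}) where N = ntuple(d -> I[perm[d]], N)

    # With perm = (2, 1) this is a plain transpose of the index:
    # genperm_sketch((3, 5), (2, 1)) == (5, 3)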

src/mapreduce.jl

Lines changed: 7 additions & 7 deletions
@@ -4,9 +4,9 @@ import Base: any, count, countnz
 # reduce
 # functions in base implemented with a direct loop need to be overloaded to use mapreduce
 any(pred, A::GPUArray) = Bool(mapreduce(pred, |, Cint(0), A))
-count(pred, A::GPUArray) = Int(mapreduce(pred, +, Cuint(0), A))
-countnz(A::GPUArray) = Int(mapreduce(x-> x != 0, +, Cuint(0), A))
-countnz(A::GPUArray, dim) = Int(mapreducedim(x-> x != 0, +, Cuint(0), A, dim))
+count(pred, A::GPUArray) = Int(mapreduce(pred, +, UInt32(0), A))
+countnz(A::GPUArray) = Int(mapreduce(x-> x != 0, +, UInt32(0), A))
+countnz(A::GPUArray, dim) = Int(mapreducedim(x-> x != 0, +, UInt32(0), A, dim))
 
 
 # hack to get around of fetching the first element of the GPUArray
@@ -49,10 +49,10 @@ end
 
 
 function mapreducedim_kernel(state, f, op, R::AbstractArray{T1, N}, A::AbstractArray{T, N}, slice_size, sizeA, dim) where {T1, T, N}
-    ilin = Cuint(linear_index(state))
+    ilin = UInt32(linear_index(state))
     ilin > length(R) && return
     accum = zero(T1)
-    @inbounds for i = Cuint(1):slice_size
+    @inbounds for i = UInt32(1):slice_size
         idx = N == dim ? (ilin, i) : (i, ilin)
         i2d = gpu_sub2ind(sizeA, idx)
         accum = op(accum, f(A[i2d]))
@@ -70,7 +70,7 @@ function Base._mapreducedim!(f, op, R::GPUArray, A::GPUArray)
     @assert count(x-> x == 1, sizeR) == (ndims(R) - 1) "Not implemented"
     dim = findfirst(x-> x == 1, sizeR)
     slice_size = size(A, dim)
-    gpu_call(mapreducedim_kernel, R, (f, op, R, A, Cuint(slice_size), Cuint.(size(A)), Cuint(dim)))
+    gpu_call(mapreducedim_kernel, R, (f, op, R, A, UInt32(slice_size), UInt32.(size(A)), UInt32(dim)))
     return R
 end
 
@@ -80,7 +80,7 @@ for i = 0:10
     @eval begin
         # http://developer.amd.com/resources/articles-whitepapers/opencl-optimization-case-study-simple-reductions/
         function reduce_kernel(state, f, op, v0::T, A, ::Val{LMEM}, result, $(args...)) where {T, LMEM}
-            ui0 = Cuint(0); ui1 = Cuint(1); ui2 = Cuint(2)
+            ui0 = UInt32(0); ui1 = UInt32(1); ui2 = UInt32(2)
             tmp_local = @LocalMemory(state, T, LMEM)
             global_index = linear_index(state)
             acc = v0
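
The truncated `reduce_kernel` follows the two-stage pattern from the AMD article linked in the code: each work item first accumulates a strided slice of `A`, then a shared-memory tree halves the number of active items per step. A serial sketch of that pattern, assuming a power-of-two item count:

    function reduce_sketch(op, v0, A, nitems)
        partial = fill(v0, nitems)
        for t in 1:nitems                    # stage 1: strided accumulation
            for i in t:nitems:length(A)
                partial[t] = op(partial[t], A[i])
            end
        end
        s = nitems >> 1
        while s > 0                          # stage 2: tree reduction
            for t in 1:s
                partial[t] = op(partial[t], partial[t + s])
            end
            s >>= 1
        end
        partial[1]
    end

    # reduce_sketch(+, 0, collect(1:100), 8) == 5050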
