diff --git a/src/device/array.jl b/src/device/array.jl index f9726222..0a4b6011 100644 --- a/src/device/array.jl +++ b/src/device/array.jl @@ -48,6 +48,7 @@ Base.size(g::CLDeviceArray) = g.dims Base.sizeof(x::CLDeviceArray) = Base.elsize(x) * length(x) # we store the array length too; computing prod(size) is expensive +Base.size(g::CLDeviceArray{<:Any, 1}) = (g.len,) Base.length(g::CLDeviceArray) = g.len Base.pointer(x::CLDeviceArray{T,<:Any,A}) where {T,A} = Base.unsafe_convert(LLVMPtr{T,A}, x) @@ -81,7 +82,11 @@ Base.unsafe_convert(::Type{LLVMPtr{T,A}}, x::CLDeviceArray{T,<:Any,A}) where {T, end @device_function @inline function arrayref(A::CLDeviceArray{T}, index::Integer) where {T} - @boundscheck checkbounds(A, index) + # simplified bounds check to avoid the OneTo construction, which calls `max` + # and breaks elimination of redundant bounds checks in the generated code. + #@boundscheck checkbounds(A, index) + @boundscheck index <= length(A) || Base.throw_boundserror(A, index) + if isbitstype(T) arrayref_bits(A, index) else #if isbitsunion(T) @@ -123,7 +128,10 @@ end end @device_function @inline function arrayset(A::CLDeviceArray{T}, x::T, index::Integer) where {T} - @boundscheck checkbounds(A, index) + # simplified bounds check (see `arrayref`) + #@boundscheck checkbounds(A, index) + @boundscheck index <= length(A) || Base.throw_boundserror(A, index) + if isbitstype(T) arrayset_bits(A, x, index) else #if isbitsunion(T) @@ -154,7 +162,10 @@ end end @device_function @inline function const_arrayref(A::CLDeviceArray{T}, index::Integer) where {T} - @boundscheck checkbounds(A, index) + # simplified bounds check (see `arrayset`) + #@boundscheck checkbounds(A, index) + @boundscheck index <= length(A) || Base.throw_boundserror(A, index) + align = alignment(A) unsafe_cached_load(pointer(A), index, Val(align)) end