diff --git a/src/array.jl b/src/array.jl
index d66396bfce..64a5685043 100644
--- a/src/array.jl
+++ b/src/array.jl
@@ -876,42 +876,74 @@ Base.unsafe_convert(::Type{CuPtr{T}}, A::PermutedDimsArray) where {T} =
 
 ## resizing
 
+# buffers larger than this many bytes grow by fixed increments instead of doubling
+const RESIZE_THRESHOLD = 10 * 1024^2  # 10 MiB
+# minimum number of bytes added when growing a buffer larger than RESIZE_THRESHOLD
+const RESIZE_INCREMENT = 1 * 1024^2   # 1 MiB
+
 """
   resize!(a::CuVector, n::Integer)
 
 Resize `a` to contain `n` elements. If `n` is smaller than the current collection length,
-the first `n` elements will be retained. If `n` is larger, the new elements are not
-guaranteed to be initialized.
+the first `n` elements will be retained. If `n` is larger, the contents of the new elements
+are undefined.
+
+Growing beyond the capacity of the underlying buffer allocates a new buffer with spare
+room: at least twice the current length for small buffers, or at least `RESIZE_INCREMENT`
+additional bytes once the buffer exceeds `RESIZE_THRESHOLD` bytes. Any other resize only
+updates the length and keeps the existing buffer.
+
+Throws an `ArgumentError` if `n` is negative.
 """
 function Base.resize!(A::CuVector{T}, n::Integer) where T
   n == length(A) && return A
+  n >= 0 || throw(ArgumentError("new length must be ≥ 0"))
 
-  # TODO: add additional space to allow for quicker resizing
-  maxsize = n * aligned_sizeof(T)
-  bufsize = if isbitstype(T)
-    maxsize
-  else
-    # type tag array past the data
-    maxsize + n
-  end
+  elsz = aligned_sizeof(T)
+
+  # elements that fit in the current buffer from `A.offset` onwards (assumes the offset
+  # counts elements -- TODO confirm). zero-sized element types give no capacity
+  # information (and would divide by zero), so growing those allocates an exact fit.
+  cap = elsz == 0 ? 0 : A.maxsize ÷ elsz - A.offset
+
+  # NOTE(review): resizing within the capacity keeps the existing buffer, so it no longer
+  # 'unshares' the array; confirm that is acceptable for arrays sharing their data.
+  if n > cap
+    len = if elsz == 0
+      n
+    elseif A.maxsize > RESIZE_THRESHOLD
+      # large buffers: add at least RESIZE_INCREMENT bytes
+      max(n, cap + RESIZE_INCREMENT ÷ elsz)
+    else
+      # small buffers: at least double the current length
+      max(n, 2 * length(A))
+    end
+
+    maxsize = len * elsz
+    bufsize = if isbitstype(T)
+      maxsize
+    else
+      # type tag array past the data
+      maxsize + len
+    end
 
-  # replace the data with a new one. this 'unshares' the array.
-  # as a result, we can safely support resizing unowned buffers.
-  new_data = context!(context(A)) do
-    mem = pool_alloc(memory_type(A), bufsize)
-    ptr = convert(CuPtr{T}, mem)
-    m = min(length(A), n)
-    if m > 0
-      GC.@preserve A unsafe_copyto!(ptr, pointer(A), m)
+    # replace the data with a new one. this 'unshares' the array.
+    # as a result, we can safely support growing unowned buffers.
+    new_data = context!(context(A)) do
+      mem = pool_alloc(memory_type(A), bufsize)
+      ptr = convert(CuPtr{T}, mem)
+      m = min(length(A), n)
+      if m > 0
+        GC.@preserve A unsafe_copyto!(ptr, pointer(A), m)
+      end
+      DataRef(pool_free, mem)
     end
-    DataRef(pool_free, mem)
+    unsafe_free!(A)
+    A.data = new_data
+    A.maxsize = maxsize
+    A.offset = 0
   end
-  unsafe_free!(A)
-  A.data = new_data
+
   A.dims = (n,)
-  A.maxsize = maxsize
-  A.offset = 0
-
   A
 end
 
diff --git a/test/base/array.jl b/test/base/array.jl
index 51fb2fc219..a0573c73f8 100644
--- a/test/base/array.jl
+++ b/test/base/array.jl
@@ -550,30 +550,86 @@ end
 end
 
 @testset "resizing" begin
-    a = CuArray([1,2,3])
-
-    resize!(a, 3)
-    @test length(a) == 3
-    @test Array(a) == [1,2,3]
-
-    resize!(a, 5)
-    @test length(a) == 5
-    @test Array(a)[1:3] == [1,2,3]
-
-    resize!(a, 2)
-    @test length(a) == 2
-    @test Array(a)[1:2] == [1,2]
-
-    # we should be able to resize an unsafe_wrapped array too, as it replaces the buffer
-    b = unsafe_wrap(CuArray{Int}, pointer(a), 2)
-    resize!(b, 3)
-    @test length(b) == 3
-    @test Array(b)[1:2] == [1,2]
-
-    b = CuArray{Int}(undef, 0)
-    @test length(b) == 0
-    resize!(b, 1)
-    @test length(b) == 1
+    # 1) small arrays (<=10 MiB): should still use doubling policy
+    a = CuArray([1, 2, 3])
+
+    # reallocation (add less than half)
+    CUDA.resize!(a, 4)
+    @test length(a) == 4
+    @test Array(a)[1:3] == [1, 2, 3]
+    @test a.maxsize == max(4, 2*3) * sizeof(eltype(a))
+
+    # no reallocation
+    CUDA.resize!(a, 5)
+    @test length(a) == 5
+    @test Array(a)[1:3] == [1, 2, 3]
+    @test a.maxsize == 6 * sizeof(eltype(a))
+
+    # reallocation (add more than half)
+    CUDA.resize!(a, 12)
+    @test length(a) == 12
+    @test Array(a)[1:3] == [1, 2, 3]
+    @test a.maxsize == max(12, 2*5) * sizeof(eltype(a))
+
+    # 2) large arrays (>10 MiB): should use 1 MiB increments
+    b = CUDA.fill(1, 2*1024^2)
+    maxsize = b.maxsize
+
+    # should bump by exactly 1 MiB
+    CUDA.resize!(b, 2*1024^2 + 1)
+    @test length(b) == 2*1024^2 + 1
+    @test b.maxsize == maxsize + CUDA.RESIZE_INCREMENT
+    @test all(Array(b)[1:2*1024^2] .== 1)
+
+    b = CUDA.fill(1, 2*1024^2)
+    maxsize = b.maxsize
+
+    # should bump greater than 1 MiB
+    new = CUDA.RESIZE_INCREMENT ÷ sizeof(eltype(b))
+    CUDA.resize!(b, 2*1024^2 + new + 1)
+    @test length(b) == 2*1024^2 + new + 1
+    @test b.maxsize > maxsize + CUDA.RESIZE_INCREMENT
+    @test all(Array(b)[1:2*1024^2] .== 1)
+
+    b = CUDA.fill(1, 2*1024^2)
+    maxsize = b.maxsize
+
+    # no reallocation
+    CUDA.resize!(b, 2*1024^2 - 1)
+    @test length(b) == 2*1024^2 - 1
+    @test b.maxsize == maxsize
+    @test all(Array(b)[1:2*1024^2 - 1] .== 1)
+
+    # 3) corner cases
+    c = CuArray{Int}(undef, 0)
+    @test length(c) == 0
+    CUDA.resize!(c, 1)
+    @test length(c) == 1
+    @test c.maxsize == 1 * sizeof(eltype(c))
+
+    c = CuArray{Int}(undef, 1)
+    @test length(c) == 1
+    CUDA.resize!(c, 0)
+    @test length(c) == 0
+    @test c.maxsize == 1 * sizeof(eltype(c))
+
+    # negative lengths are rejected
+    @test_throws ArgumentError CUDA.resize!(c, -1)
+
+    # resizing an unsafe_wrapped (unowned) array replaces the buffer once it has to grow
+    d = CuArray([1, 2])
+    e = unsafe_wrap(CuArray{Int}, pointer(d), 2)
+    CUDA.resize!(e, 3)
+    @test length(e) == 3
+    @test Array(e)[1:2] == [1, 2]
+
+    # a view with a non-zero offset must not grow past the end of the shared buffer
+    p = CuArray([1, 2, 3, 4, 5])
+    v = view(p, 3:5)
+    CUDA.resize!(v, 5)
+    @test length(v) == 5
+    @test Array(v)[1:3] == [3, 4, 5]
+    @test Array(p) == [1, 2, 3, 4, 5]
 end
 
 @testset "aliasing" begin