Skip to content

Make resize! run faster #2828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 35 additions & 24 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -876,42 +876,53 @@ Base.unsafe_convert(::Type{CuPtr{T}}, A::PermutedDimsArray) where {T} =

## resizing

# Byte-size cutoff compared against `A.maxsize` in `resize!`: buffers larger than this grow
# by `RESIZE_INCREMENT` instead of doubling.
const RESIZE_THRESHOLD = 10 * 2^20 # 10 MiB
# Minimum number of bytes added to the capacity when growing a buffer above the threshold.
const RESIZE_INCREMENT = 2^20 # 1 MiB
"""
resize!(a::CuVector, n::Integer)

Resize `a` to contain `n` elements. If `n` is smaller than the current collection length,
the first `n` elements will be retained. If `n` is larger, the new elements are not
guaranteed to be initialized.
the first `n` elements will be retained. If `n` is larger, the new elements are left
uninitialized (their contents are undefined).
"""
function Base.resize!(A::CuVector{T}, n::Integer) where T
n == length(A) && return A

# TODO: add additional space to allow for quicker resizing
maxsize = n * aligned_sizeof(T)
bufsize = if isbitstype(T)
maxsize
else
# type tag array past the data
maxsize + n
end
cap = A.maxsize ÷ aligned_sizeof(T)

# only reallocate when the new length exceeds the current capacity (in elements)
if n > cap # n > length(A)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`resize!` should be made aware of that and do nothing when the requested size is smaller than `maxsize`.

Is it really OK if we never shrink GPU arrays?


# if maxsize is larger than 10 MiB
if A.maxsize > RESIZE_THRESHOLD
len = max(cap + RESIZE_INCREMENT ÷ aligned_sizeof(T), n) # add at least 1 MiB
else
len = max(n, 2 * length(A))
end

maxsize = len * aligned_sizeof(T)
bufsize = if isbitstype(T)
maxsize
else
# type tag array past the data
maxsize + len
Copy link
Contributor Author

@huiyuxie huiyuxie Aug 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to add test coverage for this line, given the Codecov report (https://app.codecov.io/gh/JuliaGPU/CUDA.jl/pull/2828?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=JuliaGPU)?
I don't think the original tests covered this line either.

end

# replace the data with a new one. this 'unshares' the array.
# as a result, we can safely support resizing unowned buffers.
new_data = context!(context(A)) do
mem = pool_alloc(memory_type(A), bufsize)
ptr = convert(CuPtr{T}, mem)
m = min(length(A), n)
if m > 0
GC.@preserve A unsafe_copyto!(ptr, pointer(A), m)
new_data = context!(context(A)) do
mem = pool_alloc(memory_type(A), bufsize)
ptr = convert(CuPtr{T}, mem)
m = min(length(A), n)
if m > 0
GC.@preserve A unsafe_copyto!(ptr, pointer(A), m)
end
DataRef(pool_free, mem)
end
DataRef(pool_free, mem)
unsafe_free!(A)
A.data = new_data
A.maxsize = maxsize
A.offset = 0
end
unsafe_free!(A)

A.data = new_data
A.dims = (n,)
A.maxsize = maxsize
A.offset = 0

A
end
86 changes: 62 additions & 24 deletions test/base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -550,30 +550,68 @@ end
end

# Exercises `resize!` on `CuArray` vectors: length, preservation of the leading elements,
# and the `maxsize` (capacity in bytes) that results from each call.
# NOTE: the steps are order-dependent; each `resize!` acts on the state left by the
# previous one.
@testset "resizing" begin
a = CuArray([1,2,3])

# resizing to the current length leaves length and contents unchanged
resize!(a, 3)
@test length(a) == 3
@test Array(a) == [1,2,3]

# growing keeps the existing elements as a prefix
resize!(a, 5)
@test length(a) == 5
@test Array(a)[1:3] == [1,2,3]

# shrinking keeps the first `n` elements
resize!(a, 2)
@test length(a) == 2
@test Array(a)[1:2] == [1,2]

# we should be able to resize an unsafe_wrapped array too, as it replaces the buffer
b = unsafe_wrap(CuArray{Int}, pointer(a), 2)
resize!(b, 3)
@test length(b) == 3
@test Array(b)[1:2] == [1,2]

# growing an empty array
b = CuArray{Int}(undef, 0)
@test length(b) == 0
resize!(b, 1)
@test length(b) == 1
# 1) small arrays (<=10 MiB): should still use doubling policy
a = CuArray([1, 2, 3])

# reallocation: the request (4) is less than double the current length, so the
# expected capacity is max(4, 2*3) = 6 elements
CUDA.resize!(a, 4)
@test length(a) == 4
@test Array(a)[1:3] == [1, 2, 3]
@test a.maxsize == max(4, 2*3) * sizeof(eltype(a))

# no reallocation: 5 elements fit in the 6-element capacity from the previous step
CUDA.resize!(a, 5)
@test length(a) == 5
@test Array(a)[1:3] == [1, 2, 3]
@test a.maxsize == 6 * sizeof(eltype(a))

# reallocation: the request (12) is more than double the current length (5), so the
# expected capacity is exactly the requested 12 elements
CUDA.resize!(a, 12)
@test length(a) == 12
@test Array(a)[1:3] == [1, 2, 3]
@test a.maxsize == max(12, 2*5) * sizeof(eltype(a))

# 2) large arrays (>10 MiB): should use 1 MiB increments
# 2*1024^2 `Int` elements; 16 MiB assuming an 8-byte `Int`
b = CUDA.fill(1, 2*1024^2)
maxsize = b.maxsize

# should bump by exactly 1 MiB
CUDA.resize!(b, 2*1024^2 + 1)
@test length(b) == 2*1024^2 + 1
@test b.maxsize == maxsize + CUDA.RESIZE_INCREMENT
@test all(Array(b)[1:2*1024^2] .== 1)

# start again from a fresh array so the previous growth does not affect this case
b = CUDA.fill(1, 2*1024^2)
maxsize = b.maxsize

# should grow by more than 1 MiB: the request is one element more than a single
# RESIZE_INCREMENT bump can hold (`new` = number of elements in one increment)
new = CUDA.RESIZE_INCREMENT ÷ sizeof(eltype(b))
CUDA.resize!(b, 2*1024^2 + new + 1)
@test length(b) == 2*1024^2 + new + 1
@test b.maxsize > maxsize + CUDA.RESIZE_INCREMENT
@test all(Array(b)[1:2*1024^2] .== 1)

b = CUDA.fill(1, 2*1024^2)
maxsize = b.maxsize

# no reallocation: shrinking by one element must leave the capacity unchanged
CUDA.resize!(b, 2*1024^2 - 1)
@test length(b) == 2*1024^2 - 1
@test b.maxsize == maxsize
@test all(Array(b)[1:2*1024^2 - 1] .== 1)

# 3) corner cases
# growing from empty: expected capacity is exactly one element
c = CuArray{Int}(undef, 0)
@test length(c) == 0
CUDA.resize!(c, 1)
@test length(c) == 1
@test c.maxsize == 1 * sizeof(eltype(c))

# shrinking to empty: the one-element capacity is expected to be kept
c = CuArray{Int}(undef, 1)
@test length(c) == 1
CUDA.resize!(c, 0)
@test length(c) == 0
@test c.maxsize == 1 * sizeof(eltype(c))
end

@testset "aliasing" begin
Expand Down