Skip to content

Commit de714d4

Browse files
committed
Add preserve_buffer to help avoid heap allocations in some cases.
1 parent fee8fb5 commit de714d4

File tree

2 files changed

+76
-5
lines changed

2 files changed

+76
-5
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.9.3"
4+
version = "0.9.4"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/lowering.jl

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -551,16 +551,87 @@ function reduce_expr!(q::Expr, ls::LoopSet, U::Int)
551551
end
552552
end
553553
end
554+
555+
"""
556+
For structs wrapping arrays, using `GC.@preserve` can trigger heap allocations.
557+
`preserve_buffer` attempts to extract the heap-allocated part. Isolating it by itself
558+
will often allow the heap allocations to be elided. For example:
559+
560+
```julia
561+
julia> using StaticArrays, BenchmarkTools
562+
563+
julia> # Needed until a release is made featuring https://github.com/JuliaArrays/StaticArrays.jl/commit/a0179213b741c0feebd2fc6a1101a7358a90caed
564+
Base.elsize(::Type{<:MArray{S,T}}) where {S,T} = sizeof(T)
565+
566+
julia> @noinline foo(A) = unsafe_load(A,1)
567+
foo (generic function with 1 method)
568+
569+
julia> function alloc_test_1()
570+
A = view(MMatrix{8,8,Float64}(undef), 2:5, 3:7)
571+
A[begin] = 4
572+
GC.@preserve A foo(pointer(A))
573+
end
574+
alloc_test_1 (generic function with 1 method)
575+
576+
julia> function alloc_test_2()
577+
A = view(MMatrix{8,8,Float64}(undef), 2:5, 3:7)
578+
A[begin] = 4
579+
pb = parent(A) # or `LoopVectorization.preserve_buffer(A)`; `preserve_buffer(::SubArray)` calls `parent`
580+
GC.@preserve pb foo(pointer(A))
581+
end
582+
alloc_test_2 (generic function with 1 method)
583+
584+
julia> @benchmark alloc_test_1()
585+
BenchmarkTools.Trial:
586+
memory estimate: 544 bytes
587+
allocs estimate: 1
588+
--------------
589+
minimum time: 17.227 ns (0.00% GC)
590+
median time: 21.352 ns (0.00% GC)
591+
mean time: 26.151 ns (13.33% GC)
592+
maximum time: 571.130 ns (78.53% GC)
593+
--------------
594+
samples: 10000
595+
evals/sample: 998
596+
597+
julia> @benchmark alloc_test_2()
598+
BenchmarkTools.Trial:
599+
memory estimate: 0 bytes
600+
allocs estimate: 0
601+
--------------
602+
minimum time: 3.275 ns (0.00% GC)
603+
median time: 3.493 ns (0.00% GC)
604+
mean time: 3.491 ns (0.00% GC)
605+
maximum time: 4.998 ns (0.00% GC)
606+
--------------
607+
samples: 10000
608+
evals/sample: 1000
609+
```
610+
"""
611+
@inline preserve_buffer(A::AbstractArray) = A
612+
@inline preserve_buffer(A::SubArray) = preserve_buffer(parent(A))
613+
@inline preserve_buffer(A::PermutedDimsArray) = preserve_buffer(parent(A))
614+
@inline preserve_buffer(A::Union{Transpose,Adjoint}) = preserve_buffer(parent(A))
615+
@inline preserve_buffer(x) = x
616+
554617
function gc_preserve(ls::LoopSet, q::Expr)
555618
length(ls.opdict) == 0 && return q
556-
gcp = Expr(:macrocall, Expr(:(.), :GC, QuoteNode(Symbol("@preserve"))), LineNumberNode(@__LINE__, Symbol(@__FILE__)))
619+
q2 = Expr(:block)
620+
gcp = Expr(:gc_preserve, q)
621+
# gcp = Expr(:macrocall, Expr(:(.), :GC, QuoteNode(Symbol("@preserve"))), LineNumberNode(@__LINE__, Symbol(@__FILE__)))
557622
for array ∈ ls.includedactualarrays
558-
push!(gcp.args, array)
623+
pb = gensym(array);
624+
push!(q2.args, Expr(:(=), pb, Expr(:call, lv(:preserve_buffer), array)))
625+
push!(gcp.args, pb)
559626
end
560627
q.head === :block && push!(q.args, nothing)
561-
push!(gcp.args, q)
562-
Expr(:block, gcp)
628+
# push!(gcp.args, q)
629+
push!(q2.args, gcp)
630+
q2
631+
# Expr(:block, gcp)
563632
end
633+
634+
564635
function determine_eltype(ls::LoopSet)
565636
if length(ls.includedactualarrays) == 0
566637
return Expr(:call, :typeof, 0)

0 commit comments

Comments
 (0)