@@ -551,16 +551,87 @@ function reduce_expr!(q::Expr, ls::LoopSet, U::Int)
551
551
end
552
552
end
553
553
end
554
+
"""
    preserve_buffer(A)

Return the object that should be passed to `GC.@preserve` in place of `A`.

`SubArray`, `PermutedDimsArray`, `Transpose`, and `Adjoint` wrappers are unwrapped through
`parent`, recursively, so nested wrappers resolve to the innermost parent. Every other
argument is returned unchanged.

For structs wrapping arrays, using `GC.@preserve` on the wrapper itself can trigger heap
allocations. `preserve_buffer` attempts to extract the heap-allocated part; preserving
only that part will often allow the heap allocations to be elided. For example:

```julia
julia> using StaticArrays, BenchmarkTools

julia> # Needed until a release is made featuring https://github.com/JuliaArrays/StaticArrays.jl/commit/a0179213b741c0feebd2fc6a1101a7358a90caed
       Base.elsize(::Type{<:MArray{S,T}}) where {S,T} = sizeof(T)

julia> @noinline foo(A) = unsafe_load(A,1)
foo (generic function with 1 method)

julia> function alloc_test_1()
           A = view(MMatrix{8,8,Float64}(undef), 2:5, 3:7)
           A[begin] = 4
           GC.@preserve A foo(pointer(A))
       end
alloc_test_1 (generic function with 1 method)

julia> function alloc_test_2()
           A = view(MMatrix{8,8,Float64}(undef), 2:5, 3:7)
           A[begin] = 4
           pb = parent(A) # or `LoopVectorization.preserve_buffer(A)`; `preserve_buffer(::SubArray)` calls `parent`
           GC.@preserve pb foo(pointer(A))
       end
alloc_test_2 (generic function with 1 method)

julia> @benchmark alloc_test_1()
BenchmarkTools.Trial:
  memory estimate:  544 bytes
  allocs estimate:  1
  --------------
  minimum time:     17.227 ns (0.00% GC)
  median time:      21.352 ns (0.00% GC)
  mean time:        26.151 ns (13.33% GC)
  maximum time:     571.130 ns (78.53% GC)
  --------------
  samples:          10000
  evals/sample:     998

julia> @benchmark alloc_test_2()
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     3.275 ns (0.00% GC)
  median time:      3.493 ns (0.00% GC)
  mean time:        3.491 ns (0.00% GC)
  maximum time:     4.998 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1000
```
"""
@inline preserve_buffer(A::AbstractArray) = A
# Wrapper types: unwrap via `parent` and recurse until a non-wrapper is reached.
@inline function preserve_buffer(A::Union{SubArray,PermutedDimsArray,Transpose,Adjoint})
    return preserve_buffer(parent(A))
end
# Fallback for arguments that are not arrays: hand them back untouched.
@inline preserve_buffer(x) = x
616
+
554
617
"""
    gc_preserve(ls::LoopSet, q::Expr)

Wrap the generated code `q` so that the buffers behind the arrays listed in
`ls.includedactualarrays` are GC-preserved while `q` runs.

Returns `q` itself, unmodified, when `ls.opdict` is empty. Otherwise returns a new
`:block` expression that

1. binds one `gensym`-generated name per array to the result of calling
   `preserve_buffer` on that array, and
2. ends with an `Expr(:gc_preserve, q, names...)` node, the expression form that
   `GC.@preserve` expands to.

If `q` is a `:block`, `nothing` is appended to it in place, so the wrapped block
evaluates to `nothing`.
"""
function gc_preserve(ls::LoopSet, q::Expr)
    iszero(length(ls.opdict)) && return q
    wrapped = Expr(:block)
    preserved = Expr(:gc_preserve, q)
    for array in ls.includedactualarrays
        buffer = gensym(array)
        # `lv` is defined elsewhere in the project; presumably it qualifies the function
        # name with the LoopVectorization module — confirm against its definition.
        extract = Expr(:call, lv(:preserve_buffer), array)
        push!(wrapped.args, Expr(:(=), buffer, extract))
        push!(preserved.args, buffer)
    end
    # `preserved` holds a reference to `q`, so this in-place append is visible through it.
    if q.head === :block
        push!(q.args, nothing)
    end
    push!(wrapped.args, preserved)
    return wrapped
end
633
+
634
+
564
635
function determine_eltype (ls:: LoopSet )
565
636
if length (ls. includedactualarrays) == 0
566
637
return Expr (:call , :typeof , 0 )
0 commit comments