Merge pull request #61 from JamesWrigley/alloc-helpers

brenhinkeller · web-flow · commit 9ebbe4b72b92 · 2026-02-10T12:54:17.000-05:00
Allocation helpers and `movmean!()`
diff --git a/README.md b/README.md
@@ -14,12 +14,14 @@ See also [JuliaSIMD/VectorizedStatistics.jl](https://github.com/JuliaSIMD/Vector
 Summary statistics exported by NaNStatistics are generally named the same as their normal counterparts, but with "nan" in front of the name, similar to the Matlab and NumPy conventions. Options include:
 ##### Reductions
 * `nansum`
+* `nansum!`
 * `nanminimum`
 * `nanmaximum`
 * `nanextrema`
 
 ##### Measures of central tendency
 * `nanmean` &emsp; arithmetic mean, ignoring `NaN`s
+* `nanmean!` &emsp; as `nanmean`, but writes to a given output array
 * `nanmedian` &emsp; median, ignoring `NaN`s
 * `nanmedian!` &emsp; as `nanmedian` but quicksorts in-place for efficiency
 
@@ -127,7 +129,7 @@ julia> @btime nanbinmean($x,$y,xmin,xmax,nbins)
  90.30275863080671
 ```
 ### Other functions
-* `movmean`
+* `movmean` / `movmean!`
 A simple moving average function, which can operate in 1D or 2D, ignoring NaNs.
 ```
 julia> A = rand(1:10, 4,4)
@@ -148,6 +150,15 @@ julia> movmean(A, 3)
  * `nanstandardize` / `nanstandardize!`
  De-mean and set to unit variance
 
+### Allocation functions
+To use mutating functions like `nanmean!` you can call the appropriate
+allocation function and get back an array that can be passed as the output
+argument.
+
+* `allocate_nanmean`
+* `allocate_nansum`
+* `allocate_movmean`
+
 ### DimensionalData support
 Almost all functions support
 [DimArrays](https://rafaqz.github.io/DimensionalData.jl/stable/dimarrays) and
diff --git a/ext/NaNStatisticsDimensionalDataExt.jl b/ext/NaNStatisticsDimensionalDataExt.jl
@@ -78,4 +78,8 @@ function NaNStatistics.movmean(A::DD.AbstractDimVecOrMat, n::Number)
     rebuild(A, data)
 end
 
+function NaNStatistics._allocate_reduce(Tₒ, A::AbstractDimArray, dims)
+    rebuild(A, NaNStatistics._allocate_reduce(Tₒ, parent(A), dims), DD.reducedims(A, dims))
+end
+
 end
diff --git a/src/ArrayStats/ArrayStats.jl b/src/ArrayStats/ArrayStats.jl
@@ -498,6 +498,17 @@
 
 ## -- Moving average, ignoring NaNs
 
+    """
+        allocate_movmean(x::AbstractVecOrMat)
+
+    Allocate an array shaped like `x`, with the element type obtained by dividing an
+    element of `x` by an integer, for use as the output argument of `movmean!()`.
+    """
+    function allocate_movmean(x::AbstractVecOrMat{T}) where T
+        Tₒ = Base.promote_op(/, T, Int64)
+        return similar(x, Tₒ)
+    end
+
     """
     ```julia
     movmean(x::AbstractVecOrMat, n::Number)
@@ -509,19 +520,24 @@
     if `n` is not an odd integer, the first odd integer greater than `n` will be
     used instead.
     """
-    function movmean(x::AbstractVector{T}, n::Number) where T
-        mean_type = Base.promote_op(/, T, Int64)
-        m = Array{mean_type}(undef, size(x))
+    movmean(x::AbstractVector, n::Number) = movmean!(allocate_movmean(x), x, n)
+
+    """
+        movmean!(out, x, win_or_n::Union{Number, Tuple})
+
+    Non-allocating version of `movmean()`. Generate the `out` parameter with
+    `allocate_movmean(x)`.
+    """
+    function movmean!(out::AbstractVector, x::AbstractVector{T}, n::Number) where T
         δ = ceil(Int, (n-1)/2)
         @inbounds for i ∈ eachindex(x)
             iₗ = max(i-δ, firstindex(x))
             iᵤ = min(i+δ, lastindex(x))
-            m[i] = nanmean(view(x, iₗ:iᵤ))
+            out[i] = nanmean(view(x, iₗ:iᵤ))
         end
-        return m
+        return out
     end
 
-
     """
         movmean(x::AbstractVector{T}, win::Tuple{Int, Int}=(1, 1); skip_centre=false) where {T<:Real}
     
@@ -542,13 +558,13 @@
     movmean(x, win)  # returns [1.5, 2.0, 3.0, 4.0, 4.5]
     ```
     """
-    function movmean(x::AbstractVector{T}, win::Tuple{Int, Int}=(1, 1); 
+    movmean(x::AbstractVector, win::Tuple{Int, Int}=(1, 1); skip_centre=false) = movmean!(allocate_movmean(x), x, win; skip_centre)
+
+    function movmean!(out::AbstractVector, x::AbstractVector{T}, win::Tuple{Int, Int}=(1, 1);
         skip_centre=false) where {T<:Real}
         win_left, win_right = win
         
-        FT = Base.promote_op(/, T, Int64)
-        z = similar(x, FT)
-        ∑ = ∅ = FT(0)
+        ∑ = ∅ = zero(eltype(out))
         ∑w = ∅w = 0
 
         @inbounds @simd for i ∈ eachindex(x)
@@ -563,14 +579,14 @@
                 ∑ += ifelse(notnan, xᵢ, ∅)
                 ∑w += ifelse(notnan, 1, 0)
             end
-            z[i] = ∑ / ∑w
+            out[i] = ∑ / ∑w
         end
-        z
+        return out
     end
 
-    function movmean(x::AbstractMatrix{T}, n::Number) where T
-        mean_type = Base.promote_op(/, T, Int64)
-        m = Array{mean_type}(undef, size(x))
+    movmean(x::AbstractMatrix, n::Number) = movmean!(allocate_movmean(x), x, n)
+
+    function movmean!(out::AbstractMatrix, x::AbstractMatrix{T}, n::Number) where T
         δ = ceil(Int, (n-1)/2)
         𝐼 = repeat((firstindex(x,1):lastindex(x,1)), 1, size(x,2))
         𝐽 = repeat((firstindex(x,2):lastindex(x,2))', size(x,1), 1)
@@ -581,11 +597,11 @@
             j = 𝐽[k]
             jₗ = max(j-δ, firstindex(x,2))
             jᵤ = min(j+δ, lastindex(x,2))
-            m[i,j] = nanmean(view(x, iₗ:iᵤ, jₗ:jᵤ))
+            out[i,j] = nanmean(view(x, iₗ:iᵤ, jₗ:jᵤ))
         end
-        return m
+        return out
     end
-    export movmean
+    export movmean, movmean!
 
 ## --- Internal helpers
 
@@ -625,4 +641,10 @@ function _normalize_dims(dims)
     end
 end
 
+function _allocate_reduce(Tₒ, A, dims)  # output buffer for reducing `A` over `dims`
+    reduced_dims = _normalize_dims(dims)
+    out_size = _reduced_size(A, reduced_dims)
+    return similar(A, Tₒ, out_size)
+end
+
 ## --- End of File
diff --git a/src/ArrayStats/nanmean.jl b/src/ArrayStats/nanmean.jl
@@ -3,6 +3,22 @@ NANMEAN_SIZE_THRESHOLD::Union{Int, Symbol} = 2^20
 
 get_size_threshold(x::Integer) = x
 
+"""
+    allocate_nanmean(A::AbstractArray, dims)
+
+Allocate an output array for `nanmean!()` matching a reduction of `A` over
+`dims`.
+
+Prefer the value returned by `nanmean!()` over the array allocated here:
+`nanmean!()` will drop dimensions if `dim` is used. Dropping them is a
+zero-copy operation, so the returned array still shares its memory with this
+one.
+"""
+function allocate_nanmean(A::AbstractArray{T}, dims) where T
+    mean_type = Base.promote_op(/, T, Int)
+    return _allocate_reduce(mean_type, A, dims)
+end
+
 """
 ```julia
 nanmean(A; dims, size_threshold)
@@ -73,13 +89,7 @@ export nanmean!
 _nanmean(A, dims::Int, st) = _nanmean(A, (dims,), st)
 
 # Reduce some dims
-function _nanmean(A::AbstractArray{T,N}, dims::Tuple, st) where {T,N}
-    sₒ = _reduced_size(A, dims)
-    Tₒ = Base.promote_op(/, T, Int)
-    B = similar(A, Tₒ, sₒ)
-
-    _nanmean!(B, A, dims, st)
-end
+_nanmean(A::AbstractArray, dims::Tuple, st) = _nanmean!(allocate_nanmean(A, dims), A, dims, st)
 
 function _nanmean!(B, A, dims, st)
     if 1 in dims || sizeof(A) < get_size_threshold(st)
diff --git a/src/ArrayStats/nansum.jl b/src/ArrayStats/nansum.jl
@@ -1,3 +1,16 @@
+"""
+    allocate_nansum(A::AbstractArray, dims)
+
+Allocates an array that can be passed as the output array to `nansum!()` for
+the given `A`. Integer element types are promoted with `Int`; others are kept.
+
+See the `allocate_nanmean()` docstring for info on using the returned array.
+"""
+function allocate_nansum(A::AbstractArray{T}, dims) where T
+    Tₒ = T <: Integer ? Base.promote_op(+, T, Int) : T
+    _allocate_reduce(Tₒ, A, dims)
+end
+
 """
 ```julia
 nansum(A; dims)
@@ -60,11 +73,7 @@ export nansum!
 _nansum(A, dims::Int) = _nansum(A, (dims,))
 
 # Reduce some dims
-function _nansum(A::AbstractArray{T,N}, dims::Tuple) where {T,N}
-    sₒ = _reduced_size(A, dims)
-    B = similar(A, T, sₒ)
-    _nansum!(B, A, dims)
-end
+_nansum(A::AbstractArray, dims::Tuple) = _nansum!(allocate_nansum(A, dims), A, dims)
 
 function _nansum!(B, A, dims::Tuple)
     if 1 in dims
@@ -77,16 +86,6 @@ function _nansum!(B, A, dims::Tuple)
     end
 end
 
-function _nansum(A::AbstractArray{T,N}, dims::Tuple) where {T<:Integer,N}
-    sᵢ = size(A)
-    sₒ = ntuple(Val{N}()) do d
-        ifelse(d ∈ dims, 1, sᵢ[d])
-    end
-    Tₒ = Base.promote_op(+, T, Int)
-    B = similar(A, Tₒ, sₒ)
-    _nansum!(B, A, dims)
-end
-
 # Reduce all the dims!
 function _nansum(A, ::Colon)
     Tₒ = eltype(A)
diff --git a/test/testArrayStats.jl b/test/testArrayStats.jl
@@ -570,5 +570,12 @@
     @test nanmedian(x, dim=(1, 3)) == ones(100)
     @test nanmedian(x, dim=(1, 2, 3)) == fill(1.0)
 
+## --- Allocation functions
+
+    @test size(NaNStatistics.allocate_nanmean(rand(10, 10), 1)) == (1, 10)
+    @test NaNStatistics.allocate_nanmean(rand(Float32, 10, 10), 1) isa Matrix{Float32}
+    @test NaNStatistics.allocate_nansum(rand(10, 10), 1) isa Matrix{Float64}
+    @test NaNStatistics.allocate_nansum(rand(Int, 10, 10), 1) isa Matrix{Int}
+    @test NaNStatistics.allocate_movmean(rand(10)) isa Vector{Float64}
 
 ## --- End of File
diff --git a/test/testDimensionalDataExt.jl b/test/testDimensionalDataExt.jl
@@ -89,3 +89,19 @@ end
     res = nanrange(x; dim=:foo)
     @test res == nanrange(parent(x); dim=1)
 end
+
+@testset "Allocation helpers" begin
+    data = rand(X(5), Y(11:15))
+
+    # Just test allocate_nanmean() since the other reduction allocators all go
+    # through _allocate_reduce().
+    out = NaNStatistics.allocate_nanmean(data, 1)
+    @test size(out) == (1, 5)
+    @test out isa DimMatrix{Float64}
+    @test lookup(out, Y) == lookup(data, Y)
+
+    # Test allocate_movmean() explicitly since it doesn't go through _allocate_reduce()
+    out = NaNStatistics.allocate_movmean(data)
+    @test size(out) == size(data)
+    @test out isa DimMatrix{Float64}
+end