Skip to content

Commit 9ebbe4b

Browse files
Merge pull request #61 from JamesWrigley/alloc-helpers
Allocation helpers and `movmean!()`
2 parents 5ac0655 + 005c0c5 commit 9ebbe4b

File tree

7 files changed

+110
-41
lines changed

7 files changed

+110
-41
lines changed

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@ See also [JuliaSIMD/VectorizedStatistics.jl](https://github.com/JuliaSIMD/Vector
1414
Summary statistics exported by NaNStatistics are generally named the same as their normal counterparts, but with "nan" in front of the name, similar to the Matlab and NumPy conventions. Options include:
1515
##### Reductions
1616
* `nansum`
17+
* `nansum!`
1718
* `nanminimum`
1819
* `nanmaximum`
1920
* `nanextrema`
2021

2122
##### Measures of central tendency
2223
* `nanmean`   arithmetic mean, ignoring `NaN`s
24+
* `nanmean!`  as `nanmean`, but writes to a given output array
2325
* `nanmedian`   median, ignoring `NaN`s
2426
* `nanmedian!`   as `nanmedian` but quicksorts in-place for efficiency
2527

@@ -127,7 +129,7 @@ julia> @btime nanbinmean($x,$y,xmin,xmax,nbins)
127129
90.30275863080671
128130
```
129131
### Other functions
130-
* `movmean`
132+
* `movmean` / `movmean!`
131133
A simple moving average function, which can operate in 1D or 2D, ignoring NaNs.
132134
```
133135
julia> A = rand(1:10, 4,4)
@@ -148,6 +150,15 @@ julia> movmean(A, 3)
148150
* `nanstandardize` / `nanstandardize!`
149151
De-mean and set to unit variance
150152

153+
### Allocation functions
154+
To use mutating functions like `nanmean!` you can call the appropriate
155+
allocation function and get back an array that can be passed as the output
156+
argument.
157+
158+
* `allocate_nanmean`
159+
* `allocate_nansum`
160+
* `allocate_movmean`
161+
151162
### DimensionalData support
152163
Almost all functions support
153164
[DimArrays](https://rafaqz.github.io/DimensionalData.jl/stable/dimarrays) and

ext/NaNStatisticsDimensionalDataExt.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,8 @@ function NaNStatistics.movmean(A::DD.AbstractDimVecOrMat, n::Number)
7878
rebuild(A, data)
7979
end
8080

81+
function NaNStatistics._allocate_reduce(Tₒ, A::AbstractDimArray, dims)
82+
rebuild(A, NaNStatistics._allocate_reduce(Tₒ, parent(A), dims), DD.reducedims(A, dims))
83+
end
84+
8185
end

src/ArrayStats/ArrayStats.jl

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,17 @@
498498

499499
## -- Moving average, ignoring NaNs
500500

501+
"""
502+
allocate_movmean(x::AbstractVecOrMat)
503+
504+
Allocate an array of the right type and shape to pass as the output parameter to
505+
`movmean!()` for the given `x`.
506+
"""
507+
function allocate_movmean(x::AbstractVecOrMat{T}) where T
508+
mean_type = Base.promote_op(/, T, Int64)
509+
similar(x, mean_type)
510+
end
511+
501512
"""
502513
```julia
503514
movmean(x::AbstractVecOrMat, n::Number)
@@ -509,19 +520,24 @@
509520
if `n` is not an odd integer, the first odd integer greater than `n` will be
510521
used instead.
511522
"""
512-
function movmean(x::AbstractVector{T}, n::Number) where T
513-
mean_type = Base.promote_op(/, T, Int64)
514-
m = Array{mean_type}(undef, size(x))
523+
movmean(x::AbstractVector, n::Number) = movmean!(allocate_movmean(x), x, n)
524+
525+
"""
526+
movmean!(out, x, win_or_n::Union{Number, Tuple})
527+
528+
Non-allocating version of `movmean()`. Generate the `out` parameter with
529+
`allocate_movmean(x)`.
530+
"""
531+
function movmean!(out::AbstractVector, x::AbstractVector{T}, n::Number) where T
515532
δ = ceil(Int, (n-1)/2)
516533
@inbounds for i ∈ eachindex(x)
517534
iₗ = max(i-δ, firstindex(x))
518535
iᵤ = min(i+δ, lastindex(x))
519-
m[i] = nanmean(view(x, iₗ:iᵤ))
536+
out[i] = nanmean(view(x, iₗ:iᵤ))
520537
end
521-
return m
538+
return out
522539
end
523540

524-
525541
"""
526542
movmean(x::AbstractVector{T}, win::Tuple{Int, Int}=(1, 1); skip_centre=false) where {T<:Real}
527543
@@ -542,13 +558,13 @@
542558
movmean(x, win) # returns [1.5, 2.0, 3.0, 4.0, 4.5]
543559
```
544560
"""
545-
function movmean(x::AbstractVector{T}, win::Tuple{Int, Int}=(1, 1);
561+
movmean(x::AbstractVector, win::Tuple{Int, Int}=(1, 1); skip_centre=false) = movmean!(allocate_movmean(x), x, win; skip_centre)
562+
563+
function movmean!(out::AbstractVector, x::AbstractVector{T}, win::Tuple{Int, Int}=(1, 1);
546564
skip_centre=false) where {T<:Real}
547565
win_left, win_right = win
548566

549-
FT = Base.promote_op(/, T, Int64)
550-
z = similar(x, FT)
551-
∑ = ∅ = FT(0)
567+
∑ = ∅ = zero(eltype(out))
552568
∑w = ∅w = 0
553569

554570
@inbounds @simd for i ∈ eachindex(x)
@@ -563,14 +579,14 @@
563579
∑ += ifelse(notnan, xᵢ, ∅)
564580
∑w += ifelse(notnan, 1, 0)
565581
end
566-
z[i] = ∑ / ∑w
582+
out[i] = ∑ / ∑w
567583
end
568-
z
584+
return out
569585
end
570586

571-
function movmean(x::AbstractMatrix{T}, n::Number) where T
572-
mean_type = Base.promote_op(/, T, Int64)
573-
m = Array{mean_type}(undef, size(x))
587+
movmean(x::AbstractMatrix, n::Number) = movmean!(allocate_movmean(x), x, n)
588+
589+
function movmean!(out::AbstractMatrix, x::AbstractMatrix{T}, n::Number) where T
574590
δ = ceil(Int, (n-1)/2)
575591
𝐼 = repeat((firstindex(x,1):lastindex(x,1)), 1, size(x,2))
576592
𝐽 = repeat((firstindex(x,2):lastindex(x,2))', size(x,1), 1)
@@ -581,11 +597,11 @@
581597
j = 𝐽[k]
582598
jₗ = max(j-δ, firstindex(x,2))
583599
jᵤ = min(j+δ, lastindex(x,2))
584-
m[i,j] = nanmean(view(x, iₗ:iᵤ, jₗ:jᵤ))
600+
out[i,j] = nanmean(view(x, iₗ:iᵤ, jₗ:jᵤ))
585601
end
586-
return m
602+
return out
587603
end
588-
export movmean
604+
export movmean, movmean!
589605

590606
## --- Internal helpers
591607

@@ -625,4 +641,10 @@ function _normalize_dims(dims)
625641
end
626642
end
627643

644+
function _allocate_reduce(Tₒ, A, dims)
645+
output_size = _normalize_dims(dims)
646+
sₒ = _reduced_size(A, output_size)
647+
similar(A, Tₒ, sₒ)
648+
end
649+
628650
## --- End of File

src/ArrayStats/nanmean.jl

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,22 @@ NANMEAN_SIZE_THRESHOLD::Union{Int, Symbol} = 2^20
33

44
get_size_threshold(x::Integer) = x
55

6+
"""
7+
allocate_nanmean(A::AbstractArray, dims)
8+
9+
Allocates an array that can be passed as the output array to `nanmean!()` for
10+
the given `A`.
11+
12+
Note that you should prefer using the output of `nanmean!()` rather than the
13+
array returned from this function because `nanmean!()` will drop dimensions if
14+
`dim` is used (but that's a zero-copy operation, the underlying array is
15+
shared).
16+
"""
17+
function allocate_nanmean(A::AbstractArray{T}, dims) where T
18+
Tₒ = Base.promote_op(/, T, Int)
19+
_allocate_reduce(Tₒ, A, dims)
20+
end
21+
622
"""
723
```julia
824
nanmean(A; dims, size_threshold)
@@ -73,13 +89,7 @@ export nanmean!
7389
_nanmean(A, dims::Int, st) = _nanmean(A, (dims,), st)
7490

7591
# Reduce some dims
76-
function _nanmean(A::AbstractArray{T,N}, dims::Tuple, st) where {T,N}
77-
sₒ = _reduced_size(A, dims)
78-
Tₒ = Base.promote_op(/, T, Int)
79-
B = similar(A, Tₒ, sₒ)
80-
81-
_nanmean!(B, A, dims, st)
82-
end
92+
_nanmean(A::AbstractArray, dims::Tuple, st) = _nanmean!(allocate_nanmean(A, dims), A, dims, st)
8393

8494
function _nanmean!(B, A, dims, st)
8595
if 1 in dims || sizeof(A) < get_size_threshold(st)

src/ArrayStats/nansum.jl

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
"""
2+
allocate_nansum(A::AbstractArray, dims)
3+
4+
Allocates an array that can be passed as the output array to `nansum!()` for
5+
the given `A`.
6+
7+
See the `allocate_nanmean()` docstring for info on using the returned array.
8+
"""
9+
function allocate_nansum(A::AbstractArray{T}, dims) where T
10+
Tₒ = T <: Integer ? Base.promote_op(+, T, Int) : T
11+
_allocate_reduce(Tₒ, A, dims)
12+
end
13+
114
"""
215
```julia
316
nansum(A; dims)
@@ -60,11 +73,7 @@ export nansum!
6073
_nansum(A, dims::Int) = _nansum(A, (dims,))
6174

6275
# Reduce some dims
63-
function _nansum(A::AbstractArray{T,N}, dims::Tuple) where {T,N}
64-
sₒ = _reduced_size(A, dims)
65-
B = similar(A, T, sₒ)
66-
_nansum!(B, A, dims)
67-
end
76+
_nansum(A::AbstractArray, dims::Tuple) = _nansum!(allocate_nansum(A, dims), A, dims)
6877

6978
function _nansum!(B, A, dims::Tuple)
7079
if 1 in dims
@@ -77,16 +86,6 @@ function _nansum!(B, A, dims::Tuple)
7786
end
7887
end
7988

80-
function _nansum(A::AbstractArray{T,N}, dims::Tuple) where {T<:Integer,N}
81-
sᵢ = size(A)
82-
sₒ = ntuple(Val{N}()) do d
83-
ifelse(d ∈ dims, 1, sᵢ[d])
84-
end
85-
Tₒ = Base.promote_op(+, T, Int)
86-
B = similar(A, Tₒ, sₒ)
87-
_nansum!(B, A, dims)
88-
end
89-
9089
# Reduce all the dims!
9190
function _nansum(A, ::Colon)
9291
Tₒ = eltype(A)

test/testArrayStats.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,5 +570,12 @@
570570
@test nanmedian(x, dim=(1, 3)) == ones(100)
571571
@test nanmedian(x, dim=(1, 2, 3)) == fill(1.0)
572572

573+
## --- Allocation functions
574+
575+
@test size(NaNStatistics.allocate_nanmean(rand(10, 10), 1)) == (1, 10)
576+
@test NaNStatistics.allocate_nanmean(rand(Float32, 10, 10), 1) isa Matrix{Float32}
577+
@test NaNStatistics.allocate_nansum(rand(10, 10), 1) isa Matrix{Float64}
578+
@test NaNStatistics.allocate_nansum(rand(Int, 10, 10), 1) isa Matrix{Int}
579+
@test NaNStatistics.allocate_movmean(rand(10)) isa Vector{Float64}
573580

574581
## --- End of File

test/testDimensionalDataExt.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,19 @@ end
8989
res = nanrange(x; dim=:foo)
9090
@test res == nanrange(parent(x); dim=1)
9191
end
92+
93+
@testset "Allocation helpers" begin
94+
data = rand(X(5), Y(11:15))
95+
96+
# Just test allocate_nanmean() since the other reduction allocators all go
97+
# through _allocate_reduce().
98+
out = NaNStatistics.allocate_nanmean(data, 1)
99+
@test size(out) == (1, 5)
100+
@test out isa DimMatrix{Float64}
101+
@test lookup(out, Y) == lookup(data, Y)
102+
103+
# Test allocate_movmean() explicitly since it doesn't go through _allocate_reduce()
104+
out = NaNStatistics.allocate_movmean(data)
105+
@test size(out) == size(data)
106+
@test out isa DimMatrix{Float64}
107+
end

0 commit comments

Comments
 (0)