Skip to content

Commit 82190d1

Browse files
authored
summarystats improvements (#858)
- Add standard deviation - Update documentation - Add tests
1 parent a39f189 commit 82190d1

File tree

3 files changed

+19
-3
lines changed

3 files changed

+19
-3
lines changed

src/scalarstats.jl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,7 @@ kldivergence(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) =
857857

858858
struct SummaryStats{T<:Union{AbstractFloat,Missing}}
859859
mean::T
860+
sd::T
860861
min::T
861862
q25::T
862863
median::T
@@ -871,14 +872,16 @@ end
871872
summarystats(a)
872873
873874
Compute summary statistics for a real-valued array `a`. Returns a
874-
`SummaryStats` object containing the mean, minimum, 25th percentile,
875-
median, 75th percentile, and maxmimum.
875+
`SummaryStats` object containing the number of observations,
876+
number of missing observations, standard deviation, mean, minimum,
877+
25th percentile, median, 75th percentile, and maximum.
876878
"""
877879
function summarystats(a::AbstractArray{T}) where T<:Union{Real,Missing}
878880
# `mean` doesn't fail on empty input but rather returns `NaN`, so we can use the
879881
# return type to populate the `SummaryStats` structure.
880882
s = T >: Missing ? collect(skipmissing(a)) : a
881883
m = mean(s)
884+
stdev = std(s, mean=m)
882885
R = typeof(m)
883886
n = length(a)
884887
ns = length(s)
@@ -889,7 +892,7 @@ function summarystats(a::AbstractArray{T}) where T<:Union{Real,Missing}
889892
else
890893
quantile(s, [0.00, 0.25, 0.50, 0.75, 1.00])
891894
end
892-
SummaryStats{R}(m, qs..., n, n - ns)
895+
SummaryStats{R}(m, stdev, qs..., n, n - ns)
893896
end
894897

895898
function Base.show(io::IO, ss::SummaryStats)
@@ -898,6 +901,7 @@ function Base.show(io::IO, ss::SummaryStats)
898901
ss.nobs > 0 || return
899902
@printf(io, "Missing Count: %i\n", ss.nmiss)
900903
@printf(io, "Mean: %.6f\n", ss.mean)
904+
@printf(io, "Std. Deviation: %.6f\n", ss.sd)
901905
@printf(io, "Minimum: %.6f\n", ss.min)
902906
@printf(io, "1st Quartile: %.6f\n", ss.q25)
903907
@printf(io, "Median: %.6f\n", ss.median)

test/misc.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ describe(io, collect(1:10))
5555
Length: 10
5656
Missing Count: 0
5757
Mean: 5.500000
58+
Std. Deviation: 3.027650
5859
Minimum: 1.000000
5960
1st Quartile: 3.250000
6061
Median: 5.500000
@@ -69,6 +70,7 @@ describe(io, Union{Float32,Missing}[1.0, 4.5, missing, missing, 33.1])
6970
Length: 5
7071
Missing Count: 2
7172
Mean: 12.866666
73+
Std. Deviation: 17.609751
7274
Minimum: 1.000000
7375
1st Quartile: 2.750000
7476
Median: 4.500000

test/scalarstats.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,30 +339,39 @@ s = summarystats(1:5)
339339
@test isa(s, StatsBase.SummaryStats)
340340
@test s.min == 1.0
341341
@test s.max == 5.0
342+
@test s.nobs == 5
343+
@test s.nmiss == 0
342344
@test s.mean ≈ 3.0
343345
@test s.median ≈ 3.0
344346
@test s.q25 ≈ 2.0
345347
@test s.q75 ≈ 4.0
348+
@test s.sd ≈ 1.5811388300841898
346349

347350
# Issue #631
348351
s = summarystats([-2, -1, 0, 1, 2, missing])
349352
@test isa(s, StatsBase.SummaryStats)
350353
@test s.min == -2.0
351354
@test s.max == 2.0
355+
@test s.nobs == 6
356+
@test s.nmiss == 1
352357
@test s.mean ≈ 0.0
353358
@test s.median ≈ 0.0
354359
@test s.q25 ≈ -1.0
355360
@test s.q75 ≈ +1.0
361+
@test s.sd ≈ 1.5811388300841898
356362

357363
# Issue #631
358364
s = summarystats(zeros(10))
359365
@test isa(s, StatsBase.SummaryStats)
360366
@test s.min == 0.0
361367
@test s.max == 0.0
368+
@test s.nobs == 10
369+
@test s.nmiss == 0
362370
@test s.mean ≈ 0.0
363371
@test s.median ≈ 0.0
364372
@test s.q25 ≈ 0.0
365373
@test s.q75 ≈ 0.0
374+
@test s.sd ≈ 0.0
366375

367376
# Issue #631
368377
s = summarystats(Union{Float64,Missing}[missing, missing])
@@ -371,3 +380,4 @@ s = summarystats(Union{Float64,Missing}[missing, missing])
371380
@test s.nmiss == 2
372381
@test isnan(s.mean)
373382
@test isnan(s.median)
383+
@test isnan(s.sd)

0 commit comments

Comments
 (0)