@@ -172,16 +172,17 @@ Return the `p`th percentile of a collection `x`, i.e. `quantile(x, p / 100)`.
172172"""
173173percentile (x, p) = quantile (x, p * 0.01 )
174174
175+ # TODO : move to same place as other quantile methods
175176"""
176- nquantile (x, n::Integer)
177+ quantile (x, n::Integer)
177178
178179Return the n-quantiles of collection `x`, i.e. the values which
179180partition `v` into `n` subsets of nearly equal size.
180181
181- Equivalent to `quantile(x, [0:n]/n)`. For example, `nquantiles (x, 5)`
182+ Equivalent to `quantile(x, [0:n]/n)`. For example, `quantile (x, 5)`
182183returns a vector of quantiles, respectively at `[0.0, 0.2, 0.4, 0.6, 0.8, 1.0]`.
183184"""
184- nquantile (x, n:: Integer ) = quantile (x, (0 : n)/ n)
185+ quantile (x, n:: Integer ) = quantile (x, (0 : n)/ n)
185186
186187
187188# ############################
@@ -199,16 +200,15 @@ The minimum and maximum of `x` are computed in one pass using `extrema`.
199200"""
200201span (x) = ((a, b) = extrema (x); a: b)
201202
202- # Variation coefficient : std / mean
203+ # Coefficient of variation : std / mean
203204"""
204205 variation(x, m=mean(x))
205206
206207Return the coefficient of variation of collection `x`, optionally specifying
207208a precomputed mean `m`. The coefficient of variation is the ratio of the
208209standard deviation to the mean.
209210"""
210- variation (x, m) = stdm (x, m) / m
211- variation (x) = ((m, s) = mean_and_std (x); s/ m)
211+ variation (x, m= mean (x)) = std (x, mean= m) / m
212212
213213# Standard error of the mean: std / sqrt(len)
214214# Code taken from var in the Statistics stdlib module
@@ -221,34 +221,13 @@ realXcY(x::Complex, y::Complex) = real(x)*real(y) + imag(x)*imag(y)
221221 sem(x)
222222
223223Return the standard error of the mean of collection `x`,
224- i.e. `sqrt(var( x, corrected=true) / length(x))`.
224+ i.e. `std( x, corrected=true) / sqrt( length(x))`.
225225"""
226226function sem (x)
227- y = iterate (x)
228- if y === nothing
229- T = eltype (x)
230- # Return the NaN of the type that we would get, had this collection
231- # contained any elements (this is consistent with std)
232- return oftype (sqrt ((abs2 (zero (T)) + abs2 (zero (T)))/ 2 ), NaN )
233- end
234- count = 1
235- value, state = y
236- y = iterate (x, state)
237- # Use Welford algorithm as seen in (among other places)
238- # Knuth's TAOCP, Vol 2, page 232, 3rd edition.
239- M = value / 1
240- S = real (zero (M))
241- while y != = nothing
242- value, state = y
243- y = iterate (x, state)
244- count += 1
245- new_M = M + (value - M) / count
246- S = S + realXcY (value - M, value - new_M)
247- M = new_M
248- end
249- var = S / (count - 1 )
250- return sqrt (var/ count)
227+ s, count = _sumsq (iterable, mean)
228+ sqrt ((s / (count - 1 )) / count)
251229end
230+ sem (x:: AbstractArray ) = sqrt (var (x, corrected= true ) / length (x))
252231
253232# Median absolute deviation
254233@irrational mad_constant 1.4826022185056018 BigFloat (" 1.482602218505601860547076529360423431326703202590312896536266275245674447622701" )
@@ -355,7 +334,7 @@ matrix of `X`.
355334genvar (X:: AbstractMatrix ) = size (X, 2 ) == 1 ? var (vec (X)) : det (cov (X))
356335genvar (itr) = var (itr)
357336
358- # Total variation
337+ # Total variance
359338"""
360339 totalvar(X)
361340
@@ -367,114 +346,6 @@ of the covariance matrix of `X`.
367346totalvar (X:: AbstractMatrix ) = sum (var (X, dims= 1 ))
368347totalvar (itr) = var (itr)
369348
370- # ############################
371- #
372- # Z-scores
373- #
374- # ############################
375-
376- function _zscore! (Z:: AbstractArray , X:: AbstractArray , μ:: Real , σ:: Real )
377- # Z and X are assumed to have the same size
378- iσ = inv (σ)
379- if μ == zero (μ)
380- for i = 1 : length (X)
381- @inbounds Z[i] = X[i] * iσ
382- end
383- else
384- for i = 1 : length (X)
385- @inbounds Z[i] = (X[i] - μ) * iσ
386- end
387- end
388- return Z
389- end
390-
391- @generated function _zscore! (Z:: AbstractArray{S,N} , X:: AbstractArray{T,N} ,
392- μ:: AbstractArray , σ:: AbstractArray ) where {S,T,N}
393- quote
394- # Z and X are assumed to have the same size
395- # μ and σ are assumed to have the same size, that is compatible with size(X)
396- siz1 = size (X, 1 )
397- @nextract $ N ud d-> size (μ, d)
398- if size (μ, 1 ) == 1 && siz1 > 1
399- @nloops $ N i d-> (d> 1 ? (1 : size (X,d)) : (1 : 1 )) d-> (j_d = ud_d == 1 ? 1 : i_d) begin
400- v = (@nref $ N μ j)
401- c = inv (@nref $ N σ j)
402- for i_1 = 1 : siz1
403- (@nref $ N Z i) = ((@nref $ N X i) - v) * c
404- end
405- end
406- else
407- @nloops $ N i X d-> (j_d = ud_d == 1 ? 1 : i_d) begin
408- (@nref $ N Z i) = ((@nref $ N X i) - (@nref $ N μ j)) / (@nref $ N σ j)
409- end
410- end
411- return Z
412- end
413- end
414-
415- function _zscore_chksize (X:: AbstractArray , μ:: AbstractArray , σ:: AbstractArray )
416- size (μ) == size (σ) || throw (DimensionMismatch (" μ and σ should have the same size." ))
417- for i= 1 : ndims (X)
418- dμ_i = size (μ,i)
419- (dμ_i == 1 || dμ_i == size (X,i)) || throw (DimensionMismatch (" X and μ have incompatible sizes." ))
420- end
421- end
422-
423-
424- """
425- zscore!([Z], X, μ, σ)
426-
427- Compute the z-scores of an array `X` with mean `μ` and standard deviation `σ`.
428- z-scores are the signed number of standard deviations above the mean that an
429- observation lies, i.e. ``(x - μ) / σ``.
430-
431- If a destination array `Z` is provided, the scores are stored
432- in `Z` and it must have the same shape as `X`. Otherwise `X` is overwritten.
433- """
434- function zscore! (Z:: AbstractArray{ZT} , X:: AbstractArray{T} , μ:: Real , σ:: Real ) where {ZT<: AbstractFloat ,T<: Real }
435- size (Z) == size (X) || throw (DimensionMismatch (" Z and X must have the same size." ))
436- _zscore! (Z, X, μ, σ)
437- end
438-
439- function zscore! (Z:: AbstractArray{<:AbstractFloat} , X:: AbstractArray{<:Real} ,
440- μ:: AbstractArray{<:Real} , σ:: AbstractArray{<:Real} )
441- size (Z) == size (X) || throw (DimensionMismatch (" Z and X must have the same size." ))
442- _zscore_chksize (X, μ, σ)
443- _zscore! (Z, X, μ, σ)
444- end
445-
446- zscore! (X:: AbstractArray{<:AbstractFloat} , μ:: Real , σ:: Real ) = _zscore! (X, X, μ, σ)
447-
448- zscore! (X:: AbstractArray{<:AbstractFloat} , μ:: AbstractArray{<:Real} , σ:: AbstractArray{<:Real} ) =
449- (_zscore_chksize (X, μ, σ); _zscore! (X, X, μ, σ))
450-
451-
452- """
453- zscore(X, [μ, σ])
454-
455- Compute the z-scores of `X`, optionally specifying a precomputed mean `μ` and
456- standard deviation `σ`. z-scores are the signed number of standard deviations
457- above the mean that an observation lies, i.e. ``(x - μ) / σ``.
458-
459- `μ` and `σ` should be both scalars or both arrays. The computation is broadcasting.
460- In particular, when `μ` and `σ` are arrays, they should have the same size, and
461- `size(μ, i) == 1 || size(μ, i) == size(X, i)` for each dimension.
462- """
463- function zscore (X:: AbstractArray{T} , μ:: Real , σ:: Real ) where T<: Real
464- ZT = typeof ((zero (T) - zero (μ)) / one (σ))
465- _zscore! (Array {ZT} (undef, size (X)), X, μ, σ)
466- end
467-
468- function zscore (X:: AbstractArray{T} , μ:: AbstractArray{U} , σ:: AbstractArray{S} ) where {T<: Real ,U<: Real ,S<: Real }
469- _zscore_chksize (X, μ, σ)
470- ZT = typeof ((zero (T) - zero (U)) / one (S))
471- _zscore! (Array {ZT} (undef, size (X)), X, μ, σ)
472- end
473-
474- zscore (X:: AbstractArray{<:Real} ) = ((μ, σ) = mean_and_std (X); zscore (X, μ, σ))
475- zscore (X:: AbstractArray{<:Real} , dim:: Int ) = ((μ, σ) = mean_and_std (X, dim); zscore (X, μ, σ))
476-
477-
478349
479350# ############################
480351#
@@ -521,7 +392,7 @@ function renyientropy(p::AbstractArray{T}, α::Real) where T<:Real
521392 end
522393 end
523394 s = s / scale
524- elseif ( isinf (α) )
395+ elseif isinf (α)
525396 s = - log (maximum (p))
526397 else # a normal Rényi entropy
527398 for i = 1 : length (p)
@@ -586,7 +457,7 @@ kldivergence(p::AbstractArray{T}, q::AbstractArray{T}, b::Real) where {T<:Real}
586457
587458# ############################
588459#
589- # summary
460+ # Summary Statistics
590461#
591462# ############################
592463
@@ -599,17 +470,18 @@ struct SummaryStats{T<:Union{AbstractFloat,Missing}}
599470 max:: T
600471 nobs:: Int
601472 nmiss:: Int
473+ isnumeric:: Bool
602474end
603475
604476
605477"""
606- summarystats (a)
478+ describe (a)
607479
608480Compute summary statistics for a real-valued array `a`. Returns a
609481`SummaryStats` object containing the mean, minimum, 25th percentile,
610482median, 75th percentile, and maxmimum.
611483"""
612- function summarystats (a:: AbstractArray{T} ) where T<: Union{Real,Missing}
484+ function describe (a:: AbstractArray{T} ) where T<: Union{Real,Missing}
613485 # `mean` doesn't fail on empty input but rather returns `NaN`, so we can use the
614486 # return type to populate the `SummaryStats` structure.
615487 s = T >: Missing ? collect (skipmissing (a)) : a
@@ -624,14 +496,20 @@ function summarystats(a::AbstractArray{T}) where T<:Union{Real,Missing}
624496 else
625497 quantile (s, [0.00 , 0.25 , 0.50 , 0.75 , 1.00 ])
626498 end
627- SummaryStats {R} (m, qs... , n, n - ns)
499+ SummaryStats {R} (m, qs... , n, n - ns, true )
500+ end
501+
502+ function describe (a:: AbstractArray{T} ) where T
503+ nmiss = T >: Missing ? count (ismissing, a) : 0
504+ SummaryStats {R} (NaN , NaN , NaN , NaN , NaN , length (a), nmiss, false )
628505end
629506
630507function Base. show (io:: IO , ss:: SummaryStats )
631- println (io, " Summary Stats :" )
508+ println (io, " Summary Statistics :" )
632509 @printf (io, " Length: %i\n " , ss. nobs)
633510 ss. nobs > 0 || return
634511 @printf (io, " Missing Count: %i\n " , ss. nmiss)
512+ ss. isnumeric || return
635513 @printf (io, " Mean: %.6f\n " , ss. mean)
636514 @printf (io, " Minimum: %.6f\n " , ss. min)
637515 @printf (io, " 1st Quartile: %.6f\n " , ss. q25)
0 commit comments