diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 000000000..9613e0542 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style = "yas" \ No newline at end of file diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 000000000..16a30e70e --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +b816a8162b8d602a439da67eacca33e8b3961fdd diff --git a/.github/workflows/Format.yml b/.github/workflows/Format.yml new file mode 100644 index 000000000..7398499b4 --- /dev/null +++ b/.github/workflows/Format.yml @@ -0,0 +1,10 @@ +name: Format suggestions +on: + pull_request: +jobs: + code-style: + runs-on: ubuntu-latest + steps: + - uses: julia-actions/julia-format@v4 + with: + version: '2' diff --git a/docs/make.jl b/docs/make.jl index a737a3de4..504bb06ac 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -7,28 +7,24 @@ end DocMeta.setdocmeta!(StatsBase, :DocTestSetup, :(using StatsBase)) -makedocs( - sitename = "StatsBase.jl", - modules = [StatsBase, StatsAPI], - format = Documenter.HTML(assets = ["assets/favicon.ico"]), - pages = ["index.md", - "weights.md", - "scalarstats.md", - "robust.md", - "deviation.md", - "cov.md", - "counts.md", - "ranking.md", - "sampling.md", - "empirical.md", - "signalcorr.md", - "multivariate.md", - "misc.md", - "statmodels.md", - "transformations.md"], - checkdocs=:exports -) +makedocs(; sitename="StatsBase.jl", + modules=[StatsBase, StatsAPI], + format=Documenter.HTML(; assets=["assets/favicon.ico"]), + pages=["index.md", + "weights.md", + "scalarstats.md", + "robust.md", + "deviation.md", + "cov.md", + "counts.md", + "ranking.md", + "sampling.md", + "empirical.md", + "signalcorr.md", + "multivariate.md", + "misc.md", + "statmodels.md", + "transformations.md"], + checkdocs=:exports) -deploydocs( - repo = "github.com/JuliaStats/StatsBase.jl.git" -) +deploydocs(; repo="github.com/JuliaStats/StatsBase.jl.git") diff --git a/perf/sampling.jl b/perf/sampling.jl index dc65ff7ee..300351022 100644 --- a/perf/sampling.jl +++ b/perf/sampling.jl @@ -51,20 +51,20 @@ tsample!(s::Sample_NoRep, a, x) = sample!(a, x; replace=false, ordered=false) mutable struct Sample_NoRep_Ord <: NoRep end tsample!(s::Sample_NoRep_Ord, a, x) = sample!(a, x; replace=false, ordered=true) - # config is in the form of (n, k) Base.string(p::SampleProc{Alg}) where {Alg} = lowercase(string(Alg)) Base.length(p::SampleProc, cfg::Tuple{Int,Int}) = cfg[2] -Base.isvalid(p::SampleProc{<:WithRep}, cfg::Tuple{Int,Int}) = ((n, k) = cfg; n >= 1 && k >= 1) -Base.isvalid(p::SampleProc{<:NoRep}, cfg::Tuple{Int,Int}) = ((n, k) = cfg; n >= k >= 1) +Base.isvalid(p::SampleProc{<:WithRep}, cfg::Tuple{Int,Int}) = ((n, k)=cfg; n >= 1 && k >= 1) +Base.isvalid(p::SampleProc{<:NoRep}, cfg::Tuple{Int,Int}) = ((n, k)=cfg; n >= k >= 1) Base.start(p::SampleProc, cfg::Tuple{Int,Int}) = Vector{Int}(cfg[2]) -Base.run(p::SampleProc{Alg}, cfg::Tuple{Int,Int}, s::Vector{Int}) where {Alg} = tsample!(Alg(), 1:cfg[1], s) +function Base.run(p::SampleProc{Alg}, cfg::Tuple{Int,Int}, s::Vector{Int}) where {Alg} + return tsample!(Alg(), 1:cfg[1], s) +end Base.done(p::SampleProc, cfg, s) = nothing - ### benchmarking const ns = 5 * (2 .^ [0:9]) @@ -72,10 +72,10 @@ const ks = 2 .^ [1:16] ## with replacement -const procs1 = Proc[ SampleProc{Direct}(), - SampleProc{Sample_WRep}(), - SampleProc{Xmultinom}(), - SampleProc{Sample_WRep_Ord}() ] +const procs1 = Proc[SampleProc{Direct}(), + SampleProc{Sample_WRep}(), + SampleProc{Xmultinom}(), + SampleProc{Sample_WRep_Ord}()] const 
cfgs1 = vec([(n, k) for k in ks, n in ns]) @@ -84,14 +84,14 @@ println() ## without replacement -const procs2 = Proc[ SampleProc{Knuths}(), - SampleProc{Fisher_Yates}(), - SampleProc{Self_Avoid}(), - SampleProc{Sample_NoRep}(), - SampleProc{Seq_A}(), - SampleProc{Seq_C}(), - SampleProc{Seq_D}(), - SampleProc{Sample_NoRep_Ord}() ] +const procs2 = Proc[SampleProc{Knuths}(), + SampleProc{Fisher_Yates}(), + SampleProc{Self_Avoid}(), + SampleProc{Sample_NoRep}(), + SampleProc{Seq_A}(), + SampleProc{Seq_C}(), + SampleProc{Seq_D}(), + SampleProc{Sample_NoRep_Ord}()] const cfgs2 = (Int, Int)[] for n in 5 * (2 .^ [0:11]), k in 2 .^ [1:16] diff --git a/perf/wsampling.jl b/perf/wsampling.jl index 30d665719..87c8bcfbf 100644 --- a/perf/wsampling.jl +++ b/perf/wsampling.jl @@ -33,14 +33,15 @@ tsample!(s::Sample_WRep, wv, x) = sample!(1:length(wv), wv, x; ordered=false) mutable struct Sample_WRep_Ord <: WithRep end tsample!(s::Sample_WRep_Ord, wv, x) = sample!(1:length(wv), wv, x; ordered=true) - # config is in the form of (n, k) Base.string(p::WSampleProc{Alg}) where {Alg} = lowercase(string(Alg)) Base.length(p::WSampleProc, cfg::Tuple{Int,Int}) = cfg[2] -Base.isvalid(p::WSampleProc{<:WithRep}, cfg::Tuple{Int,Int}) = ((n, k) = cfg; n >= 1 && k >= 1) -Base.isvalid(p::WSampleProc{<:NoRep}, cfg::Tuple{Int,Int}) = ((n, k) = cfg; n >= k >= 1) +function Base.isvalid(p::WSampleProc{<:WithRep}, cfg::Tuple{Int,Int}) + return ((n, k)=cfg; n >= 1 && k >= 1) +end +Base.isvalid(p::WSampleProc{<:NoRep}, cfg::Tuple{Int,Int}) = ((n, k)=cfg; n >= k >= 1) function Base.start(p::WSampleProc, cfg::Tuple{Int,Int}) n, k = cfg @@ -49,10 +50,11 @@ function Base.start(p::WSampleProc, cfg::Tuple{Int,Int}) return (w, x) end -Base.run(p::WSampleProc{Alg}, cfg::Tuple{Int,Int}, s) where {Alg} = tsample!(Alg(), s[1], s[2]) +function Base.run(p::WSampleProc{Alg}, cfg::Tuple{Int,Int}, s) where {Alg} + return tsample!(Alg(), s[1], s[2]) +end Base.done(p::WSampleProc, cfg, s) = nothing - ### benchmarking const ns = 5 * (2 .^ [0:9]) @@ -60,25 +62,22 @@ const ks = 2 .^ [1:16] ## with replacement -const procs1 = Proc[ WSampleProc{Direct}(), - WSampleProc{Alias}(), - WSampleProc{Xmultinom_S}(), - WSampleProc{Sample_WRep}(), - WSampleProc{Xmultinom}(), - WSampleProc{Direct_S}(), - WSampleProc{Sample_WRep_Ord}() ] +const procs1 = Proc[WSampleProc{Direct}(), + WSampleProc{Alias}(), + WSampleProc{Xmultinom_S}(), + WSampleProc{Sample_WRep}(), + WSampleProc{Xmultinom}(), + WSampleProc{Direct_S}(), + WSampleProc{Sample_WRep_Ord}()] const cfgs1 = vec([(n, k) for k in ks, n in ns]) rtable1 = run(procs1, cfgs1; duration=0.2) println() - ## show results println("Sampling With Replacement") println("===================================") show(rtable1; unit=:mps, cfghead="(n, k)") println() - - diff --git a/src/StatsBase.jl b/src/StatsBase.jl index 4d0d182ae..c51a92528 100644 --- a/src/StatsBase.jl +++ b/src/StatsBase.jl @@ -30,217 +30,206 @@ import StatsAPI: pairwise, pairwise!, params, params!, informationmatrix, stderror, vcov, weights, isfitted, fit, fit!, aic, aicc, bic, r2, r², adjr2, adjr² - ## tackle compatibility issues +## tackle compatibility issues export - ## functions defined in Statistics - cor, - cov, - mean, - mean!, - median, - median!, - quantile, - quantile!, - std, - var, - - ## weights - AbstractWeights, # abstract type to represent any weight vector - Weights, # to represent a generic weight vector - AnalyticWeights, # to represent an analytic/precision/reliability weight vector - FrequencyWeights, # to representing a 
frequency/case/repeat weight vector - ProbabilityWeights, # to representing a probability/sampling weight vector - UnitWeights, # to representing a uniform weight vector - weights, # construct a generic Weights vector - aweights, # construct an AnalyticWeights vector - fweights, # construct a FrequencyWeights vector - pweights, # construct a ProbabilityWeights vector - eweights, # construct an exponential Weights vector - uweights, # construct an UnitWeights vector - wsum, # weighted sum with vector as second argument - wsum!, # weighted sum across dimensions with provided storage - - ## moments - skewness, # (standardized) skewness - kurtosis, # (excessive) kurtosis - moment, # central moment of given order - cumulant, # cumulant of given order - mean_and_var, # (mean, var) - mean_and_std, # (mean, std) - mean_and_cov, # (mean, cov) - - ## scalarstats - geomean, # geometric mean - harmmean, # harmonic mean - genmean, # generalized/power mean - middle, # the mean of two real numbers - mode, # find a mode from data (the first one) - modes, # find all modes from data - - zscore, # compute Z-scores - zscore!, # compute Z-scores inplace or to a pre-allocated array - - percentile, # quantile using percentage (instead of fraction) as argument - nquantile, # quantiles at [0:n]/n - quantilerank, # quantile-position (0-1) of a value relative to a collection - percentilerank, # percentile-position (0-100) of a value relative to a collection - - span, # The range minimum(x):maximum(x) - variation, # ratio of standard deviation to mean - sem, # standard error of the mean, i.e. sqrt(var / n) - mad, # median absolute deviation - iqr, # interquartile range - - genvar, # generalized variance - totalvar, # total variation - - entropy, # the entropy of a probability vector - renyientropy, # the Rényi (generalised) entropy of a probability vector - crossentropy, # cross entropy between two probability vectors - kldivergence, # K-L divergence between two probability vectors - - summarystats, # summary statistics - describe, # print the summary statistics - - # deviation - counteq, # count the number of equal pairs - countne, # count the number of non-equal pairs - sqL2dist, # squared L2 distance between two arrays - L2dist, # L2 distance between two arrays - L1dist, # L1 distance between two arrays - Linfdist, # L-inf distance between two arrays - gkldiv, # (Generalized) Kullback-Leibler divergence between two vectors - meanad, # mean absolute deviation - maxad, # maximum absolute deviation - msd, # mean squared deviation - rmsd, # root mean squared deviation - psnr, # peak signal-to-noise ratio (in dB) - - # cov - scattermat, # scatter matrix (i.e. 
unnormalized covariance) - cov2cor, # converts a covariance matrix to a correlation matrix - cor2cov, # converts a correlation matrix to a covariance matrix - CovarianceEstimator, # abstract type for covariance estimators - SimpleCovariance, # simple covariance estimator - - ## counts - addcounts!, # add counts to an accumulating array or map - counts, # count integer values in given arrays - proportions, # proportions of integer values in given arrays - # (normalized version of counts) - countmap, # count distinct values and return a map - proportionmap, # proportions of distinct values returned as a map - - ## ranking - ordinalrank, # ordinal ranking ("1234" ranking) - competerank, # competition ranking ("1 2 2 4" ranking) - denserank, # dense ranking ("1 2 2 3" ranking) - tiedrank, # tied ranking ("1 2.5 2.5 4" ranking) - - ## rankcorr - corspearman, # spearman's rank correlation - corkendall, # kendall's rank correlation - - ## partialcor - partialcor, # partial correlation - - ## signalcorr - autocov!, autocov, # auto covariance - autocor!, autocor, # auto correlation - crosscov!, crosscov, # cross covariance - crosscor!, crosscor, # cross correlation - pacf!, pacf, # partial auto-correlation - - ## sampling - samplepair, # draw a pair of distinct elements    - sample, # sampling from a population - sample!, # sampling from a population, with pre-allocated output - wsample, # sampling from a population with weights - wsample!, # weighted sampling, with pre-allocated output - - ## empirical - ecdf, # empirical cumulative distribution function - ECDF, # type for empirical cumulative distribution function - - AbstractHistogram, - Histogram, - midpoints, - # histrange, - - ## robust - trim, # trimmed set - trim!, # trimmed set - winsor, # Winsorized set - winsor!, # Winsorized set - trimvar, # variance of the mean of a trimmed set - - ## misc - rle, # run-length encoding - inverse_rle, # inverse run-length encoding - indexmap, # construct a map from element to index - levelsmap, # construct a map from n unique elements to [1, ..., n] - indicatormat, # construct indicator matrix - pairwise, # pairwise application of functions - pairwise!, # pairwise! 
application of functions - - # statistical models - CoefTable, - StatisticalModel, - RegressionModel, - - adjr2, - adjr², - aic, - aicc, - bic, - coef, - coefnames, - coeftable, - confint, - cooksdistance, - crossmodelmatrix, - deviance, - dof, - dof_residual, - fit, - fit!, - fitted, - informationmatrix, - isfitted, - islinear, - leverage, - loglikelihood, - meanresponse, - modelmatrix, - mss, - response, - responsename, - nobs, - nulldeviance, - nullloglikelihood, - rss, - score, - stderror, - vcov, - predict, - predict!, - residuals, - r2, - r², - - ConvergenceException, - - # data standardization - standardize, - AbstractDataTransform, # the type to represent a abstract data transformation - ZScoreTransform, # the type to represent a z-score data transformation - UnitRangeTransform, # the type to represent a 0-1 data transformation - - # reliability - CronbachAlpha, # the type to represent Cronbach's alpha scores - cronbachalpha # function to compute Cronbach's alpha scores +## functions defined in Statistics + cor, + cov, + mean, + mean!, + median, + median!, + quantile, + quantile!, + std, + var, + +## weights + AbstractWeights, # abstract type to represent any weight vector + Weights, # to represent a generic weight vector + AnalyticWeights, # to represent an analytic/precision/reliability weight vector + FrequencyWeights, # to represent a frequency/case/repeat weight vector + ProbabilityWeights, # to represent a probability/sampling weight vector + UnitWeights, # to represent a uniform weight vector + weights, # construct a generic Weights vector + aweights, # construct an AnalyticWeights vector + fweights, # construct a FrequencyWeights vector + pweights, # construct a ProbabilityWeights vector + eweights, # construct an exponential Weights vector + uweights, # construct a UnitWeights vector + wsum, # weighted sum with vector as second argument + wsum!, # weighted sum across dimensions with provided storage + +## moments + skewness, # (standardized) skewness + kurtosis, # (excess) kurtosis + moment, # central moment of given order + cumulant, # cumulant of given order + mean_and_var, # (mean, var) + mean_and_std, # (mean, std) + mean_and_cov, # (mean, cov) + +## scalarstats + geomean, # geometric mean + harmmean, # harmonic mean + genmean, # generalized/power mean + middle, # the mean of two real numbers + mode, # find a mode from data (the first one) + modes, # find all modes from data + zscore, # compute Z-scores + zscore!, # compute Z-scores in place or to a pre-allocated array + percentile, # quantile using percentage (instead of fraction) as argument + nquantile, # quantiles at [0:n]/n + quantilerank, # quantile-position (0-1) of a value relative to a collection + percentilerank, # percentile-position (0-100) of a value relative to a collection + span, # the range minimum(x):maximum(x) + variation, # ratio of standard deviation to mean + sem, # standard error of the mean, i.e.
sqrt(var / n) + mad, # median absolute deviation + iqr, # interquartile range + genvar, # generalized variance + totalvar, # total variation + entropy, # the entropy of a probability vector + renyientropy, # the Rényi (generalised) entropy of a probability vector + crossentropy, # cross entropy between two probability vectors + kldivergence, # K-L divergence between two probability vectors + summarystats, # summary statistics + describe, # print the summary statistics + +# deviation + counteq, # count the number of equal pairs + countne, # count the number of non-equal pairs + sqL2dist, # squared L2 distance between two arrays + L2dist, # L2 distance between two arrays + L1dist, # L1 distance between two arrays + Linfdist, # L-inf distance between two arrays + gkldiv, # (Generalized) Kullback-Leibler divergence between two vectors + meanad, # mean absolute deviation + maxad, # maximum absolute deviation + msd, # mean squared deviation + rmsd, # root mean squared deviation + psnr, # peak signal-to-noise ratio (in dB) + +# cov + scattermat, # scatter matrix (i.e. unnormalized covariance) + cov2cor, # converts a covariance matrix to a correlation matrix + cor2cov, # converts a correlation matrix to a covariance matrix + CovarianceEstimator, # abstract type for covariance estimators + SimpleCovariance, # simple covariance estimator + +## counts + addcounts!, # add counts to an accumulating array or map + counts, # count integer values in given arrays + proportions, # proportions of integer values in given arrays +# (normalized version of counts) + countmap, # count distinct values and return a map + proportionmap, # proportions of distinct values returned as a map + +## ranking + ordinalrank, # ordinal ranking ("1234" ranking) + competerank, # competition ranking ("1 2 2 4" ranking) + denserank, # dense ranking ("1 2 2 3" ranking) + tiedrank, # tied ranking ("1 2.5 2.5 4" ranking) + +## rankcorr + corspearman, # Spearman's rank correlation + corkendall, # Kendall's rank correlation + +## partialcor + partialcor, # partial correlation + +## signalcorr + autocov!, autocov, # auto covariance + autocor!, autocor, # auto correlation + crosscov!, crosscov, # cross covariance + crosscor!, crosscor, # cross correlation + pacf!, pacf, # partial auto-correlation + +## sampling + samplepair, # draw a pair of distinct elements + sample, # sampling from a population + sample!, # sampling from a population, with pre-allocated output + wsample, # sampling from a population with weights + wsample!, # weighted sampling, with pre-allocated output + +## empirical + ecdf, # empirical cumulative distribution function + ECDF, # type for empirical cumulative distribution function + AbstractHistogram, + Histogram, + midpoints, +# histrange, + +## robust + trim, # trimmed set + trim!, # trimmed set + winsor, # Winsorized set + winsor!, # Winsorized set + trimvar, # variance of the mean of a trimmed set + +## misc + rle, # run-length encoding + inverse_rle, # inverse run-length encoding + indexmap, # construct a map from element to index + levelsmap, # construct a map from n unique elements to [1, ..., n] + indicatormat, # construct indicator matrix + pairwise, # pairwise application of functions + pairwise!, # pairwise!
application of functions + +# statistical models + CoefTable, + StatisticalModel, + RegressionModel, adjr2, + adjr², + aic, + aicc, + bic, + coef, + coefnames, + coeftable, + confint, + cooksdistance, + crossmodelmatrix, + deviance, + dof, + dof_residual, + fit, + fit!, + fitted, + informationmatrix, + isfitted, + islinear, + leverage, + loglikelihood, + meanresponse, + modelmatrix, + mss, + response, + responsename, + nobs, + nulldeviance, + nullloglikelihood, + rss, + score, + stderror, + vcov, + predict, + predict!, + residuals, + r2, + r², ConvergenceException, + +# data standardization + standardize, + AbstractDataTransform, # the type to represent a abstract data transformation + ZScoreTransform, # the type to represent a z-score data transformation + UnitRangeTransform, # the type to represent a 0-1 data transformation + +# reliability + CronbachAlpha, # the type to represent Cronbach's alpha scores + cronbachalpha # function to compute Cronbach's alpha scores # source files diff --git a/src/common.jl b/src/common.jl index 4dc268c6c..c95c72c3b 100644 --- a/src/common.jl +++ b/src/common.jl @@ -1,6 +1,6 @@ # common utilities -function depcheck(fname::Symbol, varname::Symbol, b::Union{Bool, Nothing}) +function depcheck(fname::Symbol, varname::Symbol, b::Union{Bool,Nothing}) if b === nothing msg = "$fname will default to $varname=true in the future. Use $varname=false for previous behaviour." Base.depwarn(msg, fname) diff --git a/src/counts.jl b/src/counts.jl index 43fa1d495..4cf3b950a 100644 --- a/src/counts.jl +++ b/src/counts.jl @@ -22,7 +22,8 @@ array `r`. For each `xi ∈ x`, if `xi == levels[j]`, then we increment `r[j]`. If a weighting vector `wv` is specified, the sum of weights is used rather than the raw counts. """ -function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}) +function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, + levels::UnitRange{<:Integer}) # add counts of integers from x that fall within levels to r checkbounds(r, axes(levels)...) @@ -39,7 +40,8 @@ function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, levels::UnitR return r end -function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}, wv::AbstractWeights) +function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, + levels::UnitRange{<:Integer}, wv::AbstractWeights) # add wv weighted counts of integers from x that fall within levels to r length(x) == length(wv) || @@ -62,7 +64,6 @@ function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, levels::UnitR return r end - """ counts(x, [wv::AbstractWeights]) counts(x, levels::UnitRange{<:Integer}, [wv::AbstractWeights]) @@ -80,16 +81,18 @@ The output is a vector of length `length(levels)`. 
""" function counts end -counts(x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}) = - addcounts!(zeros(Int, length(levels)), x, levels) -counts(x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}, wv::AbstractWeights) = - addcounts!(zeros(eltype(wv), length(levels)), x, levels, wv) +function counts(x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}) + return addcounts!(zeros(Int, length(levels)), x, levels) +end +function counts(x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}, + wv::AbstractWeights) + return addcounts!(zeros(eltype(wv), length(levels)), x, levels, wv) +end counts(x::AbstractArray{<:Integer}, k::Integer) = counts(x, 1:k) counts(x::AbstractArray{<:Integer}, k::Integer, wv::AbstractWeights) = counts(x, 1:k, wv) counts(x::AbstractArray{<:Integer}) = counts(x, span(x)) counts(x::AbstractArray{<:Integer}, wv::AbstractWeights) = counts(x, span(x), wv) - """ proportions(x, levels=span(x), [wv::AbstractWeights]) @@ -99,9 +102,13 @@ Equivalent to `counts(x, levels) / length(x)`. If a vector of weights `wv` is provided, the proportion of weights is computed rather than the proportion of raw counts. """ -proportions(x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}) = counts(x, levels) / length(x) -proportions(x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}, wv::AbstractWeights) = - counts(x, levels, wv) / sum(wv) +function proportions(x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}) + return counts(x, levels) / length(x) +end +function proportions(x::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}, + wv::AbstractWeights) + return counts(x, levels, wv) / sum(wv) +end """ proportions(x, k::Integer, [wv::AbstractWeights]) @@ -112,18 +119,20 @@ If a vector of weights `wv` is provided, the proportion of weights is computed r than the proportion of raw counts. 
""" proportions(x::AbstractArray{<:Integer}, k::Integer) = proportions(x, 1:k) -proportions(x::AbstractArray{<:Integer}, k::Integer, wv::AbstractWeights) = proportions(x, 1:k, wv) +function proportions(x::AbstractArray{<:Integer}, k::Integer, wv::AbstractWeights) + return proportions(x, 1:k, wv) +end proportions(x::AbstractArray{<:Integer}) = proportions(x, span(x)) proportions(x::AbstractArray{<:Integer}, wv::AbstractWeights) = proportions(x, span(x), wv) #### functions for counting a single list of integers (2D) -function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}) +function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, + y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}) # add counts of pairs from zip(x,y) to r xlevels, ylevels = levels - checkbounds(r, axes(xlevels, 1), axes(ylevels, 1)) mx0 = first(xlevels) @@ -144,7 +153,8 @@ function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, y::AbstractAr return r end -function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, +function addcounts!(r::AbstractArray, x::AbstractArray{<:Integer}, + y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}, wv::AbstractWeights) # add counts of pairs from zip(x,y) to r @@ -180,45 +190,80 @@ end # facet functions -function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}) - addcounts!(zeros(Int, length(levels[1]), length(levels[2])), x, y, levels) +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + levels::NTuple{2,UnitRange{<:Integer}}) + return addcounts!(zeros(Int, length(levels[1]), length(levels[2])), x, y, levels) end -function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}, wv::AbstractWeights) - addcounts!(zeros(eltype(wv), length(levels[1]), length(levels[2])), x, y, levels, wv) +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + levels::NTuple{2,UnitRange{<:Integer}}, wv::AbstractWeights) + return addcounts!(zeros(eltype(wv), length(levels[1]), length(levels[2])), x, y, levels, + wv) end -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}) = - counts(x, y, (levels, levels)) -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::UnitRange{<:Integer}, wv::AbstractWeights) = - counts(x, y, (levels, levels), wv) - -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2,Integer}) = - counts(x, y, (1:ks[1], 1:ks[2])) -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2,Integer}, wv::AbstractWeights) = - counts(x, y, (1:ks[1], 1:ks[2]), wv) -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer) = counts(x, y, (1:k, 1:k)) -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer, wv::AbstractWeights) = - counts(x, y, (1:k, 1:k), wv) -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}) = counts(x, y, (span(x), span(y))) -counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, wv::AbstractWeights) = counts(x, y, (span(x), span(y)), wv) +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + levels::UnitRange{<:Integer}) + return counts(x, y, (levels, levels)) +end +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + levels::UnitRange{<:Integer}, 
wv::AbstractWeights) + return counts(x, y, (levels, levels), wv) +end -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}) = - counts(x, y, levels) / length(x) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, levels::NTuple{2,UnitRange{<:Integer}}, wv::AbstractWeights) = - counts(x, y, levels, wv) / sum(wv) +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + ks::NTuple{2,Integer}) + return counts(x, y, (1:ks[1], 1:ks[2])) +end +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + ks::NTuple{2,Integer}, wv::AbstractWeights) + return counts(x, y, (1:ks[1], 1:ks[2]), wv) +end +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer) + return counts(x, y, (1:k, 1:k)) +end +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer, + wv::AbstractWeights) + return counts(x, y, (1:k, 1:k), wv) +end +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}) + return counts(x, y, (span(x), span(y))) +end +function counts(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + wv::AbstractWeights) + return counts(x, y, (span(x), span(y)), wv) +end -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2,Integer}) = - proportions(x, y, (1:ks[1], 1:ks[2])) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, ks::NTuple{2,Integer}, wv::AbstractWeights) = - proportions(x, y, (1:ks[1], 1:ks[2]), wv) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer) = proportions(x, y, (1:k, 1:k)) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer, wv::AbstractWeights) = - proportions(x, y, (1:k, 1:k), wv) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}) = proportions(x, y, (span(x), span(y))) -proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, wv::AbstractWeights) = - proportions(x, y, (span(x), span(y)), wv) +function proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + levels::NTuple{2,UnitRange{<:Integer}}) + return counts(x, y, levels) / length(x) +end +function proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + levels::NTuple{2,UnitRange{<:Integer}}, wv::AbstractWeights) + return counts(x, y, levels, wv) / sum(wv) +end +function proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + ks::NTuple{2,Integer}) + return proportions(x, y, (1:ks[1], 1:ks[2])) +end +function proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + ks::NTuple{2,Integer}, wv::AbstractWeights) + return proportions(x, y, (1:ks[1], 1:ks[2]), wv) +end +function proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer) + return proportions(x, y, (1:k, 1:k)) +end +function proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, k::Integer, + wv::AbstractWeights) + return proportions(x, y, (1:k, 1:k), wv) +end +function proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}) + return proportions(x, y, (span(x), span(y))) +end +function proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, + wv::AbstractWeights) + return proportions(x, y, (span(x), span(y)), wv) +end ################################################# # @@ -231,7 +276,7 @@ proportions(x::AbstractArray{<:Integer}, y::AbstractArray{<:Integer}, wv::Abstra ## auxiliary functions -function 
_normalize_countmap(cm::Dict{T}, s::Real) where T +function _normalize_countmap(cm::Dict{T}, s::Real) where {T} r = Dict{T,Float64}() for (k, c) in cm r[k] = c / s @@ -241,7 +286,6 @@ end ## 1D - """ addcounts!(dict, x; alg = :auto) addcounts!(dict, x, wv) @@ -267,9 +311,9 @@ raw counts. RAM, is safe for any data type, is faster for small arrays, and is faster when there are not many duplicates. """ -addcounts!(cm::Dict, x; alg = :auto) = _addcounts!(eltype(x), cm, x, alg = alg) +addcounts!(cm::Dict, x; alg=:auto) = _addcounts!(eltype(x), cm, x, alg=alg) -function _addcounts!(::Type{T}, cm::Dict, x; alg = :auto) where T +function _addcounts!(::Type{T}, cm::Dict, x; alg=:auto) where {T} # if it's safe to be sorted using radixsort then it should be faster # albeit using more RAM if radixsort_safe(T) && (alg == :auto || alg == :radixsort) @@ -277,13 +321,13 @@ function _addcounts!(::Type{T}, cm::Dict, x; alg = :auto) where T elseif alg == :radixsort throw(ArgumentError("`alg = :radixsort` is chosen but type `radixsort_safe($T)` did not return `true`; use `alg = :auto` or `alg = :dict` instead")) else - addcounts_dict!(cm,x) + addcounts_dict!(cm, x) end return cm end """Dict-based addcounts method""" -function addcounts_dict!(cm::Dict{T}, x) where T +function addcounts_dict!(cm::Dict{T}, x) where {T} for v in x index = ht_keyindex2!(cm, v) if index > 0 @@ -300,15 +344,15 @@ end # faster results and less memory usage. However we still wish to enable others # to write generic algorithms, therefore the methods below still accept the # `alg` argument but it is ignored. -function _addcounts!(::Type{Bool}, cm::Dict{Bool}, x::AbstractArray{Bool}; alg = :ignored) +function _addcounts!(::Type{Bool}, cm::Dict{Bool}, x::AbstractArray{Bool}; alg=:ignored) sumx = sum(x) cm[true] = get(cm, true, 0) + sumx cm[false] = get(cm, false, 0) + length(x) - sumx - cm + return cm end # specialized for `Bool` iterator -function _addcounts!(::Type{Bool}, cm::Dict{Bool}, x; alg = :ignored) +function _addcounts!(::Type{Bool}, cm::Dict{Bool}, x; alg=:ignored) sumx = 0 len = 0 for i in x @@ -317,10 +361,11 @@ function _addcounts!(::Type{Bool}, cm::Dict{Bool}, x; alg = :ignored) end cm[true] = get(cm, true, 0) + sumx cm[false] = get(cm, false, 0) + len - sumx - cm + return cm end -function _addcounts!(::Type{T}, cm::Dict{T}, x; alg = :ignored) where T <: Union{UInt8, UInt16, Int8, Int16} +function _addcounts!(::Type{T}, cm::Dict{T}, x; + alg=:ignored) where {T<:Union{UInt8,UInt16,Int8,Int16}} counts = zeros(Int, 2^(8sizeof(T))) for xi in x @@ -337,24 +382,24 @@ function _addcounts!(::Type{T}, cm::Dict{T}, x; alg = :ignored) where T <: Union end end end - cm + return cm end -const BaseRadixSortSafeTypes = Union{Int8, Int16, Int32, Int64, Int128, - UInt8, UInt16, UInt32, UInt64, UInt128, - Float32, Float64} +const BaseRadixSortSafeTypes = Union{Int8,Int16,Int32,Int64,Int128, + UInt8,UInt16,UInt32,UInt64,UInt128, + Float32,Float64} "Can the type be safely sorted by radixsort" -radixsort_safe(::Type{T}) where T = T<:BaseRadixSortSafeTypes +radixsort_safe(::Type{T}) where {T} = T<:BaseRadixSortSafeTypes -function _addcounts_radix_sort_loop!(cm::Dict{T}, sx::AbstractVector{T}) where T +function _addcounts_radix_sort_loop!(cm::Dict{T}, sx::AbstractVector{T}) where {T} isempty(sx) && return cm last_sx = first(sx) start_i = firstindex(sx) # now the data is sorted: can just run through and accumulate values before # adding into the Dict - for i in start_i+1:lastindex(sx) + for i in (start_i + 1):lastindex(sx) sxi = sx[i] if 
!isequal(last_sx, sxi) cm[last_sx] = get(cm, last_sx, 0) + i - start_i @@ -369,9 +414,9 @@ function _addcounts_radix_sort_loop!(cm::Dict{T}, sx::AbstractVector{T}) where T return cm end -function addcounts_radixsort!(cm::Dict{T}, x::AbstractArray{T}) where T +function addcounts_radixsort!(cm::Dict{T}, x::AbstractArray{T}) where {T} # sort the x using radixsort - sx = sort(vec(x), alg=Base.DEFAULT_UNSTABLE) + sx = sort(vec(x); alg=Base.DEFAULT_UNSTABLE) # Delegate the loop to a separate function since sort might not # be inferred in Julia 0.6 after SortingAlgorithms is loaded. @@ -380,13 +425,14 @@ function addcounts_radixsort!(cm::Dict{T}, x::AbstractArray{T}) where T end # fall-back for `x` an iterator -function addcounts_radixsort!(cm::Dict{T}, x) where T +function addcounts_radixsort!(cm::Dict{T}, x) where {T} cx = vec(collect(x)) - sx = sort!(cx, alg = Base.DEFAULT_UNSTABLE) + sx = sort!(cx; alg=Base.DEFAULT_UNSTABLE) return _addcounts_radix_sort_loop!(cm, sx) end -function addcounts!(cm::Dict{T}, x::AbstractArray{T}, wv::AbstractVector{W}) where {T,W<:Real} +function addcounts!(cm::Dict{T}, x::AbstractArray{T}, + wv::AbstractVector{W}) where {T,W<:Real} # add wv weighted counts of integers from x to cm length(x) == length(wv) || @@ -404,7 +450,6 @@ function addcounts!(cm::Dict{T}, x::AbstractArray{T}, wv::AbstractVector{W}) whe return cm end - """ countmap(x; alg = :auto) countmap(x::AbstractVector, wv::AbstractVector{<:Real}) @@ -430,9 +475,10 @@ raw counts. RAM, is safe for any data type, is faster for small arrays, and is faster when there are not many duplicates. """ -countmap(x; alg = :auto) = addcounts!(Dict{eltype(x),Int}(), x; alg = alg) -countmap(x::AbstractArray{T}, wv::AbstractVector{W}) where {T,W<:Real} = addcounts!(Dict{T,W}(), x, wv) - +countmap(x; alg=:auto) = addcounts!(Dict{eltype(x),Int}(), x; alg=alg) +function countmap(x::AbstractArray{T}, wv::AbstractVector{W}) where {T,W<:Real} + return addcounts!(Dict{T,W}(), x, wv) +end """ proportionmap(x) @@ -444,4 +490,6 @@ If a vector of weights `wv` is provided, the proportion of weights is computed r than the proportion of raw counts. """ proportionmap(x::AbstractArray) = _normalize_countmap(countmap(x), length(x)) -proportionmap(x::AbstractArray, wv::AbstractWeights) = _normalize_countmap(countmap(x, wv), sum(wv)) +function proportionmap(x::AbstractArray, wv::AbstractWeights) + return _normalize_countmap(countmap(x, wv), sum(wv)) +end diff --git a/src/cov.jl b/src/cov.jl index 6796216e6..38c501aad 100644 --- a/src/cov.jl +++ b/src/cov.jl @@ -5,27 +5,28 @@ function _symmetrize!(a::DenseMatrix) m, n = size(a) m == n || error("a must be a square matrix.") - for j = 1:n - for i = j+1:n - vl = a[i,j] - vr = a[j,i] - a[i,j] = a[j,i] = middle(vl, vr) + for j in 1:n + for i in (j + 1):n + vl = a[i, j] + vr = a[j, i] + a[i, j] = a[j, i] = middle(vl, vr) end end return a end function _scalevars(x::DenseMatrix, s::AbstractWeights, dims::Int) - dims == 1 ? Diagonal(s) * x : - dims == 2 ? x * Diagonal(s) : - error("dims should be either 1 or 2.") + return dims == 1 ? Diagonal(s) * x : + dims == 2 ? 
x * Diagonal(s) : + error("dims should be either 1 or 2.") end ## scatter matrix _unscaled_covzm(x::DenseMatrix, dims::Integer) = unscaled_covzm(x, dims) -_unscaled_covzm(x::DenseMatrix, wv::AbstractWeights, dims::Integer) = - _symmetrize!(unscaled_covzm(x, _scalevars(x, wv, dims), dims)) +function _unscaled_covzm(x::DenseMatrix, wv::AbstractWeights, dims::Integer) + return _symmetrize!(unscaled_covzm(x, _scalevars(x, wv, dims), dims)) +end """ scattermat(X, [wv::AbstractWeights]; mean=nothing, dims=1) @@ -44,7 +45,6 @@ the estimate. """ function scattermat end - """ cov(X, w::AbstractWeights, vardim=1; mean=nothing, corrected=false) @@ -59,7 +59,6 @@ matrix (`corrected=false`) is computed by multiplying `scattermat(X, w)` by """ cov - """ mean_and_cov(x, [wv::AbstractWeights,] vardim=1; corrected=false) -> (mean, cov) @@ -71,33 +70,39 @@ Finally, bias correction is applied to the covariance calculation if """ function mean_and_cov end -scattermat(x::DenseMatrix; mean=nothing, dims::Int=1) = - _scattermatm(x, mean, dims) -_scattermatm(x::DenseMatrix, ::Nothing, dims::Int) = - _unscaled_covzm(x .- mean(x, dims=dims), dims) -_scattermatm(x::DenseMatrix, mean, dims::Int=1) = - _unscaled_covzm(x .- mean, dims) +scattermat(x::DenseMatrix; mean=nothing, dims::Int=1) = _scattermatm(x, mean, dims) +function _scattermatm(x::DenseMatrix, ::Nothing, dims::Int) + return _unscaled_covzm(x .- mean(x; dims=dims), dims) +end +_scattermatm(x::DenseMatrix, mean, dims::Int=1) = _unscaled_covzm(x .- mean, dims) -scattermat(x::DenseMatrix, wv::AbstractWeights; mean=nothing, dims::Int=1) = - _scattermatm(x, wv, mean, dims) -_scattermatm(x::DenseMatrix, wv::AbstractWeights, ::Nothing, dims::Int) = - _unscaled_covzm(x .- mean(x, wv, dims=dims), wv, dims) -_scattermatm(x::DenseMatrix, wv::AbstractWeights, mean, dims::Int) = - _unscaled_covzm(x .- mean, wv, dims) +function scattermat(x::DenseMatrix, wv::AbstractWeights; mean=nothing, dims::Int=1) + return _scattermatm(x, wv, mean, dims) +end +function _scattermatm(x::DenseMatrix, wv::AbstractWeights, ::Nothing, dims::Int) + return _unscaled_covzm(x .- mean(x, wv; dims=dims), wv, dims) +end +function _scattermatm(x::DenseMatrix, wv::AbstractWeights, mean, dims::Int) + return _unscaled_covzm(x .- mean, wv, dims) +end ## weighted cov -covm(x::DenseMatrix, mean, w::AbstractWeights, dims::Int=1; - corrected::Union{Bool, Nothing}=nothing) = - rmul!(scattermat(x, w, mean=mean, dims=dims), varcorrection(w, depcheck(:covm, :corrected, corrected))) - +function covm(x::DenseMatrix, mean, w::AbstractWeights, dims::Int=1; + corrected::Union{Bool,Nothing}=nothing) + return rmul!(scattermat(x, w; mean=mean, dims=dims), + varcorrection(w, depcheck(:covm, :corrected, corrected))) +end -cov(x::DenseMatrix, w::AbstractWeights, dims::Int=1; corrected::Union{Bool, Nothing}=nothing) = - covm(x, mean(x, w, dims=dims), w, dims; corrected=depcheck(:cov, :corrected, corrected)) +function cov(x::DenseMatrix, w::AbstractWeights, dims::Int=1; + corrected::Union{Bool,Nothing}=nothing) + return covm(x, mean(x, w; dims=dims), w, dims; + corrected=depcheck(:cov, :corrected, corrected)) +end function corm(x::DenseMatrix, mean, w::AbstractWeights, vardim::Int=1) c = covm(x, mean, w, vardim; corrected=false) s = std(x, w, vardim; mean=mean, corrected=false) - cov2cor!(c, s) + return cov2cor!(c, s) end """ @@ -106,27 +111,27 @@ end Compute the Pearson correlation matrix of `X` along the dimension `dims` with a weighting `w` . 
""" -cor(x::DenseMatrix, w::AbstractWeights, dims::Int=1) = - corm(x, mean(x, w, dims=dims), w, dims) +function cor(x::DenseMatrix, w::AbstractWeights, dims::Int=1) + return corm(x, mean(x, w; dims=dims), w, dims) +end function mean_and_cov(x::DenseMatrix, dims::Int=1; corrected::Bool=true) - m = mean(x, dims=dims) - return m, covm(x, m, dims, corrected=corrected) + m = mean(x; dims=dims) + return m, covm(x, m, dims; corrected=corrected) end function mean_and_cov(x::DenseMatrix, wv::AbstractWeights, dims::Int=1; - corrected::Union{Bool, Nothing}=nothing) - m = mean(x, wv, dims=dims) + corrected::Union{Bool,Nothing}=nothing) + m = mean(x, wv; dims=dims) return m, cov(x, wv, dims; corrected=depcheck(:mean_and_cov, :corrected, corrected)) end - """ cov2cor(C::AbstractMatrix, [s::AbstractArray]) Compute the correlation matrix from the covariance matrix `C` and, optionally, a vector of standard deviations `s`. Use [`StatsBase.cov2cor!`](@ref) for an in-place version. """ -function cov2cor(C::AbstractMatrix, s::AbstractArray = map(sqrt, view(C, diagind(C)))) +function cov2cor(C::AbstractMatrix, s::AbstractArray=map(sqrt, view(C, diagind(C)))) zs = zero(eltype(s)) T = typeof(zero(eltype(C)) / (zs * zs)) return cov2cor!(copyto!(similar(C, T), C), s) @@ -139,18 +144,18 @@ end Convert the covariance matrix `C` to a correlation matrix in-place, optionally using a vector of standard deviations `s`. """ -function cov2cor!(C::AbstractMatrix, s::AbstractArray = map(sqrt, view(C, diagind(C)))) +function cov2cor!(C::AbstractMatrix, s::AbstractArray=map(sqrt, view(C, diagind(C)))) Base.require_one_based_indexing(C, s) n = length(s) size(C) == (n, n) || throw(DimensionMismatch("inconsistent dimensions")) - for j = 1:n + for j in 1:n sj = s[j] - for i = 1:(j-1) - C[i,j] = adjoint(C[j,i]) + for i in 1:(j - 1) + C[i, j] = adjoint(C[j, i]) end - C[j,j] = oneunit(C[j,j]) - for i = (j+1):n - C[i,j] = _clampcor(C[i,j] / (s[i] * sj)) + C[j, j] = oneunit(C[j, j]) + for i in (j + 1):n + C[i, j] = _clampcor(C[i, j] / (s[i] * sj)) end end return C @@ -164,19 +169,19 @@ function cov2cor!(C::Union{Symmetric{<:Real},Hermitian}, s::AbstractArray) size(C) == (n, n) || throw(DimensionMismatch("inconsistent dimensions")) A = parent(C) if C.uplo === 'U' - for j = 1:n + for j in 1:n sj = s[j] - for i = 1:(j-1) - A[i,j] = _clampcor(A[i,j] / (s[i] * sj)) + for i in 1:(j - 1) + A[i, j] = _clampcor(A[i, j] / (s[i] * sj)) end - A[j,j] = oneunit(A[j,j]) + A[j, j] = oneunit(A[j, j]) end else - for j = 1:n + for j in 1:n sj = s[j] - A[j,j] = oneunit(A[j,j]) - for i = (j+1):n - A[i,j] = _clampcor(A[i,j] / (s[i] * sj)) + A[j, j] = oneunit(A[j, j]) + for i in (j + 1):n + A[i, j] = _clampcor(A[i, j] / (s[i] * sj)) end end end @@ -207,12 +212,12 @@ function cor2cov!(C::AbstractMatrix, s::AbstractArray) size(C) == (n, n) || throw(DimensionMismatch("inconsistent dimensions")) for j in 1:n sj = s[j] - for i in 1:(j-1) - C[i,j] = adjoint(C[j,i]) + for i in 1:(j - 1) + C[i, j] = adjoint(C[j, i]) end - C[j,j] = sj^2 - for i in (j+1):n - C[i,j] *= s[i] * sj + C[j, j] = sj^2 + for i in (j + 1):n + C[i, j] *= s[i] * sj end end return C @@ -226,17 +231,17 @@ function cor2cov!(C::Union{Symmetric{<:Real},Hermitian}, s::AbstractArray) if C.uplo === 'U' for j in 1:n sj = s[j] - for i in 1:(j-1) - A[i,j] *= s[i] * sj + for i in 1:(j - 1) + A[i, j] *= s[i] * sj end - A[j,j] = sj^2 + A[j, j] = sj^2 end else for j in 1:n sj = s[j] - A[j,j] = sj^2 - for i in (j+1):n - A[i,j] *= s[i] * sj + A[j, j] = sj^2 + for i in (j + 1):n + A[i, j] *= s[i] * sj end end end 
@@ -255,16 +260,18 @@ abstract type CovarianceEstimator end Compute a variance estimate from the observation vector `x` using the estimator `ce`. """ -cov(ce::CovarianceEstimator, x::AbstractVector; mean=nothing) = - error("cov is not defined for $(typeof(ce)) and $(typeof(x))") +function cov(ce::CovarianceEstimator, x::AbstractVector; mean=nothing) + return error("cov is not defined for $(typeof(ce)) and $(typeof(x))") +end """ cov(ce::CovarianceEstimator, x::AbstractVector, y::AbstractVector) Compute the covariance of the vectors `x` and `y` using estimator `ce`. """ -cov(ce::CovarianceEstimator, x::AbstractVector, y::AbstractVector) = - error("cov is not defined for $(typeof(ce)), $(typeof(x)) and $(typeof(y))") +function cov(ce::CovarianceEstimator, x::AbstractVector, y::AbstractVector) + return error("cov is not defined for $(typeof(ce)), $(typeof(x)) and $(typeof(y))") +end """ cov(ce::CovarianceEstimator, X::AbstractMatrix, [w::AbstractWeights]; @@ -282,11 +289,14 @@ The keyword argument `mean` can be: * when `dims=2`, an `AbstractVector` of length `N` or an `AbstractMatrix` of size `(N,1)`. """ -cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1) = - error("cov is not defined for $(typeof(ce)) and $(typeof(X))") +function cov(ce::CovarianceEstimator, X::AbstractMatrix; mean=nothing, dims::Int=1) + return error("cov is not defined for $(typeof(ce)) and $(typeof(X))") +end -cov(ce::CovarianceEstimator, X::AbstractMatrix, w::AbstractWeights; mean=nothing, dims::Int=1) = - error("cov is not defined for $(typeof(ce)), $(typeof(X)) and $(typeof(w))") +function cov(ce::CovarianceEstimator, X::AbstractMatrix, w::AbstractWeights; mean=nothing, + dims::Int=1) + return error("cov is not defined for $(typeof(ce)), $(typeof(X)) and $(typeof(w))") +end """ var(ce::CovarianceEstimator, x::AbstractVector; mean=nothing) @@ -349,29 +359,30 @@ where `x`, `y` are vectors, `X` is a matrix and `w` is a weighting vector. 
""" struct SimpleCovariance <: CovarianceEstimator corrected::Bool - SimpleCovariance(;corrected::Bool=false) = new(corrected) + SimpleCovariance(; corrected::Bool=false) = new(corrected) end -cov(sc::SimpleCovariance, x::AbstractVector) = - cov(x; corrected=sc.corrected) +cov(sc::SimpleCovariance, x::AbstractVector) = cov(x; corrected=sc.corrected) -cov(sc::SimpleCovariance, x::AbstractVector, y::AbstractVector) = - cov(x, y; corrected=sc.corrected) +function cov(sc::SimpleCovariance, x::AbstractVector, y::AbstractVector) + return cov(x, y; corrected=sc.corrected) +end function cov(sc::SimpleCovariance, X::AbstractMatrix; dims::Int=1, mean=nothing) dims ∈ (1, 2) || throw(ArgumentError("Argument dims can only be 1 or 2 (given: $dims)")) if mean === nothing return cov(X; dims=dims, corrected=sc.corrected) else - return covm(X, mean, dims, corrected=sc.corrected) + return covm(X, mean, dims; corrected=sc.corrected) end end -function cov(sc::SimpleCovariance, X::AbstractMatrix, w::AbstractWeights; dims::Int=1, mean=nothing) +function cov(sc::SimpleCovariance, X::AbstractMatrix, w::AbstractWeights; dims::Int=1, + mean=nothing) dims ∈ (1, 2) || throw(ArgumentError("Argument dims can only be 1 or 2 (given: $dims)")) if mean === nothing - return cov(X, w, dims, corrected=sc.corrected) + return cov(X, w, dims; corrected=sc.corrected) else - return covm(X, mean, w, dims, corrected=sc.corrected) + return covm(X, mean, w, dims; corrected=sc.corrected) end end diff --git a/src/deprecates.jl b/src/deprecates.jl index 16810af13..f5635a790 100644 --- a/src/deprecates.jl +++ b/src/deprecates.jl @@ -4,8 +4,9 @@ if !isdefined(Base, :stderr) @deprecate stderr(obj::StatisticalModel) stderror(obj) else function (io::typeof(stderr))(obj::StatisticalModel) - Base.depwarn("stderr(obj::StatisticalModel) is deprecated, use stderror(obj) instead", :stderr) - io === stderr ? stderror(obj) : throw(MethodError(io, (obj,))) + Base.depwarn("stderr(obj::StatisticalModel) is deprecated, use stderror(obj) instead", + :stderr) + return io === stderr ? 
stderror(obj) : throw(MethodError(io, (obj,))) end end @@ -14,43 +15,73 @@ end @deprecate norepeats(a::AbstractArray) allunique(a) @deprecate(mad!(v::AbstractArray{<:Real}, center; - constant::Real = BigFloat("1.482602218505601860547076529360423431326703202590312896536266275245674447622701")), + constant::Real=BigFloat("1.482602218505601860547076529360423431326703202590312896536266275245674447622701")), mad!(v, center=center, constant=constant)) ### Deprecated January 2019 @deprecate scattermatm(x::DenseMatrix, mean, dims::Int) scattermat(x, mean=mean, dims=dims) -@deprecate scattermatm(x::DenseMatrix, mean, wv::AbstractWeights, dims::Int) scattermat(x, wv, mean=mean, dims=dims) +@deprecate scattermatm(x::DenseMatrix, mean, wv::AbstractWeights, dims::Int) scattermat(x, + wv, + mean=mean, + dims=dims) @deprecate scattermat(x::DenseMatrix, dims::Int) scattermat(x, dims=dims) -@deprecate scattermat(x::DenseMatrix, wv::AbstractWeights, dims::Int) scattermat(x, wv, dims=dims) -@deprecate mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Int) mean!(R, A, w, dims=dims) -@deprecate mean(A::AbstractArray{T}, w::AbstractWeights{W}, dims::Int) where {T<:Number,W<:Real} mean(A, w, dims=dims) - -@deprecate wquantile(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}, p::AbstractVector{<:Real}) quantile(v, w, p) -@deprecate wquantile(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}, p::Number) quantile(v, w, [p])[1] -@deprecate wquantile(v::AbstractVector{<:Real}, w::AbstractVector{<:Real}, p::AbstractVector{<:Real}) quantile(v, pweights(w), p) -@deprecate wquantile(v::AbstractVector{<:Real}, w::AbstractVector{<:Real}, p::Number) quantile(v, pweights(w), [p])[1] +@deprecate scattermat(x::DenseMatrix, wv::AbstractWeights, dims::Int) scattermat(x, wv, + dims=dims) +@deprecate mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Int) mean!(R, + A, + w, + dims=dims) +@deprecate mean(A::AbstractArray{T}, w::AbstractWeights{W}, + dims::Int) where {T<:Number,W<:Real} mean(A, w, dims=dims) + +@deprecate wquantile(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}, + p::AbstractVector{<:Real}) quantile(v, w, p) +@deprecate wquantile(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}, p::Number) quantile(v, + w, + [p])[1] +@deprecate wquantile(v::AbstractVector{<:Real}, w::AbstractVector{<:Real}, + p::AbstractVector{<:Real}) quantile(v, pweights(w), p) +@deprecate wquantile(v::AbstractVector{<:Real}, w::AbstractVector{<:Real}, p::Number) quantile(v, + pweights(w), + [p])[1] @deprecate wmedian(v::AbstractVector{<:Real}, w::AbstractWeights{<:Real}) median(v, w) -@deprecate wmedian(v::AbstractVector{<:Real}, w::AbstractVector{<:Real}) median(v, weights(w)) +@deprecate wmedian(v::AbstractVector{<:Real}, w::AbstractVector{<:Real}) median(v, + weights(w)) -@deprecate quantile(v::AbstractArray{<:Real}) quantile(v, [.0, .25, .5, .75, 1.0]) +@deprecate quantile(v::AbstractArray{<:Real}) quantile(v, [0.0, 0.25, 0.5, 0.75, 1.0]) ### Deprecated September 2019 @deprecate sum(A::AbstractArray, w::AbstractWeights, dims::Int) sum(A, w, dims=dims) @deprecate values(wv::AbstractWeights) convert(Vector, wv) ### Deprecated November 2021 -@deprecate stdm(x::AbstractArray{<:Real}, w::AbstractWeights, m::Real; corrected::Union{Bool, Nothing}=nothing) std(x, w, mean=m, corrected=corrected) false -@deprecate varm(x::AbstractArray{<:Real}, w::AbstractWeights, m::Real; corrected::Union{Bool, Nothing}=nothing) var(x, w, mean=m, corrected=corrected) false -@deprecate 
stdm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing}=nothing) std(x, w, dim, mean=m, corrected=corrected) false -@deprecate varm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing}=nothing) var(x, w, dim, mean=m, corrected=corrected) false -@deprecate varm!(R::AbstractArray, x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing}=nothing) var!(R, x, w, dim, mean=m, corrected=corrected) false +@deprecate stdm(x::AbstractArray{<:Real}, w::AbstractWeights, m::Real; + corrected::Union{Bool,Nothing}=nothing) std(x, w, mean=m, + corrected=corrected) false +@deprecate varm(x::AbstractArray{<:Real}, w::AbstractWeights, m::Real; + corrected::Union{Bool,Nothing}=nothing) var(x, w, mean=m, + corrected=corrected) false +@deprecate stdm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, + dim::Int; corrected::Union{Bool,Nothing}=nothing) std(x, w, dim, mean=m, + corrected=corrected) false +@deprecate varm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, + dim::Int; corrected::Union{Bool,Nothing}=nothing) var(x, w, dim, mean=m, + corrected=corrected) false +@deprecate varm!(R::AbstractArray, x::AbstractArray{<:Real}, w::AbstractWeights, + m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool,Nothing}=nothing) var!(R, + x, + w, + dim, + mean=m, + corrected=corrected) false ### This was never part of the public API ### Deprecated April 2024 function make_alias_table!(w::AbstractVector, wsum, a::AbstractVector{Float64}, alias::AbstractVector{Int}) - Base.depwarn("make_alias_table! is both internal and deprecated, use AliasTables.jl instead", :make_alias_table!) + Base.depwarn("make_alias_table! is both internal and deprecated, use AliasTables.jl instead", + :make_alias_table!) # Arguments: # # w [in]: input weights @@ -70,7 +101,7 @@ function make_alias_table!(w::AbstractVector, wsum, throw(DimensionMismatch("Inconsistent array lengths.")) ac = n / wsum - for i = 1:n + for i in 1:n a[i] = w[i] * ac end @@ -79,7 +110,7 @@ function make_alias_table!(w::AbstractVector, wsum, kl = 0 # actual number of larges ks = 0 # actual number of smalls - for i = 1:n + for i in 1:n ai = a[i] if ai > 1.0 larges[kl+=1] = i # push to larges @@ -89,8 +120,10 @@ function make_alias_table!(w::AbstractVector, wsum, end while kl > 0 && ks > 0 - s = smalls[ks]; ks -= 1 # pop from smalls - l = larges[kl]; kl -= 1 # pop from larges + s = smalls[ks]; + ks -= 1 # pop from smalls + l = larges[kl]; + kl -= 1 # pop from larges alias[s] = l al = a[l] = (a[l] - 1.0) + a[s] if al > 1.0 @@ -101,8 +134,8 @@ function make_alias_table!(w::AbstractVector, wsum, end # this loop should be redundant, except for rounding - for i = 1:ks + for i in 1:ks a[smalls[i]] = 1.0 end - nothing + return nothing end diff --git a/src/deviation.jl b/src/deviation.jl index cfbf96c49..dc07b23b5 100644 --- a/src/deviation.jl +++ b/src/deviation.jl @@ -20,7 +20,6 @@ function counteq(a::AbstractArray, b::AbstractArray) return c end - """ countne(a, b) @@ -39,7 +38,6 @@ function countne(a::AbstractArray, b::AbstractArray) return c end - """ sqL2dist(a, b) @@ -58,7 +56,6 @@ function sqL2dist(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}) return r end - # L2 distance """ L2dist(a, b) @@ -68,7 +65,6 @@ Efficient equivalent of `sqrt(sum(abs2, a - b))`. 
""" L2dist(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}) = sqrt(sqL2dist(a, b)) - # L1 distance """ L1dist(a, b) @@ -88,7 +84,6 @@ function L1dist(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}) return r end - # Linf distance """ Linfdist(a, b) @@ -109,7 +104,6 @@ function Linfdist(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}) return r end - # Generalized KL-divergence """ gkldiv(a, b) @@ -134,7 +128,6 @@ function gkldiv(a::AbstractArray{<:Real}, b::AbstractArray{<:Real}) return r end - # MeanAD: mean absolute deviation """ meanad(a, b) @@ -143,7 +136,6 @@ Return the mean absolute deviation between two arrays: `mean(abs, a - b)`. """ meanad(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}) = L1dist(a, b) / length(a) - # MaxAD: maximum absolute deviation """ maxad(a, b) @@ -152,7 +144,6 @@ Return the maximum absolute deviation between two arrays: `maxabs(a - b)`. """ maxad(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}) = Linfdist(a, b) - # MSD: mean squared deviation """ msd(a, b) @@ -161,7 +152,6 @@ Return the mean squared deviation between two arrays: `mean(abs2, a - b)`. """ msd(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}) = sqL2dist(a, b) / length(a) - # RMSD: root mean squared deviation """ rmsd(a, b; normalize=false) @@ -180,7 +170,6 @@ function rmsd(a::AbstractArray{<:Number}, b::AbstractArray{<:Number}; normalize: end end - # PSNR: peak signal-to-noise ratio """ psnr(a, b, maxv) diff --git a/src/empirical.jl b/src/empirical.jl index 45f985468..bff5c82bb 100644 --- a/src/empirical.jl +++ b/src/empirical.jl @@ -2,7 +2,7 @@ ## Empirical CDF -struct ECDF{T <: AbstractVector{<:Real}, W <: AbstractWeights{<:Real}} +struct ECDF{T<:AbstractVector{<:Real},W<:AbstractWeights{<:Real}} sorted_values::T weights::W end @@ -13,7 +13,7 @@ function (ecdf::ECDF)(x::Real) evenweights = isempty(ecdf.weights) weightsum = evenweights ? length(ecdf.sorted_values) : sum(ecdf.weights) partialsum = evenweights ? n : sum(view(ecdf.weights, 1:n)) - partialsum / weightsum + return partialsum / weightsum end function (ecdf::ECDF)(v::AbstractVector{<:Real}) @@ -55,10 +55,11 @@ function is inside the interval ``(0,1)``; the function is defined for the whole """ function ecdf(X::AbstractVector{<:Real}; weights::AbstractVector{<:Real}=Weights(Float64[])) any(isnan, X) && throw(ArgumentError("ecdf can not include NaN values")) - isempty(weights) || length(X) == length(weights) || throw(ArgumentError("data and weight vectors must be the same size," * - "got $(length(X)) and $(length(weights))")) + isempty(weights) || length(X) == length(weights) || + throw(ArgumentError("data and weight vectors must be the same size," * + "got $(length(X)) and $(length(weights))")) ord = sortperm(X) - ECDF(X[ord], isempty(weights) ? weights : Weights(weights[ord])) + return ECDF(X[ord], isempty(weights) ? weights : Weights(weights[ord])) end minimum(ecdf::ECDF) = first(ecdf.sorted_values) diff --git a/src/hist.jl b/src/hist.jl index b2e696938..03b1137a0 100644 --- a/src/hist.jl +++ b/src/hist.jl @@ -3,28 +3,27 @@ using Base.Cartesian import Base: show, ==, push!, append!, float import LinearAlgebra: norm, normalize, normalize! - ## Fast getindex function for multiple arrays, returns a tuple of array elements -@inline Base.@propagate_inbounds @generated function _multi_getindex(i::Union{Integer, CartesianIndex}, c::AbstractArray...) +@inline Base.@propagate_inbounds @generated function _multi_getindex(i::Union{Integer, + CartesianIndex}, + c::AbstractArray...) 
N = length(c) result_expr = Expr(:tuple) for j in 1:N push!(result_expr.args, :(c[$j][i])) end - result_expr + return result_expr end - # Need a generated function to promote edge types, because a simple # promote_type(map(eltype, h.edges)...) isn't type stable (tested # with Julia v0.5). -@generated function _promote_edge_types(edges::NTuple{N,AbstractVector}) where N - promote_type(map(eltype, edges.parameters)...) +@generated function _promote_edge_types(edges::NTuple{N,AbstractVector}) where {N} + return promote_type(map(eltype, edges.parameters)...) end - ## nice-valued ranges for histograms -function histrange(v::AbstractArray{T}, n::Integer, closed::Symbol=:left) where T +function histrange(v::AbstractArray{T}, n::Integer, closed::Symbol=:left) where {T} F = float(T) nv = length(v) if nv == 0 && n < 0 @@ -36,10 +35,10 @@ function histrange(v::AbstractArray{T}, n::Integer, closed::Symbol=:left) where end lo, hi = extrema(v) - histrange(F(lo), F(hi), n, closed) + return histrange(F(lo), F(hi), n, closed) end -function histrange(lo::F, hi::F, n::Integer, closed::Symbol=:left) where F +function histrange(lo::F, hi::F, n::Integer, closed::Symbol=:left) where {F} if hi == lo start = F(hi) step = one(F) @@ -96,22 +95,23 @@ function histrange(lo::F, hi::F, n::Integer, closed::Symbol=:left) where F len += one(F) end end - StepRangeLen(Base.TwicePrecision{Float64}((start, divisor)), - Base.TwicePrecision{Float64}((step, divisor)), - Int(len)) + return StepRangeLen(Base.TwicePrecision{Float64}((start, divisor)), + Base.TwicePrecision{Float64}((step, divisor)), + Int(len)) end -histrange(vs::NTuple{N,AbstractVector},nbins::NTuple{N,Integer},closed::Symbol) where {N} = - map((v,n) -> histrange(v,n,closed),vs,nbins) -histrange(vs::NTuple{N,AbstractVector},nbins::Integer,closed::Symbol) where {N} = - map(v -> histrange(v,nbins,closed),vs) - - +function histrange(vs::NTuple{N,AbstractVector}, nbins::NTuple{N,Integer}, + closed::Symbol) where {N} + return map((v, n) -> histrange(v, n, closed), vs, nbins) +end +function histrange(vs::NTuple{N,AbstractVector}, nbins::Integer, closed::Symbol) where {N} + return map(v -> histrange(v, nbins, closed), vs) +end ## histograms ## function sturges(n) # Sturges' formula n==0 && return one(n) - ceil(Integer, log2(n))+1 + return ceil(Integer, log2(n))+1 end abstract type AbstractHistogram{T<:Real,N,E} end @@ -191,50 +191,58 @@ mutable struct Histogram{T<:Real,N,E} <: AbstractHistogram{T,N,E} function Histogram{T,N,E}(edges::NTuple{N,AbstractArray}, weights::Array{T,N}, closed::Symbol, isdensity::Bool=false) where {T,N,E} closed == :right || closed == :left || error("closed must :left or :right") - isdensity && !(T <: AbstractFloat) && error("Density histogram must have float-type weights") - _edges_nbins(edges) == size(weights) || error("Histogram edge vectors must be 1 longer than corresponding weight dimensions") + isdensity && !(T <: AbstractFloat) && + error("Density histogram must have float-type weights") + _edges_nbins(edges) == size(weights) || + error("Histogram edge vectors must be 1 longer than corresponding weight dimensions") # We do not handle -0.0 in ranges correctly in `binindex` for performance # Constructing ranges starting or ending with -0.0 is very hard, # and ranges containing -0.0 elsewhere virtually impossible, # but check this just in case as it is cheap foreach(edges) do e - e isa AbstractRange && any(isequal(-0.0), e) && - throw(ArgumentError("ranges containing -0.0 not allowed in edges")) + return e isa AbstractRange && 
any(isequal(-0.0), e) && + throw(ArgumentError("ranges containing -0.0 not allowed in edges")) end - new{T,N,E}(edges,weights,closed,isdensity) + return new{T,N,E}(edges, weights, closed, isdensity) end end -Histogram(edges::NTuple{N,AbstractVector}, weights::AbstractArray{T,N}, - closed::Symbol=:left, isdensity::Bool=false) where {T,N} = - Histogram{T,N,typeof(edges)}(edges,weights,closed,isdensity) +function Histogram(edges::NTuple{N,AbstractVector}, weights::AbstractArray{T,N}, + closed::Symbol=:left, isdensity::Bool=false) where {T,N} + return Histogram{T,N,typeof(edges)}(edges, weights, closed, isdensity) +end -Histogram(edges::NTuple{N,AbstractVector}, ::Type{T}, closed::Symbol=:left, - isdensity::Bool=false) where {T,N} = - Histogram(edges,zeros(T,_edges_nbins(edges)...),closed,isdensity) +function Histogram(edges::NTuple{N,AbstractVector}, ::Type{T}, closed::Symbol=:left, + isdensity::Bool=false) where {T,N} + return Histogram(edges, zeros(T, _edges_nbins(edges)...), closed, isdensity) +end -Histogram(edges::NTuple{N,AbstractVector}, closed::Symbol=:left, - isdensity::Bool=false) where {N} = - Histogram(edges,Int,closed,isdensity) +function Histogram(edges::NTuple{N,AbstractVector}, closed::Symbol=:left, + isdensity::Bool=false) where {N} + return Histogram(edges, Int, closed, isdensity) +end function show(io::IO, h::AbstractHistogram) println(io, typeof(h)) - println(io,"edges:") + println(io, "edges:") for e in h.edges - println(io," ",e) + println(io, " ", e) end - println(io,"weights: ",h.weights) - println(io,"closed: ",h.closed) - print(io,"isdensity: ",h.isdensity) + println(io, "weights: ", h.weights) + println(io, "closed: ", h.closed) + return print(io, "isdensity: ", h.isdensity) end -(==)(h1::Histogram,h2::Histogram) = (==)(h1.edges,h2.edges) && (==)(h1.weights,h2.weights) && (==)(h1.closed,h2.closed) && (==)(h1.isdensity,h2.isdensity) - +function (==)(h1::Histogram, h2::Histogram) + return (==)(h1.edges, h2.edges) && (==)(h1.weights, h2.weights) && + (==)(h1.closed, h2.closed) && (==)(h1.isdensity, h2.isdensity) +end binindex(h::AbstractHistogram{T,1}, x::Real) where {T} = binindex(h, (x,))[1] -binindex(h::Histogram{T,N}, xs::NTuple{N,Real}) where {T,N} = - map((edge, x) -> _edge_binindex(edge, h.closed, x), h.edges, xs) +function binindex(h::Histogram{T,N}, xs::NTuple{N,Real}) where {T,N} + return map((edge, x) -> _edge_binindex(edge, h.closed, x), h.edges, xs) +end _normalize_zero(x::AbstractFloat) = isequal(x, -0.0) ? 
zero(x) : x _normalize_zero(x::Any) = x @@ -258,109 +266,140 @@ end end end - binvolume(h::AbstractHistogram{T,1}, binidx::Integer) where {T} = binvolume(h, (binidx,)) -binvolume(::Type{V}, h::AbstractHistogram{T,1}, binidx::Integer) where {V,T} = binvolume(V, h, (binidx,)) +function binvolume(::Type{V}, h::AbstractHistogram{T,1}, binidx::Integer) where {V,T} + return binvolume(V, h, (binidx,)) +end -binvolume(h::Histogram{T,N}, binidx::NTuple{N,Integer}) where {T,N} = - binvolume(_promote_edge_types(h.edges), h, binidx) +function binvolume(h::Histogram{T,N}, binidx::NTuple{N,Integer}) where {T,N} + return binvolume(_promote_edge_types(h.edges), h, binidx) +end -binvolume(::Type{V}, h::Histogram{T,N}, binidx::NTuple{N,Integer}) where {V,T,N} = - prod(map((edge, i) -> _edge_binvolume(V, edge, i), h.edges, binidx)) +function binvolume(::Type{V}, h::Histogram{T,N}, binidx::NTuple{N,Integer}) where {V,T,N} + return prod(map((edge, i) -> _edge_binvolume(V, edge, i), h.edges, binidx)) +end -@inline _edge_binvolume(::Type{V}, edge::AbstractVector, i::Integer) where {V} = V(edge[i+1]) - V(edge[i]) +@inline _edge_binvolume(::Type{V}, edge::AbstractVector, i::Integer) where {V} = V(edge[i+1]) - + V(edge[i]) @inline _edge_binvolume(::Type{V}, edge::AbstractRange, i::Integer) where {V} = V(step(edge)) -@inline _edge_binvolume(edge::AbstractVector, i::Integer) = _edge_binvolume(eltype(edge), edge, i) - +@inline _edge_binvolume(edge::AbstractVector, i::Integer) = _edge_binvolume(eltype(edge), + edge, i) @inline _edges_nbins(edges::NTuple{N,AbstractVector}) where {N} = map(_edge_nbins, edges) @inline _edge_nbins(edge::AbstractVector) = length(edge) - 1 - # 1-dimensional -Histogram(edge::AbstractVector, weights::AbstractVector{T}, closed::Symbol=:left, isdensity::Bool=false) where {T} = - Histogram((edge,), weights, closed, isdensity) - -Histogram(edge::AbstractVector, ::Type{T}, closed::Symbol=:left, isdensity::Bool=false) where {T} = - Histogram((edge,), T, closed, isdensity) +function Histogram(edge::AbstractVector, weights::AbstractVector{T}, closed::Symbol=:left, + isdensity::Bool=false) where {T} + return Histogram((edge,), weights, closed, isdensity) +end -Histogram(edge::AbstractVector, closed::Symbol=:left, isdensity::Bool=false) = - Histogram((edge,), closed, isdensity) +function Histogram(edge::AbstractVector, ::Type{T}, closed::Symbol=:left, + isdensity::Bool=false) where {T} + return Histogram((edge,), T, closed, isdensity) +end +function Histogram(edge::AbstractVector, closed::Symbol=:left, isdensity::Bool=false) + return Histogram((edge,), closed, isdensity) +end push!(h::AbstractHistogram{T,1}, x::Real, w::Real) where {T} = push!(h, (x,), w) -push!(h::AbstractHistogram{T,1}, x::Real) where {T} = push!(h,x,one(T)) +push!(h::AbstractHistogram{T,1}, x::Real) where {T} = push!(h, x, one(T)) append!(h::AbstractHistogram{T,1}, v::AbstractVector) where {T} = append!(h, (v,)) -append!(h::AbstractHistogram{T,1}, v::AbstractVector, wv::Union{AbstractVector,AbstractWeights}) where {T} = append!(h, (v,), wv) +function append!(h::AbstractHistogram{T,1}, v::AbstractVector, + wv::Union{AbstractVector,AbstractWeights}) where {T} + return append!(h, (v,), wv) +end -fit(::Type{Histogram{T}},v::AbstractVector, edg::AbstractVector; closed::Symbol=:left) where {T} = - fit(Histogram{T},(v,), (edg,), closed=closed) -fit(::Type{Histogram{T}},v::AbstractVector; closed::Symbol=:left, nbins=sturges(length(v))) where {T} = - fit(Histogram{T},(v,); closed=closed, nbins=nbins) 
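# (Illustrative sketch of the keyword-style fit methods in this hunk; assumes
# StatsBase is loaded and that v/the weight vector are placeholder data:)
#
#     v = randn(1000)
#     fit(Histogram, v; nbins=20)              # unweighted, explicit bin count
#     fit(Histogram, v, weights(rand(1000)))   # weighted: W of AbstractWeights{W} sets T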
-fit(::Type{Histogram{T}},v::AbstractVector, wv::AbstractWeights, edg::AbstractVector; closed::Symbol=:left) where {T} = - fit(Histogram{T},(v,), wv, (edg,), closed=closed) -fit(::Type{Histogram{T}},v::AbstractVector, wv::AbstractWeights; closed::Symbol=:left, nbins=sturges(length(v))) where {T} = - fit(Histogram{T}, (v,), wv; closed=closed, nbins=nbins) +function fit(::Type{Histogram{T}}, v::AbstractVector, edg::AbstractVector; + closed::Symbol=:left) where {T} + return fit(Histogram{T}, (v,), (edg,); closed=closed) +end +function fit(::Type{Histogram{T}}, v::AbstractVector; closed::Symbol=:left, + nbins=sturges(length(v))) where {T} + return fit(Histogram{T}, (v,); closed=closed, nbins=nbins) +end +function fit(::Type{Histogram{T}}, v::AbstractVector, wv::AbstractWeights, + edg::AbstractVector; closed::Symbol=:left) where {T} + return fit(Histogram{T}, (v,), wv, (edg,); closed=closed) +end +function fit(::Type{Histogram{T}}, v::AbstractVector, wv::AbstractWeights; + closed::Symbol=:left, nbins=sturges(length(v))) where {T} + return fit(Histogram{T}, (v,), wv; closed=closed, nbins=nbins) +end -fit(::Type{Histogram}, v::AbstractVector, wv::AbstractWeights{W}, args...; kwargs...) where {W} = fit(Histogram{W}, v, wv, args...; kwargs...) +function fit(::Type{Histogram}, v::AbstractVector, wv::AbstractWeights{W}, args...; + kwargs...) where {W} + return fit(Histogram{W}, v, wv, args...; kwargs...) +end # N-dimensional -function push!(h::Histogram{T,N},xs::NTuple{N,Real},w::Real) where {T,N} +function push!(h::Histogram{T,N}, xs::NTuple{N,Real}, w::Real) where {T,N} h.isdensity && error("Density histogram must have float-type weights") idx = binindex(h, xs) if checkbounds(Bool, h.weights, idx...) h.weights[idx...] += w end - h + return h end -function push!(h::Histogram{T,N},xs::NTuple{N,Real},w::Real) where {T<:AbstractFloat,N} +function push!(h::Histogram{T,N}, xs::NTuple{N,Real}, w::Real) where {T<:AbstractFloat,N} idx = binindex(h, xs) if checkbounds(Bool, h.weights, idx...) h.weights[idx...] += h.isdensity ? w / binvolume(h, idx) : w end - h + return h end -push!(h::AbstractHistogram{T,N},xs::NTuple{N,Real}) where {T,N} = push!(h,xs,one(T)) - +push!(h::AbstractHistogram{T,N}, xs::NTuple{N,Real}) where {T,N} = push!(h, xs, one(T)) function append!(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}) where {T,N} for i in eachindex(vs...) xs = _multi_getindex(i, vs...) push!(h, xs, one(T)) end - h + return h end -function append!(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}, wv::AbstractVector) where {T,N} +function append!(h::AbstractHistogram{T,N}, vs::NTuple{N,AbstractVector}, + wv::AbstractVector) where {T,N} for i in eachindex(wv, vs...) xs = _multi_getindex(i, vs...) 
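        # (Added note) xs is the i-th coordinate tuple gathered from all N input
        # vectors; the per-observation weight wv[i] is accumulated into its bin next.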
push!(h, xs, wv[i]) end - h + return h end # Turn kwargs nbins into a type-stable tuple of integers: -function _nbins_tuple(vs::NTuple{N,AbstractVector}, nbins) where N +function _nbins_tuple(vs::NTuple{N,AbstractVector}, nbins) where {N} template = map(length, vs) result = broadcast((t, x) -> typeof(t)(x), template, nbins) - result::typeof(template) + return result::typeof(template) end -fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, edges::NTuple{N,AbstractVector}; closed::Symbol=:left) where {T,N} = - append!(Histogram(edges, T, closed, false), vs) +function fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, + edges::NTuple{N,AbstractVector}; closed::Symbol=:left) where {T,N} + return append!(Histogram(edges, T, closed, false), vs) +end -fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}; closed::Symbol=:left, nbins=sturges(length(vs[1]))) where {T,N} = - fit(Histogram{T}, vs, histrange(vs,_nbins_tuple(vs, nbins),closed); closed=closed) +function fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}; closed::Symbol=:left, + nbins=sturges(length(vs[1]))) where {T,N} + return fit(Histogram{T}, vs, histrange(vs, _nbins_tuple(vs, nbins), closed); + closed=closed) +end -fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::AbstractWeights{W}, edges::NTuple{N,AbstractVector}; closed::Symbol=:left) where {T,N,W} = - append!(Histogram(edges, T, closed, false), vs, wv) +function fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::AbstractWeights{W}, + edges::NTuple{N,AbstractVector}; closed::Symbol=:left) where {T,N,W} + return append!(Histogram(edges, T, closed, false), vs, wv) +end -fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::AbstractWeights; closed::Symbol=:left, nbins=sturges(length(vs[1]))) where {T,N} = - fit(Histogram{T}, vs, wv, histrange(vs,_nbins_tuple(vs, nbins),closed); closed=closed) +function fit(::Type{Histogram{T}}, vs::NTuple{N,AbstractVector}, wv::AbstractWeights; + closed::Symbol=:left, nbins=sturges(length(vs[1]))) where {T,N} + return fit(Histogram{T}, vs, wv, histrange(vs, _nbins_tuple(vs, nbins), closed); + closed=closed) +end """ fit(Histogram, data[, weight][, edges]; closed=:left[, nbins]) @@ -411,52 +450,49 @@ h = fit(Histogram, (rand(100),rand(100)),nbins=10) ``` """ fit(::Type{Histogram}, args...; kwargs...) = fit(Histogram{Int}, args...; kwargs...) -fit(::Type{Histogram}, vs::NTuple{N,AbstractVector}, wv::AbstractWeights{W}, args...; kwargs...) where {N,W} = fit(Histogram{W}, vs, wv, args...; kwargs...) - +function fit(::Type{Histogram}, vs::NTuple{N,AbstractVector}, wv::AbstractWeights{W}, + args...; kwargs...) where {N,W} + return fit(Histogram{W}, vs, wv, args...; kwargs...) +end # Get a suitable high-precision type for the norm of a histogram. -norm_type(h::Histogram{T,N}) where {T,N} = - promote_type(T, _promote_edge_types(h.edges)) +norm_type(h::Histogram{T,N}) where {T,N} = promote_type(T, _promote_edge_types(h.edges)) norm_type(::Type{T}) where {T<:Integer} = promote_type(T, Int64) norm_type(::Type{T}) where {T<:AbstractFloat} = promote_type(T, Float64) - """ norm(h::Histogram) Calculate the norm of histogram `h` as the absolute value of its integral. 
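For instance (illustrative; any density-normalized histogram integrates to one):

```julia
h = normalize(fit(Histogram, randn(10_000)); mode=:pdf)
norm(h) ≈ 1.0
```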
""" @generated function norm(h::Histogram{T,N}) where {T,N} - quote + return quote edges = h.edges weights = h.weights SumT = norm_type(h) v_0 = 1 s_0 = zero(SumT) - @nloops( - $N, i, weights, - d -> begin - v_{$N-d+1} = v_{$N-d} * _edge_binvolume(SumT, edges[d], i_d) - s_{$N-d+1} = zero(SumT) - end, - d -> begin - s_{$N-d} += s_{$N-d+1} - end, - begin - $(Symbol("s_$(N)")) += (@nref $N weights i) * $(Symbol("v_$N")) - end - ) + @nloops($N, i, weights, + d -> begin + v_{$N-d+1} = v_{$N-d} * _edge_binvolume(SumT, edges[d], i_d) + s_{$N-d+1} = zero(SumT) + end, + d -> begin + s_{$N-d} += s_{$N-d+1} + end, + begin + $(Symbol("s_$(N)")) += (@nref $N weights i) * $(Symbol("v_$N")) + end) s_0 end end - float(h::Histogram{T,N}) where {T<:AbstractFloat,N} = h -float(h::Histogram{T,N}) where {T,N} = Histogram(h.edges, float(h.weights), h.closed, h.isdensity) - - +function float(h::Histogram{T,N}) where {T,N} + return Histogram(h.edges, float(h.weights), h.closed, h.isdensity) +end """ normalize!(h::Histogram{T,N}, aux_weights::Array{T,N}...; @@ -465,13 +501,15 @@ float(h::Histogram{T,N}) where {T,N} = Histogram(h.edges, float(h.weights), h.cl Normalize the histogram `h` and optionally scale one or more auxiliary weight arrays appropriately. See description of `normalize` for details. Returns `h`. """ -@generated function normalize!(h::Histogram{T,N}, aux_weights::Array{T,N}...; mode::Symbol=:pdf) where {T<:AbstractFloat,N} - quote +@generated function normalize!(h::Histogram{T,N}, aux_weights::Array{T,N}...; + mode::Symbol=:pdf) where {T<:AbstractFloat,N} + return quote edges = h.edges weights = h.weights for A in aux_weights - (size(A) != size(weights)) && throw(DimensionMismatch("aux_weights must have same size as histogram weights")) + (size(A) != size(weights)) && + throw(DimensionMismatch("aux_weights must have same size as histogram weights")) end if mode == :none @@ -493,7 +531,9 @@ arrays appropriately. See description of `normalize` for details. Returns `h`. # Divide weights by bin volume, for :pdf also divide by sum of weights SumT = norm_type(h) vs_0 = (mode == :pdf) ? sum(SumT, weights) : one(SumT) - @nloops $N i weights d->(vs_{$N-d+1} = vs_{$N-d} * _edge_binvolume(SumT, edges[d], i_d)) begin + @nloops $N i weights d->(vs_{$N-d+1} = vs_{$N-d} * + _edge_binvolume(SumT, edges[d], + i_d)) begin (@nref $N weights i) /= $(Symbol("vs_$N")) for A in aux_weights (@nref $N A i) /= $(Symbol("vs_$N")) @@ -516,7 +556,6 @@ arrays appropriately. See description of `normalize` for details. Returns `h`. end end - """ normalize(h::Histogram{T,N}; mode::Symbol=:pdf) where {T,N} @@ -538,9 +577,9 @@ Valid values for `mode` are: Successive application of both `:probability` and `:density` normalization (in any order) is equivalent to `:pdf` normalization. """ -normalize(h::Histogram{T,N}; mode::Symbol=:pdf) where {T,N} = - normalize!(deepcopy(float(h)), mode = mode) - +function normalize(h::Histogram{T,N}; mode::Symbol=:pdf) where {T,N} + return normalize!(deepcopy(float(h)); mode=mode) +end """ normalize(h::Histogram{T,N}, aux_weights::Array{T,N}...; mode::Symbol=:pdf) where {T,N} @@ -551,14 +590,14 @@ uncertainties). The values of the auxiliary arrays are scaled by the same factor as the corresponding histogram weight values. Returns a tuple of the normalized histogram and scaled auxiliary weights. 
""" -function normalize(h::Histogram{T,N}, aux_weights::Array{T,N}...; mode::Symbol=:pdf) where {T,N} +function normalize(h::Histogram{T,N}, aux_weights::Array{T,N}...; + mode::Symbol=:pdf) where {T,N} h_fltcp = deepcopy(float(h)) aux_weights_fltcp = map(x -> deepcopy(float(x)), aux_weights) - normalize!(h_fltcp, aux_weights_fltcp..., mode = mode) - (h_fltcp, aux_weights_fltcp...) + normalize!(h_fltcp, aux_weights_fltcp...; mode=mode) + return (h_fltcp, aux_weights_fltcp...) end - """ zero(h::Histogram) @@ -566,9 +605,9 @@ Create a new histogram with the same binning, type and shape of weights and the same properties (`closed` and `isdensity`) as `h`, with all weights set to zero. """ -Base.zero(h::Histogram{T,N,E}) where {T,N,E} = - Histogram{T,N,E}(deepcopy(h.edges), zero(h.weights), h.closed, h.isdensity) - +function Base.zero(h::Histogram{T,N,E}) where {T,N,E} + return Histogram{T,N,E}(deepcopy(h.edges), zero(h.weights), h.closed, h.isdensity) +end """ merge!(target::Histogram, others::Histogram...) @@ -578,18 +617,21 @@ Update histogram `target` by merging it with the histograms `others`. See """ function Base.merge!(target::Histogram, others::Histogram...) for h in others - target.edges != h.edges && throw(ArgumentError("can't merge histograms with different binning")) - size(target.weights) != size(h.weights) && throw(ArgumentError("can't merge histograms with different dimensions")) - target.closed != h.closed && throw(ArgumentError("can't merge histograms with different closed left/right settings")) - target.isdensity != h.isdensity && throw(ArgumentError("can't merge histograms with different isdensity settings")) + target.edges != h.edges && + throw(ArgumentError("can't merge histograms with different binning")) + size(target.weights) != size(h.weights) && + throw(ArgumentError("can't merge histograms with different dimensions")) + target.closed != h.closed && + throw(ArgumentError("can't merge histograms with different closed left/right settings")) + target.isdensity != h.isdensity && + throw(ArgumentError("can't merge histograms with different isdensity settings")) end for h in others target.weights .+= h.weights end - target + return target end - """ merge(h::Histogram, others::Histogram...) diff --git a/src/misc.jl b/src/misc.jl index afff59eb4..0c8970cc1 100644 --- a/src/misc.jl +++ b/src/misc.jl @@ -18,12 +18,12 @@ julia> rle([1,1,1,2,2,3,3,3,3,2,2,2]) ([1, 2, 3, 2], [3, 2, 4, 3]) ``` """ -function rle(v::AbstractVector{T}) where T +function rle(v::AbstractVector{T}) where {T} n = length(v) vals = T[] lens = Int[] - n>0 || return (vals,lens) + n>0 || return (vals, lens) cv = v[1] cl = 1 @@ -57,38 +57,37 @@ Reconstruct a vector from its run-length encoding (see [`rle`](@ref)). `vals` is a vector of the values and `lens` is a vector of the corresponding run lengths. 
""" -function inverse_rle(vals::AbstractVector{T}, lens::AbstractVector{<:Integer}) where T +function inverse_rle(vals::AbstractVector{T}, lens::AbstractVector{<:Integer}) where {T} m = length(vals) mlens = length(lens) - mlens == m || throw(DimensionMismatch( - "number of vals ($m) does not match the number of lens ($mlens)")) + mlens == m || + throw(DimensionMismatch("number of vals ($m) does not match the number of lens ($mlens)")) n = sum(lens) n >= 0 || throw(ArgumentError("lengths must be non-negative")) r = Vector{T}(undef, n) p = 0 - for i = 1 : m + for i in 1:m j = lens[i] j >= 0 || throw(ArgumentError("lengths must be non-negative")) v = vals[i] while j > 0 r[p+=1] = v - j -=1 + j -= 1 end end return r end - """ indexmap(a) Construct a dictionary that maps each unique value in `a` to the index of its first occurrence in `a`. """ -function indexmap(a::AbstractArray{T}) where T +function indexmap(a::AbstractArray{T}) where {T} d = Dict{T,Int}() - for i = 1 : length(a) + for i in 1:length(a) k = a[i] if !haskey(d, k) d[k] = i @@ -97,17 +96,16 @@ function indexmap(a::AbstractArray{T}) where T return d end - """ levelsmap(a) Construct a dictionary that maps each of the `n` unique values in `a` to a number between 1 and `n`. """ -function levelsmap(a::AbstractArray{T}) where T +function levelsmap(a::AbstractArray{T}) where {T} d = Dict{T,Int}() index = 1 - for i = 1 : length(a) + for i in 1:length(a) k = a[i] if !haskey(d, k) d[k] = index @@ -137,10 +135,9 @@ julia> indicatormat([1 2 2], 2) ``` """ function indicatormat(x::AbstractArray{<:Integer}, k::Integer; sparse::Bool=false) - sparse ? _indicatormat_sparse(x, k) : _indicatormat_dense(x, k) + return sparse ? _indicatormat_sparse(x, k) : _indicatormat_dense(x, k) end - """ indicatormat(x, c=sort(unique(x)); sparse=false) @@ -149,29 +146,29 @@ Let `ci` be the index of `x[i]` in `c`. Then `I[ci, i] = true` and all other elements are `false`. """ function indicatormat(x::AbstractArray, c::AbstractArray; sparse::Bool=false) - sparse ? _indicatormat_sparse(x, c) : _indicatormat_dense(x, c) + return sparse ? 
_indicatormat_sparse(x, c) : _indicatormat_dense(x, c) end -indicatormat(x::AbstractArray; sparse::Bool=false) = - indicatormat(x, sort!(unique(x)); sparse=sparse) - +function indicatormat(x::AbstractArray; sparse::Bool=false) + return indicatormat(x, sort!(unique(x)); sparse=sparse) +end function _indicatormat_dense(x::AbstractArray{<:Integer}, k::Integer) n = length(x) r = zeros(Bool, k, n) - for i = 1 : n + for i in 1:n r[x[i], i] = true end return r end -function _indicatormat_dense(x::AbstractArray{T}, c::AbstractArray{T}) where T +function _indicatormat_dense(x::AbstractArray{T}, c::AbstractArray{T}) where {T} d = indexmap(c) m = length(c) n = length(x) r = zeros(Bool, m, n) o = 0 - for i = 1 : n + for i in 1:n xi = x[i] r[o + d[xi]] = true o += m @@ -179,15 +176,17 @@ function _indicatormat_dense(x::AbstractArray{T}, c::AbstractArray{T}) where T return r end -_indicatormat_sparse(x::AbstractArray{<:Integer}, k::Integer) = (n = length(x); sparse(x, 1:n, true, k, n)) +function _indicatormat_sparse(x::AbstractArray{<:Integer}, k::Integer) + return (n=length(x); sparse(x, 1:n, true, k, n)) +end -function _indicatormat_sparse(x::AbstractArray{T}, c::AbstractArray{T}) where T +function _indicatormat_sparse(x::AbstractArray{T}, c::AbstractArray{T}) where {T} d = indexmap(c) m = length(c) n = length(x) rinds = Vector{Int}(undef, n) - for i = 1 : n + for i in 1:n rinds[i] = d[x[i]] end return sparse(rinds, 1:n, true, m, n) diff --git a/src/moments.jl b/src/moments.jl index 04de97a83..082d56bb4 100644 --- a/src/moments.jl +++ b/src/moments.jl @@ -19,7 +19,7 @@ replacing ``\\frac{1}{\\sum{w}}`` with a factor dependent on the type of weights * `Weights`: `ArgumentError` (bias correction not supported) """ function var(v::AbstractArray{<:Real}, w::AbstractWeights; mean=nothing, - corrected::Union{Bool, Nothing}=nothing) + corrected::Union{Bool,Nothing}=nothing) length(w) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) corrected = depcheck(:var, :corrected, corrected) if mean === nothing @@ -28,7 +28,7 @@ function var(v::AbstractArray{<:Real}, w::AbstractWeights; mean=nothing, return _moment2(v, w, mean; corrected) end function var(v::AbstractArray{<:Real}, w::UnitWeights; mean=nothing, - corrected::Union{Bool, Nothing}=nothing) + corrected::Union{Bool,Nothing}=nothing) length(w) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) corrected = depcheck(:var, :corrected, corrected) return var(v; mean, corrected) @@ -37,18 +37,18 @@ end ## var along dim function var!(R::AbstractArray, A::AbstractArray{<:Real}, w::AbstractWeights, dims::Int; - mean=nothing, corrected::Union{Bool, Nothing}=nothing) + mean=nothing, corrected::Union{Bool,Nothing}=nothing) corrected = depcheck(:var!, :corrected, corrected) if mean == 0 mean = Base.reducedim_initarray(A, dims, 0, eltype(R)) elseif mean === nothing - mean = Statistics.mean(A, w, dims=dims) + mean = Statistics.mean(A, w; dims=dims) else # check size of mean - for i = 1:ndims(A) - dA = size(A,i) - dM = size(mean,i) + for i in 1:ndims(A) + dA = size(A, i) + dM = size(mean, i) if i == dims dM == 1 || throw(DimensionMismatch("Incorrect size of mean.")) else @@ -61,18 +61,19 @@ function var!(R::AbstractArray, A::AbstractArray{<:Real}, w::AbstractWeights, di end function var(A::AbstractArray{<:Real}, w::AbstractWeights, dim::Int; mean=nothing, - corrected::Union{Bool, Nothing}=nothing) + corrected::Union{Bool,Nothing}=nothing) corrected = depcheck(:var, :corrected, corrected) if mean === nothing z = (zero(eltype(w)) * 
zero(eltype(A))^2) / zero(eltype(w)) else - z = (zero(eltype(w)) * zero(zero(eltype(A)) - zero(eltype(mean)))^2) / zero(eltype(w)) + z = (zero(eltype(w)) * zero(zero(eltype(A)) - zero(eltype(mean)))^2) / + zero(eltype(w)) end - var!(similar(A, typeof(z), Base.reduced_indices(axes(A), dim)), A, w, dim; - mean=mean, corrected=corrected) + return var!(similar(A, typeof(z), Base.reduced_indices(axes(A), dim)), A, w, dim; + mean=mean, corrected=corrected) end function var(v::AbstractArray{<:Real}, w::UnitWeights, dim::Int; mean=nothing, - corrected::Union{Bool, Nothing}=nothing) + corrected::Union{Bool,Nothing}=nothing) length(w) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) corrected = depcheck(:var, :corrected, corrected) return var(v; mean, corrected, dims=dim) @@ -97,12 +98,15 @@ weights used: * `ProbabilityWeights`: ``\\frac{n}{(n - 1) \\sum w}`` where ``n`` equals `count(!iszero, w)` * `Weights`: `ArgumentError` (bias correction not supported) """ -std(v::AbstractArray{<:Real}, w::AbstractWeights; mean=nothing, corrected::Union{Bool, Nothing}=nothing) = - sqrt.(var(v, w; mean=mean, corrected=depcheck(:std, :corrected, corrected))) +function std(v::AbstractArray{<:Real}, w::AbstractWeights; mean=nothing, + corrected::Union{Bool,Nothing}=nothing) + return sqrt.(var(v, w; mean=mean, corrected=depcheck(:std, :corrected, corrected))) +end -std(v::AbstractArray{<:Real}, w::AbstractWeights, dim::Int; - mean=nothing, corrected::Union{Bool, Nothing}=nothing) = - sqrt.(var(v, w, dim; mean=mean, corrected=depcheck(:std, :corrected, corrected))) +function std(v::AbstractArray{<:Real}, w::AbstractWeights, dim::Int; + mean=nothing, corrected::Union{Bool,Nothing}=nothing) + return sqrt.(var(v, w, dim; mean=mean, corrected=depcheck(:std, :corrected, corrected))) +end ##### Fused statistics """ @@ -116,8 +120,8 @@ See [`var`](@ref) documentation for more details. """ function mean_and_var(x; corrected::Bool=true) m = mean(x) - v = var(x, mean=m, corrected=corrected) - m, v + v = var(x; mean=m, corrected=corrected) + return m, v end """ @@ -132,49 +136,47 @@ See [`std`](@ref) documentation for more details. 
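For example (illustrative):

```jldoctest
julia> mean_and_std([1, 2, 3, 4])
(2.5, 1.2909944487358056)
```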
""" function mean_and_std(x; corrected::Bool=true) m = mean(x) - s = std(x, mean=m, corrected=corrected) - m, s + s = std(x; mean=m, corrected=corrected) + return m, s end -function mean_and_var(x::AbstractArray{<:Real}, w::AbstractWeights; corrected::Union{Bool, Nothing}=nothing) +function mean_and_var(x::AbstractArray{<:Real}, w::AbstractWeights; + corrected::Union{Bool,Nothing}=nothing) m = mean(x, w) - v = var(x, w, mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) - m, v + v = var(x, w; mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) + return m, v end -function mean_and_std(x::AbstractArray{<:Real}, w::AbstractWeights; corrected::Union{Bool, Nothing}=nothing) +function mean_and_std(x::AbstractArray{<:Real}, w::AbstractWeights; + corrected::Union{Bool,Nothing}=nothing) m = mean(x, w) - s = std(x, w, mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) - m, s + s = std(x, w; mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) + return m, s end - function mean_and_var(x::AbstractArray{<:Real}, dim::Int; corrected::Bool=true) - m = mean(x, dims=dim) - v = var(x, dims=dim, mean=m, corrected=corrected) - m, v + m = mean(x; dims=dim) + v = var(x; dims=dim, mean=m, corrected=corrected) + return m, v end function mean_and_std(x::AbstractArray{<:Real}, dim::Int; corrected::Bool=true) - m = mean(x, dims=dim) - s = std(x, dims=dim, mean=m, corrected=corrected) - m, s + m = mean(x; dims=dim) + s = std(x; dims=dim, mean=m, corrected=corrected) + return m, s end - function mean_and_var(x::AbstractArray{<:Real}, w::AbstractWeights, dims::Int; - corrected::Union{Bool, Nothing}=nothing) - m = mean(x, w, dims=dims) - v = var(x, w, dims, mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) - m, v + corrected::Union{Bool,Nothing}=nothing) + m = mean(x, w; dims=dims) + v = var(x, w, dims; mean=m, corrected=depcheck(:mean_and_var, :corrected, corrected)) + return m, v end function mean_and_std(x::AbstractArray{<:Real}, w::AbstractWeights, dims::Int; - corrected::Union{Bool, Nothing}=nothing) - m = mean(x, w, dims=dims) - s = std(x, w, dims, mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) - m, s + corrected::Union{Bool,Nothing}=nothing) + m = mean(x, w; dims=dims) + s = std(x, w, dims; mean=m, corrected=depcheck(:mean_and_std, :corrected, corrected)) + return m, s end - - ##### General central moment function _moment2(v::AbstractArray{<:Real}, m::Real; corrected::Bool) n = length(v) @@ -221,7 +223,7 @@ function _moment3(v::AbstractArray{<:Real}, m::Real) end end end - s / n + return s / n end function _moment3(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real) @@ -235,9 +237,9 @@ function _moment3(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real) return zi^3 * wvi end end - s = sum(Broadcast.instantiate(broadcasted)) + s = sum(Broadcast.instantiate(broadcasted)) end - s / sum(wv) + return s / sum(wv) end function _moment4(v::AbstractArray{<:Real}, m::Real) @@ -253,7 +255,7 @@ function _moment4(v::AbstractArray{<:Real}, m::Real) end end end - s / n + return s / n end function _moment4(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real) @@ -269,7 +271,7 @@ function _moment4(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real) end s = sum(Broadcast.instantiate(broadcasted)) end - s / sum(wv) + return s / sum(wv) end function _momentk(v::AbstractArray{<:Real}, k::Int, m::Real) @@ -285,7 +287,7 @@ function _momentk(v::AbstractArray{<:Real}, k::Int, m::Real) end end end - s / n + return s / n end function 
_momentk(v::AbstractArray{<:Real}, k::Int, wv::AbstractWeights, m::Real) @@ -301,10 +303,9 @@ function _momentk(v::AbstractArray{<:Real}, k::Int, wv::AbstractWeights, m::Real end s = sum(Broadcast.instantiate(broadcasted)) end - s / sum(wv) + return s / sum(wv) end - """ moment(v, k, [wv::AbstractWeights], m=mean(v)) @@ -312,18 +313,18 @@ Return the `k`th order central moment of a real-valued array `v`, optionally specifying a weighting vector `wv` and a center `m`. """ function moment(v::AbstractArray{<:Real}, k::Int, m::Real=mean(v)) - k == 2 ? _moment2(v, m; corrected = false) : - k == 3 ? _moment3(v, m) : - k == 4 ? _moment4(v, m) : - _momentk(v, k, m) + return k == 2 ? _moment2(v, m; corrected=false) : + k == 3 ? _moment3(v, m) : + k == 4 ? _moment4(v, m) : + _momentk(v, k, m) end function moment(v::AbstractArray{<:Real}, k::Int, wv::AbstractWeights, m::Real=mean(v, wv)) length(wv) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) - k == 2 ? _moment2(v, wv, m; corrected = false) : - k == 3 ? _moment3(v, wv, m) : - k == 4 ? _moment4(v, wv, m) : - _momentk(v, k, wv, m) + return k == 2 ? _moment2(v, wv, m; corrected=false) : + k == 3 ? _moment3(v, wv, m) : + k == 4 ? _moment4(v, wv, m) : + _momentk(v, k, wv, m) end function moment(v::AbstractArray{<:Real}, k::Int, wv::UnitWeights, m::Real) length(wv) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) @@ -362,7 +363,7 @@ end function skewness(v::AbstractArray{<:Real}, wv::AbstractWeights, m::Real=mean(v, wv)) n = length(v) length(wv) == n || throw(DimensionMismatch("Inconsistent array lengths.")) - if iszero(n) + if iszero(n) z = zero(zero(eltype(v)) - m) cm2 = z^2 * zero(eltype(wv)) # empirical 2nd centered moment (variance) cm3 = cm2 * z # empirical 3rd centered moment @@ -454,7 +455,8 @@ Reference: Smith, P. J. 1995. A Recursive Formulation of the Old Problem of Obta Moments from Cumulants and Vice Versa. The American Statistician, 49(2), 217–218. 
https://doi.org/10.2307/2684642 """ -function cumulant(v::AbstractArray{<:Real}, krange::Union{Integer, AbstractRange{<:Integer}}, wv::AbstractWeights, +function cumulant(v::AbstractArray{<:Real}, krange::Union{Integer,AbstractRange{<:Integer}}, + wv::AbstractWeights, m::Real=mean(v, wv)) n = length(v) length(wv) == n || throw(DimensionMismatch("Inconsistent array lengths.")) @@ -465,9 +467,9 @@ function cumulant(v::AbstractArray{<:Real}, krange::Union{Integer, AbstractRange cmoms = [moment(v, i, wv, m) for i in 2:kmax] cumls = Vector{eltype(cmoms)}(undef, kmax) cumls[1] = m - for i = 2:kmax + for i in 2:kmax kn = cmoms[i-1] - for j = 2:(i-2) + for j in 2:(i - 2) kn -= binomial(i-1, j)*cmoms[j-1]*cumls[i-j] end cumls[i] = kn @@ -475,5 +477,7 @@ function cumulant(v::AbstractArray{<:Real}, krange::Union{Integer, AbstractRange return cumls[krange] end -cumulant(v::AbstractArray{<:Real}, krange::Union{Integer, AbstractRange{<:Integer}}, m::Real=mean(v)) = - cumulant(v, krange, uweights(length(v)), m) +function cumulant(v::AbstractArray{<:Real}, krange::Union{Integer,AbstractRange{<:Integer}}, + m::Real=mean(v)) + return cumulant(v, krange, uweights(length(v)), m) +end diff --git a/src/pairwise.jl b/src/pairwise.jl index c8023c6a2..622d93335 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -11,7 +11,7 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix, x, y, symmetric::Bool end if symmetric m, n = size(dest) - for j in 1:n, i in (j+1):m + for j in 1:n, i in (j + 1):m dest[i, j] = dest[j, i] end end @@ -70,7 +70,7 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix, x, y, symmetric:: end if symmetric m, n = size(dest) - for j in 1:n, i in (j+1):m + for j in 1:n, i in (j + 1):m dest[i, j] = dest[j, i] end end @@ -106,8 +106,8 @@ function _pairwise!(f, dest::AbstractMatrix, x, y; throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise")) end - x′ = x isa Union{AbstractArray, Tuple, NamedTuple} ? x : collect(x) - y′ = y isa Union{AbstractArray, Tuple, NamedTuple} ? y : collect(y) + x′ = x isa Union{AbstractArray,Tuple,NamedTuple} ? x : collect(x) + y′ = y isa Union{AbstractArray,Tuple,NamedTuple} ? y : collect(y) m = length(x′) n = length(y′) @@ -123,7 +123,7 @@ using Base: typejoin_union_tuple # Identical to `Base.promote_typejoin` except that it uses `promote_type` # instead of `typejoin` to combine members of `Union` types -function promote_type_union(::Type{T}) where T +function promote_type_union(::Type{T}) where {T} if T === Union{} return Union{} elseif T isa UnionAll @@ -138,14 +138,14 @@ function promote_type_union(::Type{T}) where T end function _pairwise(::Val{skipmissing}, f, x, y, symmetric::Bool) where {skipmissing} - x′ = x isa Union{AbstractArray, Tuple, NamedTuple} ? x : collect(x) - y′ = y isa Union{AbstractArray, Tuple, NamedTuple} ? y : collect(y) + x′ = x isa Union{AbstractArray,Tuple,NamedTuple} ? x : collect(x) + y′ = y isa Union{AbstractArray,Tuple,NamedTuple} ? 
y : collect(y) m = length(x′) n = length(y′) - T = Core.Compiler.return_type(f, Tuple{eltype(x′), eltype(y′)}) + T = Core.Compiler.return_type(f, Tuple{eltype(x′),eltype(y′)}) Tsm = Core.Compiler.return_type((x, y) -> f(disallowmissing(x), disallowmissing(y)), - Tuple{eltype(x′), eltype(y′)}) + Tuple{eltype(x′),eltype(y′)}) if skipmissing === :none dest = Matrix{T}(undef, m, n) @@ -158,7 +158,7 @@ function _pairwise(::Val{skipmissing}, f, x, y, symmetric::Bool) where {skipmiss # Preserve inferred element type isempty(dest) && return dest - _pairwise!(f, dest, x′, y′, symmetric=symmetric, skipmissing=skipmissing) + _pairwise!(f, dest, x′, y′; symmetric=symmetric, skipmissing=skipmissing) if isconcretetype(eltype(dest)) return dest @@ -167,7 +167,7 @@ function _pairwise(::Val{skipmissing}, f, x, y, symmetric::Bool) where {skipmiss # but using `promote_type` rather than `promote_typejoin`) U = mapreduce(typeof, promote_type, dest) # V is inferred (contrary to U), but it only gives an upper bound for U - V = promote_type_union(Union{T, Tsm}) + V = promote_type_union(Union{T,Tsm}) return convert(Matrix{U}, dest)::Matrix{<:V} end end @@ -239,7 +239,7 @@ function pairwise!(f, dest::AbstractMatrix, x, y=x; "a single set of variables (x === y)")) end - return _pairwise!(f, dest, x, y, symmetric=symmetric, skipmissing=skipmissing) + return _pairwise!(f, dest, x, y; symmetric=symmetric, skipmissing=skipmissing) end """ @@ -307,23 +307,29 @@ end # cov(x) is faster than cov(x, x) _cov(x, y) = x === y ? cov(x) : cov(x, y) -pairwise!(::typeof(cov), dest::AbstractMatrix, x, y; - symmetric::Bool=false, skipmissing::Symbol=:none) = - pairwise!(_cov, dest, x, y, symmetric=symmetric, skipmissing=skipmissing) +function pairwise!(::typeof(cov), dest::AbstractMatrix, x, y; + symmetric::Bool=false, skipmissing::Symbol=:none) + return pairwise!(_cov, dest, x, y; symmetric=symmetric, skipmissing=skipmissing) +end -pairwise(::typeof(cov), x, y; symmetric::Bool=false, skipmissing::Symbol=:none) = - pairwise(_cov, x, y, symmetric=symmetric, skipmissing=skipmissing) +function pairwise(::typeof(cov), x, y; symmetric::Bool=false, skipmissing::Symbol=:none) + return pairwise(_cov, x, y; symmetric=symmetric, skipmissing=skipmissing) +end -pairwise!(::typeof(cov), dest::AbstractMatrix, x; - symmetric::Bool=true, skipmissing::Symbol=:none) = - pairwise!(_cov, dest, x, x, symmetric=symmetric, skipmissing=skipmissing) +function pairwise!(::typeof(cov), dest::AbstractMatrix, x; + symmetric::Bool=true, skipmissing::Symbol=:none) + return pairwise!(_cov, dest, x, x; symmetric=symmetric, skipmissing=skipmissing) +end -pairwise(::typeof(cov), x; symmetric::Bool=true, skipmissing::Symbol=:none) = - pairwise(_cov, x, x, symmetric=symmetric, skipmissing=skipmissing) +function pairwise(::typeof(cov), x; symmetric::Bool=true, skipmissing::Symbol=:none) + return pairwise(_cov, x, x; symmetric=symmetric, skipmissing=skipmissing) +end -pairwise!(::typeof(cor), dest::AbstractMatrix, x; - symmetric::Bool=true, skipmissing::Symbol=:none) = - pairwise!(cor, dest, x, x, symmetric=symmetric, skipmissing=skipmissing) +function pairwise!(::typeof(cor), dest::AbstractMatrix, x; + symmetric::Bool=true, skipmissing::Symbol=:none) + return pairwise!(cor, dest, x, x; symmetric=symmetric, skipmissing=skipmissing) +end -pairwise(::typeof(cor), x; symmetric::Bool=true, skipmissing::Symbol=:none) = - pairwise(cor, x, x, symmetric=symmetric, skipmissing=skipmissing) +function pairwise(::typeof(cor), x; symmetric::Bool=true, skipmissing::Symbol=:none) + 
return pairwise(cor, x, x; symmetric=symmetric, skipmissing=skipmissing) +end diff --git a/src/partialcor.jl b/src/partialcor.jl index 33a3ee593..17c2a652e 100644 --- a/src/partialcor.jl +++ b/src/partialcor.jl @@ -16,12 +16,12 @@ end function _partialcor(x::AbstractVector, μx, y::AbstractVector, μy, Z::AbstractMatrix) p = size(Z, 2) p == 1 && return _partialcor(x, μx, y, μy, vec(Z)) - z₀ = view(Z, :, 1) + z₀ = view(Z, :, 1) Zmz₀ = view(Z, :, 2:p) μz₀ = mean(z₀) - rxz = _partialcor(x, μx, z₀, μz₀, Zmz₀) - rzy = _partialcor(z₀, μz₀, y, μy, Zmz₀) - rxy = _partialcor(x, μx, y, μy, Zmz₀)::typeof(rxz) + rxz = _partialcor(x, μx, z₀, μz₀, Zmz₀) + rzy = _partialcor(z₀, μz₀, y, μy, Zmz₀) + rxy = _partialcor(x, μx, y, μy, Zmz₀)::typeof(rxz) return (rxy - rxz * rzy) / (sqrt(1 - rxz^2) * sqrt(1 - rzy^2)) end diff --git a/src/rankcorr.jl b/src/rankcorr.jl index 1713300ff..a32988714 100644 --- a/src/rankcorr.jl +++ b/src/rankcorr.jl @@ -31,13 +31,13 @@ function corspearman(X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}) C = Matrix{Float64}(I, n, 1) any(isnan, y) && return fill!(C, NaN) yrank = tiedrank(y) - for j = 1:n + for j in 1:n Xj = view(X, :, j) if any(isnan, Xj) - C[j,1] = NaN + C[j, 1] = NaN else Xjrank = tiedrank(Xj) - C[j,1] = cor(Xjrank, yrank) + C[j, 1] = cor(Xjrank, yrank) end end return C @@ -50,13 +50,13 @@ function corspearman(x::AbstractVector{<:Real}, Y::AbstractMatrix{<:Real}) C = Matrix{Float64}(I, 1, n) any(isnan, x) && return fill!(C, NaN) xrank = tiedrank(x) - for j = 1:n + for j in 1:n Yj = view(Y, :, j) if any(isnan, Yj) - C[1,j] = NaN + C[1, j] = NaN else Yjrank = tiedrank(Yj) - C[1,j] = cor(xrank, Yjrank) + C[1, j] = cor(xrank, Yjrank) end end return C @@ -66,23 +66,23 @@ function corspearman(X::AbstractMatrix{<:Real}) n = size(X, 2) C = Matrix{Float64}(I, n, n) anynan = Vector{Bool}(undef, n) - for j = 1:n + for j in 1:n Xj = view(X, :, j) anynan[j] = any(isnan, Xj) if anynan[j] - C[:,j] .= NaN - C[j,:] .= NaN - C[j,j] = 1 + C[:, j] .= NaN + C[j, :] .= NaN + C[j, j] = 1 continue end Xjrank = tiedrank(Xj) - for i = 1:(j-1) + for i in 1:(j - 1) Xi = view(X, :, i) if anynan[i] - C[i,j] = C[j,i] = NaN + C[i, j] = C[j, i] = NaN else Xirank = tiedrank(Xi) - C[i,j] = C[j,i] = cor(Xjrank, Xirank) + C[i, j] = C[j, i] = cor(Xjrank, Xirank) end end end @@ -95,27 +95,26 @@ function corspearman(X::AbstractMatrix{<:Real}, Y::AbstractMatrix{<:Real}) nr = size(X, 2) nc = size(Y, 2) C = Matrix{Float64}(undef, nr, nc) - for j = 1:nr + for j in 1:nr Xj = view(X, :, j) if any(isnan, Xj) - C[j,:] .= NaN + C[j, :] .= NaN continue end Xjrank = tiedrank(Xj) - for i = 1:nc + for i in 1:nc Yi = view(Y, :, i) if any(isnan, Yi) - C[j,i] = NaN + C[j, i] = NaN else Yirank = tiedrank(Yi) - C[j,i] = cor(Xjrank, Yirank) + C[j, i] = cor(Xjrank, Yirank) end end end return C end - ####################################### # # Kendall correlation @@ -125,10 +124,15 @@ end # Knight, William R. “A Computer Method for Calculating Kendall's Tau with Ungrouped Data.” # Journal of the American Statistical Association, vol. 61, no. 314, 1966, pp. 436–439. # JSTOR, www.jstor.org/stable/2282833. 
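# (Added note) Knight's method makes this O(n log n) rather than the naive O(n^2):
# discordant pairs are obtained as the bubble-sort (swap) distance returned by
# merge_sort! below, and countties corrects for ties in x, in y, and in both.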
-function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx::AbstractArray{<:Integer}=sortperm(x)) - if any(isnan, x) || any(isnan, y) return NaN end +function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, + permx::AbstractArray{<:Integer}=sortperm(x)) + if any(isnan, x) || any(isnan, y) + return NaN + end n = length(x) - if n != length(y) error("Vectors must have same length") end + if n != length(y) + error("Vectors must have same length") + end # Initial sorting permute!(x, permx) @@ -139,7 +143,7 @@ function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx ntiesx = ndoubleties = nswaps = widen(0) k = 0 - for i = 2:n + for i in 2:n if x[i - 1] == x[i] k += 1 elseif k > 0 @@ -148,7 +152,7 @@ function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx # double ties can be counted by calling countties. sort!(view(y, (i - k - 1):(i - 1))) ntiesx += div(widen(k) * (k + 1), 2) # Must use wide integers here - ndoubleties += countties(y, i - k - 1, i - 1) + ndoubleties += countties(y, i - k - 1, i - 1) k = 0 end end @@ -163,8 +167,8 @@ function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx # Calls to float below prevent possible overflow errors when # length(x) exceeds 77_936 (32 bit) or 5_107_605_667 (64 bit) - (npairs + ndoubleties - ntiesx - ntiesy - 2 * nswaps) / - sqrt(float(npairs - ntiesx) * float(npairs - ntiesy)) + return (npairs + ndoubleties - ntiesx - ntiesy - 2 * nswaps) / + sqrt(float(npairs - ntiesx) * float(npairs - ntiesy)) end """ @@ -173,27 +177,29 @@ end Compute Kendall's rank correlation coefficient, τ. `x` and `y` must both be either matrices or vectors. """ -corkendall(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}) = corkendall!(copy(x), copy(y)) +function corkendall(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}) + return corkendall!(copy(x), copy(y)) +end function corkendall(X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}) permy = sortperm(y) - return([corkendall!(copy(y), X[:,i], permy) for i in 1:size(X, 2)]) + return ([corkendall!(copy(y), X[:, i], permy) for i in 1:size(X, 2)]) end function corkendall(x::AbstractVector{<:Real}, Y::AbstractMatrix{<:Real}) n = size(Y, 2) permx = sortperm(x) - return(reshape([corkendall!(copy(x), Y[:,i], permx) for i in 1:n], 1, n)) + return (reshape([corkendall!(copy(x), Y[:, i], permx) for i in 1:n], 1, n)) end function corkendall(X::AbstractMatrix{<:Real}) n = size(X, 2) C = Matrix{Float64}(I, n, n) - for j = 2:n - permx = sortperm(X[:,j]) - for i = 1:j - 1 - C[j,i] = corkendall!(X[:,j], X[:,i], permx) - C[i,j] = C[j,i] + for j in 2:n + permx = sortperm(X[:, j]) + for i in 1:(j - 1) + C[j, i] = corkendall!(X[:, j], X[:, i], permx) + C[i, j] = C[j, i] end end return C @@ -203,10 +209,10 @@ function corkendall(X::AbstractMatrix{<:Real}, Y::AbstractMatrix{<:Real}) nr = size(X, 2) nc = size(Y, 2) C = Matrix{Float64}(undef, nr, nc) - for j = 1:nr - permx = sortperm(X[:,j]) - for i = 1:nc - C[j,i] = corkendall!(X[:,j], Y[:,i], permx) + for j in 1:nr + permx = sortperm(X[:, j]) + for i in 1:nc + C[j, i] = corkendall!(X[:, j], Y[:, i], permx) end end return C @@ -224,7 +230,7 @@ function countties(x::AbstractVector, lo::Integer, hi::Integer) # length(x) exceeds 2^16 (32 bit) or 2^32 (64 bit) thistiecount = result = widen(0) checkbounds(x, lo:hi) - for i = (lo + 1):hi + for i in (lo + 1):hi if x[i] == x[i - 1] thistiecount += 1 elseif thistiecount > 0 @@ -236,7 +242,7 @@ function countties(x::AbstractVector, 
lo::Integer, hi::Integer) if thistiecount > 0 result += div(thistiecount * (thistiecount + 1), 2) end - result + return result end # Tests appear to show that a value of 64 is optimal, @@ -251,7 +257,8 @@ const SMALL_THRESHOLD = 64 Mutates `v` by sorting elements `x[lo:hi]` using the merge sort algorithm. This method is a copy-paste-edit of sort! in base/sort.jl, amended to return the bubblesort distance. """ -function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVector=similar(v, 0)) +function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, + t::AbstractVector=similar(v, 0)) # Use of widen below prevents possible overflow errors when # length(v) exceeds 2^16 (32 bit) or 2^32 (64 bit) nswaps = widen(0) @@ -261,7 +268,7 @@ function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVec m = midpoint(lo, hi) (length(t) < m - lo + 1) && resize!(t, m - lo + 1) - nswaps = merge_sort!(v, lo, m, t) + nswaps = merge_sort!(v, lo, m, t) nswaps += merge_sort!(v, m + 1, hi, t) i, j = 1, lo @@ -294,7 +301,7 @@ end # insertion_sort! and midpoint copied from Julia Base # (commit 28330a2fef4d9d149ba0fd3ffa06347b50067647, dated 20 Sep 2020) -midpoint(lo::T, hi::T) where T <: Integer = lo + ((hi - lo) >>> 0x01) +midpoint(lo::T, hi::T) where {T<:Integer} = lo + ((hi - lo) >>> 0x01) midpoint(lo::Integer, hi::Integer) = midpoint(promote(lo, hi)...) """ @@ -304,9 +311,11 @@ Mutates `v` by sorting elements `x[lo:hi]` using the insertion sort algorithm. This method is a copy-paste-edit of sort! in base/sort.jl, amended to return the bubblesort distance. """ function insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer) - if lo == hi return widen(0) end + if lo == hi + return widen(0) + end nswaps = widen(0) - for i = lo + 1:hi + for i in (lo + 1):hi j = i x = v[i] while j > lo diff --git a/src/ranking.jl b/src/ranking.jl index 317f24186..818c9f49e 100644 --- a/src/ranking.jl +++ b/src/ranking.jl @@ -10,8 +10,8 @@ function _check_randparams(rks, x, p) n = length(rks) nx = length(x) np = length(p) - nx == np == n || throw( - DimensionMismatch("lengths of x $nx and p $np do not match that of ranks $n")) + nx == np == n || + throw(DimensionMismatch("lengths of x $nx and p $np do not match that of ranks $n")) return n end @@ -42,7 +42,6 @@ function _ordinalrank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<: return rks end - """ ordinalrank(x; lt=isless, by=identity, rev::Bool=false, ...) @@ -52,9 +51,7 @@ All items in `x` are given distinct, successive ranks based on their position in the sorted vector. Missing values are assigned rank `missing`. """ -ordinalrank(x::AbstractArray; sortkwargs...) = - _rank(_ordinalrank!, x; sortkwargs...) - +ordinalrank(x::AbstractArray; sortkwargs...) = _rank(_ordinalrank!, x; sortkwargs...) # Competition ranking ("1224" ranking) -- resolve tied ranks using min function _competerank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<:Integer}) @@ -79,7 +76,6 @@ function _competerank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<: return rks end - """ competerank(x; lt=isless, by=identity, rev::Bool=false, ...) @@ -89,9 +85,7 @@ Equal (*"tied"*) items are given the same rank, and the next rank comes after a that is equal to the number of tied items - 1. Missing values are assigned rank `missing`. """ -competerank(x::AbstractArray; sortkwargs...) = - _rank(_competerank!, x; sortkwargs...) - +competerank(x::AbstractArray; sortkwargs...) = _rank(_competerank!, x; sortkwargs...) 
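# (Illustrative comparison of the four ranking schemes in this file; the expected
# values follow the definitions documented here:)
#
#     x = [10, 20, 20, 30]
#     ordinalrank(x)   # [1, 2, 3, 4]         "1234": ties broken by position
#     competerank(x)   # [1, 2, 2, 4]         "1224": gap after tied items
#     denserank(x)     # [1, 2, 2, 3]         "1223": no gap after ties
#     tiedrank(x)      # [1.0, 2.5, 2.5, 4.0] ties get the mean rank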
# Dense ranking ("1223" ranking) -- resolve tied ranks using min function _denserank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<:Integer}) @@ -116,7 +110,6 @@ function _denserank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<:In return rks end - """ denserank(x; lt=isless, by=identity, rev::Bool=false, ...) @@ -126,9 +119,7 @@ Equal items receive the same rank, and the next subsequent rank is assigned with no gap. Missing values are assigned rank `missing`. """ -denserank(x::AbstractArray; sortkwargs...) = - _rank(_denserank!, x; sortkwargs...) - +denserank(x::AbstractArray; sortkwargs...) = _rank(_denserank!, x; sortkwargs...) # Tied ranking ("1 2.5 2.5 4" ranking) -- resolve tied ranks using average function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<:Integer}) @@ -143,7 +134,7 @@ function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<:Int if cx != v # fill average rank to s : e-1 ar = (s + e - 1) / 2 - for i = s : e-1 + for i in s:(e - 1) rks[p[i]] = ar end # switch to next range @@ -154,7 +145,7 @@ function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::AbstractArray{<:Int # the last range ar = (s + n) / 2 - for i = s : n + for i in s:n rks[p[i]] = ar end end @@ -172,5 +163,4 @@ Equal (*"tied"*) items receive the mean of the ranks they would have been assigned under the ordinal ranking (see [`ordinalrank`](@ref)). Missing values are assigned rank `missing`. """ -tiedrank(x::AbstractArray; sortkwargs...) = - _rank(_tiedrank!, x, Float64; sortkwargs...) +tiedrank(x::AbstractArray; sortkwargs...) = _rank(_tiedrank!, x, Float64; sortkwargs...) diff --git a/src/reliability.jl b/src/reliability.jl index c96ec495a..dbf4297c9 100644 --- a/src/reliability.jl +++ b/src/reliability.jl @@ -1,4 +1,4 @@ -struct CronbachAlpha{T <: Real} +struct CronbachAlpha{T<:Real} alpha::T dropped::Vector{T} end @@ -57,8 +57,8 @@ function cronbachalpha(covmatrix::AbstractMatrix{<:Real}) "If so, call `cronbachalpha(cov(...))` instead.")) end k = size(covmatrix, 2) - k > 1 || throw(ArgumentError("Covariance matrix must have more than one column.")) - v = vec(sum(covmatrix, dims=1)) + k > 1 || throw(ArgumentError("Covariance matrix must have more than one column.")) + v = vec(sum(covmatrix; dims=1)) σ = sum(v) for i in axes(v, 1) v[i] -= covmatrix[i, i] @@ -67,7 +67,9 @@ function cronbachalpha(covmatrix::AbstractMatrix{<:Real}) alpha = k * (1 - σ_diag / σ) / (k - 1) if k > 2 - dropped = typeof(alpha)[(k - 1) * (1 - (σ_diag - covmatrix[i, i]) / (σ - 2*v[i] - covmatrix[i, i])) / (k - 2) + dropped = typeof(alpha)[(k - 1) * (1 - + (σ_diag - covmatrix[i, i]) / + (σ - 2*v[i] - covmatrix[i, i])) / (k - 2) for i in 1:k] else # if k = 2 do not produce dropped; this has to be also diff --git a/src/robust.jl b/src/robust.jl index f21595395..d7323789b 100644 --- a/src/robust.jl +++ b/src/robust.jl @@ -17,7 +17,8 @@ function uplo(x::AbstractVector; prop::Real=0.0, count::Integer=0) count = floor(Int, n * prop) else prop == 0 || throw(ArgumentError("prop and count can not both be > 0.")) - 0 <= count < n/2 || throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2.")) + 0 <= count < n/2 || + throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2.")) end # indices for lowest count values @@ -25,7 +26,7 @@ function uplo(x::AbstractVector; prop::Real=0.0, count::Integer=0) lo = partialsort!(x2, count+1) up = partialsort!(x2, n-count) - up, lo + return up, lo end """ @@ -52,7 +53,7 @@ julia> collect(trim([5,2,4,3,1], prop=0.2)) function 
trim(x::AbstractVector; prop::Real=0.0, count::Integer=0) up, lo = uplo(x; prop=prop, count=count) - (xi for xi in x if lo <= xi <= up) + return (xi for xi in x if lo <= xi <= up) end """ @@ -62,7 +63,7 @@ A variant of [`trim`](@ref) that modifies `x` in place. """ function trim!(x::AbstractVector; prop::Real=0.0, count::Integer=0) up, lo = uplo(x; prop=prop, count=count) - ix = (i for (i,xi) in enumerate(x) if lo > xi || xi > up) + ix = (i for (i, xi) in enumerate(x) if lo > xi || xi > up) deleteat!(x, ix) return x end @@ -93,7 +94,7 @@ julia> collect(winsor([5,2,3,4,1], prop=0.2)) function winsor(x::AbstractVector; prop::Real=0.0, count::Integer=0) up, lo = uplo(x; prop=prop, count=count) - (clamp(xi, lo, up) for xi in x) + return (clamp(xi, lo, up) for xi in x) end """ @@ -106,7 +107,6 @@ function winsor!(x::AbstractVector; prop::Real=0.0, count::Integer=0) return x end - ############################# # # Other @@ -128,9 +128,10 @@ function trimvar(x::AbstractVector; prop::Real=0.0, count::Integer=0) 0 <= prop < 0.5 || throw(ArgumentError("prop must satisfy 0 ≤ prop < 0.5.")) count = floor(Int, n * prop) else - 0 <= count < n/2 || throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2.")) + 0 <= count < n/2 || + throw(ArgumentError("count must satisfy 0 ≤ count < length(x)/2.")) prop = count/n end - return var(winsor(x, count=count)) / (n * (1 - 2prop)^2) + return var(winsor(x; count=count)) / (n * (1 - 2prop)^2) end diff --git a/src/sampling.jl b/src/sampling.jl index 358092e9a..79abe1c81 100644 --- a/src/sampling.jl +++ b/src/sampling.jl @@ -17,11 +17,11 @@ function direct_sample!(rng::AbstractRNG, a::UnitRange, x::AbstractArray) s = Sampler(rng, 1:length(a)) b = a[1] - 1 if b == 0 - for i = 1:length(x) + for i in 1:length(x) x[i] = rand(rng, s) end else - for i = 1:length(x) + for i in 1:length(x) x[i] = b + rand(rng, s) end end @@ -43,7 +43,7 @@ function direct_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) Base.mightalias(a, x) && throw(ArgumentError("output array x must not share memory with input array a")) s = Sampler(rng, 1:length(a)) - for i = 1:length(x) + for i in 1:length(x) x[i] = a[rand(rng, s)] end return x @@ -54,11 +54,13 @@ direct_sample!(a::AbstractArray, x::AbstractArray) = direct_sample!(default_rng( # use some heuristics to decide whether it is beneficial for k samples # (true for a subset of hardware-supported numeric types) _storeindices(n, k, ::Type{T}) where {T<:Integer} = n ≤ typemax(T) -_storeindices(n, k, ::Type{T}) where {T<:Union{Float32,Float64}} = k < 22 && n ≤ maxintfloat(T) +function _storeindices(n, k, ::Type{T}) where {T<:Union{Float32,Float64}} + return k < 22 && n ≤ maxintfloat(T) +end _storeindices(n, k, ::Type{Complex{T}}) where {T} = _storeindices(n, k, T) _storeindices(n, k, ::Type{Rational{T}}) where {T} = k < 16 && _storeindices(n, k, T) _storeindices(n, k, T) = false -storeindices(n, k, ::Type{T}) where {T<:Base.HWNumber} = _storeindices(n, k, T) +storeindices(n, k, ::Type{T}) where {T<:Base.HWNumber} = _storeindices(n, k, T) storeindices(n, k, T) = false # order results of a sampler that does not order automatically @@ -72,14 +74,14 @@ function sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractArray, x::Abstra # in some cases it might be faster to check # issorted(a) to see if we can just sort x if storeindices(n, k, eltype(x)) - sort!(sampler!(rng, Base.OneTo(n), x), by=real, lt=<) - for i = 1:k + sort!(sampler!(rng, Base.OneTo(n), x); by=real, lt=<) + for i in 1:k x[i] = a[Int(x[i])] end else indices = 
Array{Int}(undef, k) sort!(sampler!(rng, Base.OneTo(n), indices)) - for i = 1:k + for i in 1:k x[i] = a[indices[i]] end end @@ -87,15 +89,17 @@ function sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractArray, x::Abstra end # special case of a range can be done more efficiently -sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractRange, x::AbstractArray) = - sort!(sampler!(rng, a, x), rev=step(a)<0) +function sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractRange, x::AbstractArray) + return sort!(sampler!(rng, a, x); rev=step(a)<0) +end # weighted case: -sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractArray, - wv::AbstractWeights, x::AbstractArray) = +function sample_ordered!(sampler!, rng::AbstractRNG, a::AbstractArray, + wv::AbstractWeights, x::AbstractArray) sample_ordered!(rng, a, x) do rng, a, x - sampler!(rng, a, wv, x) + return sampler!(rng, a, wv, x) end +end ### draw a pair of distinct integers in [1:n] @@ -151,11 +155,11 @@ function knuths_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; k <= n || error("length(x) should not exceed length(a)") # initialize - for i = 1:k + for i in 1:k x[i] = a[i] end if initshuffle - for j = 1:k + for j in 1:k l = rand(rng, j:k) if l != j t = x[j] @@ -167,15 +171,16 @@ function knuths_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; # scan remaining s = Sampler(rng, 1:k) - for i = k+1:n + for i in (k + 1):n if rand(rng) * i < k # keep it with probability k / i x[rand(rng, s)] = a[i] end end return x end -knuths_sample!(a::AbstractArray, x::AbstractArray; initshuffle::Bool=true) = - knuths_sample!(default_rng(), a, x; initshuffle=initshuffle) +function knuths_sample!(a::AbstractArray, x::AbstractArray; initshuffle::Bool=true) + return knuths_sample!(default_rng(), a, x; initshuffle=initshuffle) +end """ fisher_yates_sample!([rng], a::AbstractArray, x::AbstractArray) @@ -211,11 +216,11 @@ function fisher_yates_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArr k <= n || error("length(x) should not exceed length(a)") inds = Vector{Int}(undef, n) - for i = 1:n + for i in 1:n inds[i] = i end - for i = 1:k + for i in 1:k j = rand(rng, i:n) t = inds[j] inds[j] = inds[i] @@ -224,8 +229,9 @@ function fisher_yates_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArr end return x end -fisher_yates_sample!(a::AbstractArray, x::AbstractArray) = - fisher_yates_sample!(default_rng(), a, x) +function fisher_yates_sample!(a::AbstractArray, x::AbstractArray) + return fisher_yates_sample!(default_rng(), a, x) +end """ self_avoid_sample!([rng], a::AbstractArray, x::AbstractArray) @@ -260,7 +266,7 @@ function self_avoid_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray push!(s, idx) # remaining - for i = 2:k + for i in 2:k idx = rand(rng, rgen) while idx in s idx = rand(rng, rgen) @@ -270,8 +276,9 @@ function self_avoid_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray end return x end -self_avoid_sample!(a::AbstractArray, x::AbstractArray) = - self_avoid_sample!(default_rng(), a, x) +function self_avoid_sample!(a::AbstractArray, x::AbstractArray) + return self_avoid_sample!(default_rng(), a, x) +end """ seqsample_a!([rng], a::AbstractArray, x::AbstractArray) @@ -391,7 +398,7 @@ function seqsample_d!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) while n > 1 && threshold < N while true - local X + local X while true X = N * (1 - vprime) s = trunc(Int, X) @@ -444,7 +451,7 @@ function seqsample_d!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) end if n > 1 - 
seqsample_a!(rng, a[i+1:end], @view x[j+1:end]) + seqsample_a!(rng, a[(i + 1):end], @view x[(j + 1):end]) else s = trunc(Int, N * vprime) x[j+=1] = a[i+=s+1] @@ -453,7 +460,6 @@ end seqsample_d!(a::AbstractArray, x::AbstractArray) = seqsample_d!(default_rng(), a, x) - ### Interface functions (poly-algorithms) """ sample([rng], a::AbstractArray, [wv::AbstractWeights]) @@ -467,7 +473,6 @@ Optionally specify a random number generator `rng` as the first argument sample(rng::AbstractRNG, a::AbstractArray) = a[rand(rng, 1:length(a))] sample(a::AbstractArray) = sample(default_rng(), a) - """ sample!([rng], a, [wv::AbstractWeights], x; replace=true, ordered=false) @@ -523,9 +528,10 @@ function sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; end return x end -sample!(a::AbstractArray, x::AbstractArray; replace::Bool=true, ordered::Bool=false) = - sample!(default_rng(), a, x; replace=replace, ordered=ordered) - +function sample!(a::AbstractArray, x::AbstractArray; replace::Bool=true, + ordered::Bool=false) + return sample!(default_rng(), a, x; replace=replace, ordered=ordered) +end """ sample([rng], a, [wv::AbstractWeights], n::Integer; replace=true, ordered=false) @@ -541,12 +547,12 @@ Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). """ function sample(rng::AbstractRNG, a::AbstractArray{T}, n::Integer; - replace::Bool=true, ordered::Bool=false) where T - sample!(rng, a, Vector{T}(undef, n); replace=replace, ordered=ordered) + replace::Bool=true, ordered::Bool=false) where {T} + return sample!(rng, a, Vector{T}(undef, n); replace=replace, ordered=ordered) +end +function sample(a::AbstractArray, n::Integer; replace::Bool=true, ordered::Bool=false) + return sample(default_rng(), a, n; replace=replace, ordered=ordered) end -sample(a::AbstractArray, n::Integer; replace::Bool=true, ordered::Bool=false) = - sample(default_rng(), a, n; replace=replace, ordered=ordered) - """ sample([rng], a, [wv::AbstractWeights], dims::Dims; replace=true, ordered=false) @@ -562,11 +568,12 @@ Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). 
""" function sample(rng::AbstractRNG, a::AbstractArray{T}, dims::Dims; - replace::Bool=true, ordered::Bool=false) where T - sample!(rng, a, Array{T}(undef, dims); replace=replace, ordered=ordered) + replace::Bool=true, ordered::Bool=false) where {T} + return sample!(rng, a, Array{T}(undef, dims); replace=replace, ordered=ordered) +end +function sample(a::AbstractArray, dims::Dims; replace::Bool=true, ordered::Bool=false) + return sample(default_rng(), a, dims; replace=replace, ordered=ordered) end -sample(a::AbstractArray, dims::Dims; replace::Bool=true, ordered::Bool=false) = - sample(default_rng(), a, dims; replace=replace, ordered=ordered) ################################################################ # @@ -625,13 +632,14 @@ function direct_sample!(rng::AbstractRNG, a::AbstractArray, throw(ArgumentError("non 1-based arrays are not supported")) n = length(a) length(wv) == n || throw(DimensionMismatch("Inconsistent lengths.")) - for i = 1:length(x) + for i in 1:length(x) x[i] = a[sample(rng, wv)] end return x end -direct_sample!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) = - direct_sample!(default_rng(), a, wv, x) +function direct_sample!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) + return direct_sample!(default_rng(), a, wv, x) +end """ alias_sample!([rng], a::AbstractArray, wv::AbstractWeights, x::AbstractArray) @@ -646,7 +654,8 @@ with General Distributions." *ACM Transactions on Mathematical Software* 3 (3): Noting `k=length(x)` and `n=length(a)`, this algorithm takes ``O(n)`` time for building the alias table, and then ``O(1)`` to draw each sample. It consumes ``k`` random numbers. """ -function alias_sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::AbstractArray) +function alias_sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, + x::AbstractArray) Base.mightalias(a, x) && throw(ArgumentError("output array x must not share memory with input array a")) 1 == firstindex(a) == firstindex(wv) || @@ -664,8 +673,9 @@ function alias_sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, end return x end -alias_sample!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) = - alias_sample!(default_rng(), a, wv, x) +function alias_sample!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) + return alias_sample!(default_rng(), a, wv, x) +end """ naive_wsample_norep!([rng], a::AbstractArray, wv::AbstractWeights, x::AbstractArray) @@ -695,7 +705,7 @@ function naive_wsample_norep!(rng::AbstractRNG, a::AbstractArray, w = Vector{Float64}(undef, n) copyto!(w, wv) - for i = 1:k + for i in 1:k u = rand(rng) * wsum j = 1 c = w[1] @@ -709,8 +719,9 @@ function naive_wsample_norep!(rng::AbstractRNG, a::AbstractArray, end return x end -naive_wsample_norep!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) = - naive_wsample_norep!(default_rng(), a, wv, x) +function naive_wsample_norep!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) + return naive_wsample_norep!(default_rng(), a, wv, x) +end # Weighted sampling without replacement # Instead of keys u^(1/w) where u = random(0,1) keys w/v where v = randexp(1) are used. 
@@ -735,7 +746,8 @@ function efraimidis_a_wsample_norep!(rng::AbstractRNG, a::AbstractArray, throw(ArgumentError("non 1-based arrays are not supported")) isfinite(sum(wv)) || throw(ArgumentError("only finite weights are supported")) n = length(a) - length(wv) == n || throw(DimensionMismatch("a and wv must be of same length (got $n and $(length(wv))).")) + length(wv) == n || + throw(DimensionMismatch("a and wv must be of same length (got $n and $(length(wv))).")) k = length(x) # calculate keys for all items @@ -745,14 +757,16 @@ function efraimidis_a_wsample_norep!(rng::AbstractRNG, a::AbstractArray, end # return items with largest keys - index = sortperm(keys; alg = PartialQuickSort(k), rev = true) + index = sortperm(keys; alg=PartialQuickSort(k), rev=true) for i in 1:k x[i] = a[index[i]] end return x end -efraimidis_a_wsample_norep!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) = - efraimidis_a_wsample_norep!(default_rng(), a, wv, x) +function efraimidis_a_wsample_norep!(a::AbstractArray, wv::AbstractWeights, + x::AbstractArray) + return efraimidis_a_wsample_norep!(default_rng(), a, wv, x) +end # Weighted sampling without replacement # Instead of keys u^(1/w) where u = random(0,1) keys w/v where v = randexp(1) are used. @@ -777,7 +791,8 @@ function efraimidis_ares_wsample_norep!(rng::AbstractRNG, a::AbstractArray, throw(ArgumentError("non 1-based arrays are not supported")) isfinite(sum(wv)) || throw(ArgumentError("only finite weights are supported")) n = length(a) - length(wv) == n || throw(DimensionMismatch("a and wv must be of same length (got $n and $(length(wv))).")) + length(wv) == n || + throw(DimensionMismatch("a and wv must be of same length (got $n and $(length(wv))).")) k = length(x) k > 0 || return x @@ -795,13 +810,14 @@ function efraimidis_ares_wsample_norep!(rng::AbstractRNG, a::AbstractArray, end i >= k && break end - i < k && throw(DimensionMismatch("wv must have at least $k strictly positive entries (got $i)")) + i < k && + throw(DimensionMismatch("wv must have at least $k strictly positive entries (got $i)")) heapify!(pq) # set threshold threshold = pq[1].first - for i in s+1:n + for i in (s + 1):n w = wv.values[i] w < 0 && error("Negative weight found in weight vector at index $i") w > 0 || continue @@ -824,8 +840,10 @@ function efraimidis_ares_wsample_norep!(rng::AbstractRNG, a::AbstractArray, end return x end -efraimidis_ares_wsample_norep!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) = - efraimidis_ares_wsample_norep!(default_rng(), a, wv, x) +function efraimidis_ares_wsample_norep!(a::AbstractArray, wv::AbstractWeights, + x::AbstractArray) + return efraimidis_ares_wsample_norep!(default_rng(), a, wv, x) +end # Weighted sampling without replacement # Instead of keys u^(1/w) where u = random(0,1) keys w/v where v = randexp(1) are used. 
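# ---------------------------------------------------------------------------
# Editor's note: a usage sketch, not part of the patch. The A/A-Res routines
# above, and the A-ExpJ routine that this comment introduces, are internal;
# callers normally reach them through the exported `sample` API defined
# earlier in this file, e.g.:
using StatsBase, Random

rng = MersenneTwister(42)
w = Weights([0.1, 0.4, 0.2, 0.3])
# two distinct letters, favoring larger weights, returned in source order:
sample(rng, 'a':'d', w, 2; replace=false, ordered=true)
# ---------------------------------------------------------------------------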
@@ -851,7 +869,8 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, throw(ArgumentError("non 1-based arrays are not supported")) isfinite(sum(wv)) || throw(ArgumentError("only finite weights are supported")) n = length(a) - length(wv) == n || throw(DimensionMismatch("a and wv must be of same length (got $n and $(length(wv))).")) + length(wv) == n || + throw(DimensionMismatch("a and wv must be of same length (got $n and $(length(wv))).")) k = length(x) k > 0 || return x @@ -869,14 +888,15 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, end i >= k && break end - i < k && throw(DimensionMismatch("wv must have at least $k strictly positive entries (got $i)")) + i < k && + throw(DimensionMismatch("wv must have at least $k strictly positive entries (got $i)")) heapify!(pq) # set threshold threshold = pq[1].first X = threshold*randexp(rng) - for i in s+1:n + for i in (s + 1):n w = wv.values[i] w < 0 && error("Negative weight found in weight vector at index $i") w > 0 || continue @@ -894,7 +914,7 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, end if ordered # fill output array with items sorted as in a - sort!(pq, by=last) + sort!(pq; by=last) for i in 1:k x[i] = a[pq[i].second] end @@ -906,9 +926,11 @@ function efraimidis_aexpj_wsample_norep!(rng::AbstractRNG, a::AbstractArray, end return x end -efraimidis_aexpj_wsample_norep!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray; - ordered::Bool=false) = - efraimidis_aexpj_wsample_norep!(default_rng(), a, wv, x; ordered=ordered) +function efraimidis_aexpj_wsample_norep!(a::AbstractArray, wv::AbstractWeights, + x::AbstractArray; + ordered::Bool=false) + return efraimidis_aexpj_wsample_norep!(default_rng(), a, wv, x; ordered=ordered) +end function sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::AbstractArray; replace::Bool=true, ordered::Bool=false) @@ -920,7 +942,7 @@ function sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::Abs if replace if ordered sample_ordered!(rng, a, wv, x) do rng, a, wv, x - sample!(rng, a, wv, x; replace=true, ordered=false) + return sample!(rng, a, wv, x; replace=true, ordered=false) end else if n < 40 @@ -940,23 +962,28 @@ function sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::Abs end return x end -sample!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray; - replace::Bool=true, ordered::Bool=false) = - sample!(default_rng(), a, wv, x; replace=replace, ordered=ordered) - -sample(rng::AbstractRNG, a::AbstractArray{T}, wv::AbstractWeights, n::Integer; - replace::Bool=true, ordered::Bool=false) where {T} = - sample!(rng, a, wv, Vector{T}(undef, n); replace=replace, ordered=ordered) -sample(a::AbstractArray, wv::AbstractWeights, n::Integer; - replace::Bool=true, ordered::Bool=false) = - sample(default_rng(), a, wv, n; replace=replace, ordered=ordered) - -sample(rng::AbstractRNG, a::AbstractArray{T}, wv::AbstractWeights, dims::Dims; - replace::Bool=true, ordered::Bool=false) where {T} = - sample!(rng, a, wv, Array{T}(undef, dims); replace=replace, ordered=ordered) -sample(a::AbstractArray, wv::AbstractWeights, dims::Dims; - replace::Bool=true, ordered::Bool=false) = - sample(default_rng(), a, wv, dims; replace=replace, ordered=ordered) +function sample!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray; + replace::Bool=true, ordered::Bool=false) + return sample!(default_rng(), a, wv, x; replace=replace, ordered=ordered) +end + +function 
sample(rng::AbstractRNG, a::AbstractArray{T}, wv::AbstractWeights, n::Integer; + replace::Bool=true, ordered::Bool=false) where {T} + return sample!(rng, a, wv, Vector{T}(undef, n); replace=replace, ordered=ordered) +end +function sample(a::AbstractArray, wv::AbstractWeights, n::Integer; + replace::Bool=true, ordered::Bool=false) + return sample(default_rng(), a, wv, n; replace=replace, ordered=ordered) +end + +function sample(rng::AbstractRNG, a::AbstractArray{T}, wv::AbstractWeights, dims::Dims; + replace::Bool=true, ordered::Bool=false) where {T} + return sample!(rng, a, wv, Array{T}(undef, dims); replace=replace, ordered=ordered) +end +function sample(a::AbstractArray, wv::AbstractWeights, dims::Dims; + replace::Bool=true, ordered::Bool=false) + return sample(default_rng(), a, wv, dims; replace=replace, ordered=ordered) +end # wsample interface @@ -972,12 +999,15 @@ items appear in the same order as in `a`) should be taken. Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). """ -wsample!(rng::AbstractRNG, a::AbstractArray, w::AbstractVector{<:Real}, x::AbstractArray; - replace::Bool=true, ordered::Bool=false) = - sample!(rng, a, weights(w), x; replace=replace, ordered=ordered) -wsample!(a::AbstractArray, w::AbstractVector{<:Real}, x::AbstractArray; - replace::Bool=true, ordered::Bool=false) = - sample!(default_rng(), a, weights(w), x; replace=replace, ordered=ordered) +function wsample!(rng::AbstractRNG, a::AbstractArray, w::AbstractVector{<:Real}, + x::AbstractArray; + replace::Bool=true, ordered::Bool=false) + return sample!(rng, a, weights(w), x; replace=replace, ordered=ordered) +end +function wsample!(a::AbstractArray, w::AbstractVector{<:Real}, x::AbstractArray; + replace::Bool=true, ordered::Bool=false) + return sample!(default_rng(), a, weights(w), x; replace=replace, ordered=ordered) +end """ wsample([rng], [a], w) @@ -990,10 +1020,11 @@ Optionally specify a random number generator `rng` as the first argument """ wsample(rng::AbstractRNG, w::AbstractVector{<:Real}) = sample(rng, weights(w)) wsample(w::AbstractVector{<:Real}) = wsample(default_rng(), w) -wsample(rng::AbstractRNG, a::AbstractArray, w::AbstractVector{<:Real}) = sample(rng, a, weights(w)) +function wsample(rng::AbstractRNG, a::AbstractArray, w::AbstractVector{<:Real}) + return sample(rng, a, weights(w)) +end wsample(a::AbstractArray, w::AbstractVector{<:Real}) = wsample(default_rng(), a, w) - """ wsample([rng], [a], w, n::Integer; replace=true, ordered=false) @@ -1007,12 +1038,15 @@ items appear in the same order as in `a`) should be taken. Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). 
""" -wsample(rng::AbstractRNG, a::AbstractArray{T}, w::AbstractVector{<:Real}, n::Integer; - replace::Bool=true, ordered::Bool=false) where {T} = - wsample!(rng, a, w, Vector{T}(undef, n); replace=replace, ordered=ordered) -wsample(a::AbstractArray, w::AbstractVector{<:Real}, n::Integer; - replace::Bool=true, ordered::Bool=false) = - wsample(default_rng(), a, w, n; replace=replace, ordered=ordered) +function wsample(rng::AbstractRNG, a::AbstractArray{T}, w::AbstractVector{<:Real}, + n::Integer; + replace::Bool=true, ordered::Bool=false) where {T} + return wsample!(rng, a, w, Vector{T}(undef, n); replace=replace, ordered=ordered) +end +function wsample(a::AbstractArray, w::AbstractVector{<:Real}, n::Integer; + replace::Bool=true, ordered::Bool=false) + return wsample(default_rng(), a, w, n; replace=replace, ordered=ordered) +end """ wsample([rng], [a], w, dims::Dims; replace=true, ordered=false) @@ -1024,9 +1058,12 @@ weights given in `w` if `a` is present, otherwise select a random sample of size Optionally specify a random number generator `rng` as the first argument (defaults to `Random.default_rng()`). """ -wsample(rng::AbstractRNG, a::AbstractArray{T}, w::AbstractVector{<:Real}, dims::Dims; - replace::Bool=true, ordered::Bool=false) where {T} = - wsample!(rng, a, w, Array{T}(undef, dims); replace=replace, ordered=ordered) -wsample(a::AbstractArray, w::AbstractVector{<:Real}, dims::Dims; - replace::Bool=true, ordered::Bool=false) = - wsample(default_rng(), a, w, dims; replace=replace, ordered=ordered) +function wsample(rng::AbstractRNG, a::AbstractArray{T}, w::AbstractVector{<:Real}, + dims::Dims; + replace::Bool=true, ordered::Bool=false) where {T} + return wsample!(rng, a, w, Array{T}(undef, dims); replace=replace, ordered=ordered) +end +function wsample(a::AbstractArray, w::AbstractVector{<:Real}, dims::Dims; + replace::Bool=true, ordered::Bool=false) + return wsample(default_rng(), a, w, dims; replace=replace, ordered=ordered) +end diff --git a/src/scalarstats.jl b/src/scalarstats.jl index e1efd38f7..c5162ce1e 100644 --- a/src/scalarstats.jl +++ b/src/scalarstats.jl @@ -1,6 +1,5 @@ # Descriptive Statistics - ############################# # # Location @@ -39,7 +38,7 @@ function genmean(a, p::Real) # At least one of `x` or `p` must not be an int to avoid domain errors when `p` is a negative int. # We choose `x` in order to exploit exponentiation by squaring when `p` is an int. r = mean(a) do x - float(x)^p + return float(x)^p end return r^inv(p) end @@ -53,7 +52,7 @@ Return the mode (most common number) of an array, optionally over a specified range `r` or weighted via a vector `wv`. If several modes exist, the first one (in order of appearance) is returned. """ -function mode(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer +function mode(a::AbstractArray{T}, r::UnitRange{T}) where {T<:Integer} isempty(a) && throw(ArgumentError("mode is not defined for empty collections")) len = length(a) r0 = r[1] @@ -61,7 +60,7 @@ function mode(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer cnts = zeros(Int, length(r)) mc = 0 # maximum count mv = r0 # a value corresponding to maximum count - for i = 1:len + for i in 1:len x = a[i] if r0 <= x <= r1 c = (cnts[x - r0 + 1] += 1) @@ -81,14 +80,14 @@ end Return all modes (most common numbers) of an array, optionally over a specified range `r` or weighted via vector `wv`. 
""" -function modes(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer +function modes(a::AbstractArray{T}, r::UnitRange{T}) where {T<:Integer} r0 = r[1] r1 = r[end] n = length(r) cnts = zeros(Int, n) # find the maximum count mc = 0 - for i = 1:length(a) + for i in 1:length(a) x = a[i] if r0 <= x <= r1 c = (cnts[x - r0 + 1] += 1) @@ -99,7 +98,7 @@ function modes(a::AbstractArray{T}, r::UnitRange{T}) where T<:Integer end # find all values corresponding to maximum count ms = T[] - for i = 1:n + for i in 1:n if cnts[i] == mc push!(ms, r[i]) end @@ -161,7 +160,7 @@ function modes(a) end # Weighted mode of arbitrary vectors of values -function mode(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real +function mode(a::AbstractVector, wv::AbstractWeights{T}) where {T<:Real} isempty(a) && throw(ArgumentError("mode is not defined for empty collections")) isfinite(sum(wv)) || throw(ArgumentError("only finite weights are supported")) length(a) == length(wv) || @@ -170,7 +169,7 @@ function mode(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real # Iterate through the data mv = first(a) mw = first(wv) - weights = Dict{eltype(a), T}() + weights = Dict{eltype(a),T}() for (x, w) in zip(a, wv) _w = get!(weights, x, zero(T)) + w if _w > mw @@ -183,7 +182,7 @@ function mode(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real return mv end -function modes(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real +function modes(a::AbstractVector, wv::AbstractWeights{T}) where {T<:Real} isempty(a) && throw(ArgumentError("mode is not defined for empty collections")) isfinite(sum(wv)) || throw(ArgumentError("only finite weights are supported")) length(a) == length(wv) || @@ -191,7 +190,7 @@ function modes(a::AbstractVector, wv::AbstractWeights{T}) where T <: Real # Iterate through the data mw = first(wv) - weights = Dict{eltype(a), T}() + weights = Dict{eltype(a),T}() for (x, w) in zip(a, wv) _w = get!(weights, x, zero(T)) + w if _w > mw @@ -329,8 +328,10 @@ function quantilerank(itr, value; method::Symbol=:inc) n += 1 end - n == 0 && throw(ArgumentError("`itr` is empty. Pass a collection with at least two elements")) - n == 1 && throw(ArgumentError("`itr` has only 1 value. Pass a collection with at least two elements")) + n == 0 && + throw(ArgumentError("`itr` is empty. Pass a collection with at least two elements")) + n == 1 && + throw(ArgumentError("`itr` has only 1 value. Pass a collection with at least two elements")) if method == :inc if greatest_smaller == value @@ -387,7 +388,9 @@ Return the `q`th percentile of `value` in collection `itr`, i.e. [`quantilerank( See the [`quantilerank`](@ref) docstring for more details. """ -percentilerank(itr, value; method::Symbol=:inc) = quantilerank(itr, value, method=method) * 100 +function percentilerank(itr, value; method::Symbol=:inc) + return quantilerank(itr, value; method=method) * 100 +end ############################# # @@ -402,7 +405,7 @@ percentilerank(itr, value; method::Symbol=:inc) = quantilerank(itr, value, metho Return the span of a collection, i.e. the range `minimum(x):maximum(x)`. The minimum and maximum of `x` are computed in one pass using `extrema`. """ -span(x) = ((a, b) = extrema(x); a:b) +span(x) = ((a, b)=extrema(x); a:b) # Variation coefficient: std / mean """ @@ -416,7 +419,7 @@ then `std` is calculated with denominator `n`. Else, the `std` is calculated with denominator `n-1`. 
""" variation(x, m; corrected::Bool=true) = stdm(x, m; corrected=corrected) / m -variation(x; corrected::Bool=true) = ((m, s) = mean_and_std(x; corrected=corrected); s/m) +variation(x; corrected::Bool=true) = ((m, s)=mean_and_std(x; corrected=corrected); s/m) # Standard error of the mean: std / sqrt(len) # Code taken from var in the Statistics stdlib module @@ -503,10 +506,10 @@ function sem(x::AbstractArray, weights::UnitWeights; mean=nothing) return sem(x; mean=mean) end - # Weighted methods for the above -sem(x::AbstractArray, weights::FrequencyWeights; mean=nothing) = - sqrt(var(x, weights; mean=mean, corrected=true) / sum(weights)) +function sem(x::AbstractArray, weights::FrequencyWeights; mean=nothing) + return sqrt(var(x, weights; mean=mean, corrected=true) / sum(weights)) +end function sem(x::AbstractArray, weights::ProbabilityWeights; mean=nothing) if isempty(x) @@ -516,8 +519,8 @@ function sem(x::AbstractArray, weights::ProbabilityWeights; mean=nothing) _mean = mean === nothing ? Statistics.mean(x, weights) : mean # sum of squared errors = sse sse = sum(Broadcast.instantiate(Broadcast.broadcasted(x, weights) do x_i, w - return abs2(w * (x_i - _mean)) - end)) + return abs2(w * (x_i - _mean)) + end)) n = count(!iszero, weights) return sqrt(sse * n / (n - 1)) / sum(weights) end @@ -536,8 +539,8 @@ If `normalize` is set to `true`, the MAD is multiplied by `1 / quantile(Normal(), 3/4) ≈ 1.4826`, in order to obtain a consistent estimator of the standard deviation under the assumption that the data is normally distributed. """ -function mad(x; center=nothing, normalize::Union{Bool, Nothing}=nothing, constant=nothing) - mad!(Base.copymutable(x); center=center, normalize=normalize, constant=constant) +function mad(x; center=nothing, normalize::Union{Bool,Nothing}=nothing, constant=nothing) + return mad!(Base.copymutable(x); center=center, normalize=normalize, constant=constant) end """ @@ -558,15 +561,19 @@ function mad!(x::AbstractArray; c = center === nothing ? median!(x) : center T = promote_type(typeof(c), eltype(x)) U = eltype(x) - x2 = U == T ? x : isconcretetype(U) && isconcretetype(T) && sizeof(U) == sizeof(T) ? reinterpret(T, x) : similar(x, T) + x2 = U == T ? x : + isconcretetype(U) && isconcretetype(T) && sizeof(U) == sizeof(T) ? + reinterpret(T, x) : similar(x, T) x2 .= abs.(x .- c) m = median!(x2) if normalize isa Nothing - Base.depwarn("the `normalize` keyword argument will be false by default in future releases: set it explicitly to silence this deprecation", :mad) + Base.depwarn("the `normalize` keyword argument will be false by default in future releases: set it explicitly to silence this deprecation", + :mad) normalize = true end if !isa(constant, Nothing) - Base.depwarn("keyword argument `constant` is deprecated, use `normalize` instead or apply the multiplication directly", :mad) + Base.depwarn("keyword argument `constant` is deprecated, use `normalize` instead or apply the multiplication directly", + :mad) m * constant elseif normalize m * mad_constant @@ -582,7 +589,7 @@ end Compute the interquartile range (IQR) of collection `x`, i.e. the 75th percentile minus the 25th percentile. 
""" -iqr(x) = (q = quantile(x, [.25, .75]); q[2] - q[1]) +iqr(x) = (q=quantile(x, [0.25, 0.75]); q[2] - q[1]) # Generalized variance """ @@ -622,11 +629,11 @@ function _zscore!(Z::AbstractArray, X::AbstractArray, μ::Real, σ::Real) # Z and X are assumed to have the same size iσ = inv(σ) if μ == zero(μ) - for i = 1 : length(X) + for i in 1:length(X) Z[i] = X[i] * iσ end else - for i = 1 : length(X) + for i in 1:length(X) Z[i] = (X[i] - μ) * iσ end end @@ -641,15 +648,15 @@ end siz1 = size(X, 1) @nextract $N ud d->size(μ, d) if size(μ, 1) == 1 && siz1 > 1 - @nloops $N i d->(d>1 ? (1:size(X,d)) : (1:1)) d->(j_d = ud_d ==1 ? 1 : i_d) begin + @nloops $N i d->(d>1 ? (1:size(X, d)) : (1:1)) d->(j_d = ud_d == 1 ? 1 : i_d) begin v = (@nref $N μ j) c = inv(@nref $N σ j) - for i_1 = 1:siz1 + for i_1 in 1:siz1 (@nref $N Z i) = ((@nref $N X i) - v) * c end end else - @nloops $N i X d->(j_d = ud_d ==1 ? 1 : i_d) begin + @nloops $N i X d->(j_d = ud_d == 1 ? 1 : i_d) begin (@nref $N Z i) = ((@nref $N X i) - (@nref $N μ j)) / (@nref $N σ j) end end @@ -659,13 +666,13 @@ end function _zscore_chksize(X::AbstractArray, μ::AbstractArray, σ::AbstractArray) size(μ) == size(σ) || throw(DimensionMismatch("μ and σ should have the same size.")) - for i=1:ndims(X) - dμ_i = size(μ,i) - (dμ_i == 1 || dμ_i == size(X,i)) || throw(DimensionMismatch("X and μ have incompatible sizes.")) + for i in 1:ndims(X) + dμ_i = size(μ, i) + (dμ_i == 1 || dμ_i == size(X, i)) || + throw(DimensionMismatch("X and μ have incompatible sizes.")) end end - """ zscore!([Z], X, μ, σ) @@ -676,23 +683,25 @@ observation lies, i.e. ``(x - μ) / σ``. If a destination array `Z` is provided, the scores are stored in `Z` and it must have the same shape as `X`. Otherwise `X` is overwritten. """ -function zscore!(Z::AbstractArray{ZT}, X::AbstractArray{T}, μ::Real, σ::Real) where {ZT<:AbstractFloat,T<:Real} +function zscore!(Z::AbstractArray{ZT}, X::AbstractArray{T}, μ::Real, + σ::Real) where {ZT<:AbstractFloat,T<:Real} size(Z) == size(X) || throw(DimensionMismatch("Z and X must have the same size.")) - _zscore!(Z, X, μ, σ) + return _zscore!(Z, X, μ, σ) end function zscore!(Z::AbstractArray{<:AbstractFloat}, X::AbstractArray{<:Real}, μ::AbstractArray{<:Real}, σ::AbstractArray{<:Real}) size(Z) == size(X) || throw(DimensionMismatch("Z and X must have the same size.")) _zscore_chksize(X, μ, σ) - _zscore!(Z, X, μ, σ) + return _zscore!(Z, X, μ, σ) end zscore!(X::AbstractArray{<:AbstractFloat}, μ::Real, σ::Real) = _zscore!(X, X, μ, σ) -zscore!(X::AbstractArray{<:AbstractFloat}, μ::AbstractArray{<:Real}, σ::AbstractArray{<:Real}) = - (_zscore_chksize(X, μ, σ); _zscore!(X, X, μ, σ)) - +function zscore!(X::AbstractArray{<:AbstractFloat}, μ::AbstractArray{<:Real}, + σ::AbstractArray{<:Real}) + return (_zscore_chksize(X, μ, σ); _zscore!(X, X, μ, σ)) +end """ zscore(X, [μ, σ]) @@ -705,21 +714,20 @@ above the mean that an observation lies, i.e. ``(x - μ) / σ``. In particular, when `μ` and `σ` are arrays, they should have the same size, and `size(μ, i) == 1 || size(μ, i) == size(X, i)` for each dimension. 
""" -function zscore(X::AbstractArray{T}, μ::Real, σ::Real) where T<:Real +function zscore(X::AbstractArray{T}, μ::Real, σ::Real) where {T<:Real} ZT = typeof((zero(T) - zero(μ)) / one(σ)) - _zscore!(Array{ZT}(undef, size(X)), X, μ, σ) + return _zscore!(Array{ZT}(undef, size(X)), X, μ, σ) end -function zscore(X::AbstractArray{T}, μ::AbstractArray{U}, σ::AbstractArray{S}) where {T<:Real,U<:Real,S<:Real} +function zscore(X::AbstractArray{T}, μ::AbstractArray{U}, + σ::AbstractArray{S}) where {T<:Real,U<:Real,S<:Real} _zscore_chksize(X, μ, σ) ZT = typeof((zero(T) - zero(U)) / one(S)) - _zscore!(Array{ZT}(undef, size(X)), X, μ, σ) + return _zscore!(Array{ZT}(undef, size(X)), X, μ, σ) end -zscore(X::AbstractArray{<:Real}) = ((μ, σ) = mean_and_std(X); zscore(X, μ, σ)) -zscore(X::AbstractArray{<:Real}, dim::Int) = ((μ, σ) = mean_and_std(X, dim); zscore(X, μ, σ)) - - +zscore(X::AbstractArray{<:Real}) = ((μ, σ)=mean_and_std(X); zscore(X, μ, σ)) +zscore(X::AbstractArray{<:Real}, dim::Int) = ((μ, σ)=mean_and_std(X, dim); zscore(X, μ, σ)) ############################# # @@ -755,7 +763,7 @@ end Compute the Rényi (generalized) entropy of order `α` of an array `p`. """ -function renyientropy(p::AbstractArray{T}, α::Real) where T<:Real +function renyientropy(p::AbstractArray{T}, α::Real) where {T<:Real} α < 0 && throw(ArgumentError("Order of Rényi entropy not legal, $(α) < 0.")) s = zero(T) @@ -763,7 +771,7 @@ function renyientropy(p::AbstractArray{T}, α::Real) where T<:Real scale = sum(p) if α ≈ 0 - for i = 1:length(p) + for i in 1:length(p) pi = p[i] if pi > z s += 1 @@ -771,7 +779,7 @@ function renyientropy(p::AbstractArray{T}, α::Real) where T<:Real end s = log(s / scale) elseif α ≈ 1 - for i = 1:length(p) + for i in 1:length(p) pi = p[i] if pi > z s -= pi * log(pi) @@ -781,7 +789,7 @@ function renyientropy(p::AbstractArray{T}, α::Real) where T<:Real elseif (isinf(α)) s = -log(maximum(p)) else # a normal Rényi entropy - for i = 1:length(p) + for i in 1:length(p) pi = p[i] if pi > z s += pi ^ α @@ -803,11 +811,9 @@ function crossentropy(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}) # handle empty collections if isempty(p) - Base.depwarn( - "support for empty collections will be removed since they do not " * - "represent proper probability distributions", - :crossentropy, - ) + Base.depwarn("support for empty collections will be removed since they do not " * + "represent proper probability distributions", + :crossentropy) # return zero for empty arrays return xlogy(zero(eltype(p)), zero(eltype(q))) end @@ -817,9 +823,9 @@ function crossentropy(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}) return - sum(Broadcast.instantiate(broadcasted)) end -crossentropy(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) = - crossentropy(p,q) / log(b) - +function crossentropy(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) + return crossentropy(p, q) / log(b) +end """ kldivergence(p, q, [b]) @@ -834,11 +840,9 @@ function kldivergence(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}) # handle empty collections if isempty(p) - Base.depwarn( - "support for empty collections will be removed since they do not "* - "represent proper probability distributions", - :kldivergence, - ) + Base.depwarn("support for empty collections will be removed since they do not " * + "represent proper probability distributions", + :kldivergence) # return zero for empty arrays pzero = zero(eltype(p)) qzero = zero(eltype(q)) @@ -854,8 +858,9 @@ function kldivergence(p::AbstractArray{<:Real}, 
q::AbstractArray{<:Real}) return sum(Broadcast.instantiate(broadcasted)) end -kldivergence(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) = - kldivergence(p,q) / log(b) +function kldivergence(p::AbstractArray{<:Real}, q::AbstractArray{<:Real}, b::Real) + return kldivergence(p, q) / log(b) +end ############################# # @@ -875,7 +880,6 @@ struct SummaryStats{T<:Union{AbstractFloat,Missing}} nmiss::Int end - """ summarystats(a) @@ -884,12 +888,12 @@ Compute summary statistics for a real-valued array `a`. Returns a number of missing observations, standard deviation, mean, minimum, 25th percentile, median, 75th percentile, and maximum. """ -function summarystats(a::AbstractArray{T}) where T<:Union{Real,Missing} +function summarystats(a::AbstractArray{T}) where {T<:Union{Real,Missing}} # `mean` doesn't fail on empty input but rather returns `NaN`, so we can use the # return type to populate the `SummaryStats` structure. s = T >: Missing ? collect(skipmissing(a)) : a m = mean(s) - stdev = std(s, mean=m) + stdev = std(s; mean=m) R = typeof(m) n = length(a) ns = length(s) @@ -900,7 +904,7 @@ function summarystats(a::AbstractArray{T}) where T<:Union{Real,Missing} else quantile(s, [0.00, 0.25, 0.50, 0.75, 1.00]) end - SummaryStats{R}(m, stdev, qs..., n, n - ns) + return SummaryStats{R}(m, stdev, qs..., n, n - ns) end function Base.show(io::IO, ss::SummaryStats) @@ -917,7 +921,6 @@ function Base.show(io::IO, ss::SummaryStats) @printf(io, "Maximum: %.6f\n", ss.max) end - """ describe(a) @@ -926,9 +929,9 @@ the mean, minimum, 25th percentile, median, 75th percentile, and maximum. """ DataAPI.describe(x) = describe(stdout, x) -function DataAPI.describe(io::IO, a::AbstractArray{T}) where T<:Union{Real,Missing} +function DataAPI.describe(io::IO, a::AbstractArray{T}) where {T<:Union{Real,Missing}} show(io, summarystats(a)) - println(io, "Type: $(string(eltype(a)))") + return println(io, "Type: $(string(eltype(a)))") end function DataAPI.describe(io::IO, a::AbstractArray) println(io, "Summary Stats:") diff --git a/src/signalcorr.jl b/src/signalcorr.jl index 06c83ba1d..21f954f0f 100644 --- a/src/signalcorr.jl +++ b/src/signalcorr.jl @@ -11,41 +11,46 @@ # ####################################### -default_laglen(lx::Int) = min(lx-1, round(Int,10*log10(lx))) -check_lags(lx::Int, lags::AbstractVector) = (maximum(lags) < lx || error("lags must be less than the sample length.")) +default_laglen(lx::Int) = min(lx-1, round(Int, 10*log10(lx))) +function check_lags(lx::Int, lags::AbstractVector) + return (maximum(lags) < lx || error("lags must be less than the sample length.")) +end -function demean_col!(z::AbstractVector{<:Real}, x::AbstractMatrix{<:Real}, j::Int, demean::Bool) +function demean_col!(z::AbstractVector{<:Real}, x::AbstractMatrix{<:Real}, j::Int, + demean::Bool) T = eltype(z) m = size(x, 1) @assert m == length(z) b = m * (j-1) if demean s = zero(T) - for i = 1 : m + for i in 1:m s += x[b + i] end mv = s / m - for i = 1 : m + for i in 1:m z[i] = x[b + i] - mv end else copyto!(z, 1, x, b+1, m) end - z + return z end - ####################################### # # Auto-correlations # ####################################### -default_autolags(lx::Int) = 0 : default_laglen(lx) - -_autodot(x::AbstractVector{<:Union{Float32, Float64}}, lx::Int, l::Int) = dot(x, 1:(lx-l), x, (1+l):lx) -_autodot(x::AbstractVector{<:Real}, lx::Int, l::Int) = dot(view(x, 1:(lx-l)), view(x, (1+l):lx)) +default_autolags(lx::Int) = 0:default_laglen(lx) +function _autodot(x::AbstractVector{<:Union{Float32,Float64}}, 
lx::Int, l::Int) + return dot(x, 1:(lx - l), x, (1 + l):lx) +end +function _autodot(x::AbstractVector{<:Real}, lx::Int, l::Int) + return dot(view(x, 1:(lx - l)), view(x, (1 + l):lx)) +end ## autocov """ @@ -61,7 +66,8 @@ where each column in the result will correspond to a column in `x`. The output is not normalized. See [`autocor!`](@ref) for a method with normalization. """ -function autocov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) lx = length(x) m = length(lags) length(r) == m || throw(DimensionMismatch()) @@ -69,13 +75,14 @@ function autocov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::Ab T = typeof(zero(eltype(x)) / 1) z::Vector{T} = demean ? x .- mean(x) : x - for k = 1 : m # foreach lag value + for k in 1:m # foreach lag value r[k] = _autodot(z, lx, lags[k]) / lx end return r end -function autocov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) lx = size(x, 1) ns = size(x, 2) m = length(lags) @@ -84,16 +91,15 @@ function autocov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::Ab T = typeof(zero(eltype(x)) / 1) z = Vector{T}(undef, lx) - for j = 1 : ns + for j in 1:ns demean_col!(z, x, j, demean) - for k = 1 : m - r[k,j] = _autodot(z, lx, lags[k]) / lx + for k in 1:m + r[k, j] = _autodot(z, lx, lags[k]) / lx end end return r end - """ autocov(x, [lags]; demean=true) @@ -110,18 +116,21 @@ When left unspecified, the lags used are the integers from 0 to The output is not normalized. See [`autocor`](@ref) for a function with normalization. """ -function autocov(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocov(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) out = Vector{float(eltype(x))}(undef, length(lags)) - autocov!(out, x, lags; demean=demean) + return autocov!(out, x, lags; demean=demean) end -function autocov(x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(eltype(x))}(undef, length(lags), size(x,2)) - autocov!(out, x, lags; demean=demean) +function autocov(x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) + out = Matrix{float(eltype(x))}(undef, length(lags), size(x, 2)) + return autocov!(out, x, lags; demean=demean) end -autocov(x::AbstractVecOrMat{<:Real}; demean::Bool=true) = - autocov(x, default_autolags(size(x,1)); demean=demean) +function autocov(x::AbstractVecOrMat{<:Real}; demean::Bool=true) + return autocov(x, default_autolags(size(x, 1)); demean=demean) +end ## autocor @@ -139,7 +148,8 @@ where each column in the result will correspond to a column in `x`. The output is normalized by the variance of `x`, i.e. so that the lag 0 autocorrelation is 1. See [`autocov!`](@ref) for the unnormalized form. 
""" -function autocor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) lx = length(x) m = length(lags) length(r) == m || throw(DimensionMismatch()) @@ -148,13 +158,14 @@ function autocor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, lags::Ab T = typeof(zero(eltype(x)) / 1) z::Vector{T} = demean ? x .- mean(x) : x zz = dot(z, z) - for k = 1 : m # foreach lag value + for k in 1:m # foreach lag value r[k] = _autodot(z, lx, lags[k]) / zz end return r end -function autocor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) lx = size(x, 1) ns = size(x, 2) m = length(lags) @@ -163,17 +174,16 @@ function autocor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, lags::Ab T = typeof(zero(eltype(x)) / 1) z = Vector{T}(undef, lx) - for j = 1 : ns + for j in 1:ns demean_col!(z, x, j, demean) zz = dot(z, z) - for k = 1 : m - r[k,j] = _autodot(z, lx, lags[k]) / zz + for k in 1:m + r[k, j] = _autodot(z, lx, lags[k]) / zz end end return r end - """ autocor(x, [lags]; demean=true) @@ -191,19 +201,21 @@ When left unspecified, the lags used are the integers from 0 to The output is normalized by the variance of `x`, i.e. so that the lag 0 autocorrelation is 1. See [`autocov`](@ref) for the unnormalized form. """ -function autocor(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function autocor(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) out = Vector{float(eltype(x))}(undef, length(lags)) - autocor!(out, x, lags; demean=demean) + return autocor!(out, x, lags; demean=demean) end -function autocor(x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(eltype(x))}(undef, length(lags), size(x,2)) - autocor!(out, x, lags; demean=demean) +function autocor(x::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) + out = Matrix{float(eltype(x))}(undef, length(lags), size(x, 2)) + return autocor!(out, x, lags; demean=demean) end -autocor(x::AbstractVecOrMat{<:Real}; demean::Bool=true) = - autocor(x, default_autolags(size(x,1)); demean=demean) - +function autocor(x::AbstractVecOrMat{<:Real}; demean::Bool=true) + return autocor(x, default_autolags(size(x, 1)); demean=demean) +end ####################################### # @@ -211,20 +223,21 @@ autocor(x::AbstractVecOrMat{<:Real}; demean::Bool=true) = # ####################################### -default_crosslags(lx::Int) = (l=default_laglen(lx); -l:l) +default_crosslags(lx::Int) = (l=default_laglen(lx); (-l):l) -function _crossdot(x::AbstractVector{T}, y::AbstractVector{T}, lx::Int, l::Int) where {T<:Union{Float32, Float64}} +function _crossdot(x::AbstractVector{T}, y::AbstractVector{T}, lx::Int, + l::Int) where {T<:Union{Float32,Float64}} if l >= 0 - dot(x, 1:(lx-l), y, (1+l):lx) + dot(x, 1:(lx - l), y, (1 + l):lx) else - dot(x, (1-l):lx, y, 1:(lx+l)) + dot(x, (1 - l):lx, y, 1:(lx + l)) end end function _crossdot(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lx::Int, l::Int) if l >= 0 - dot(view(x, 1:(lx-l)), view(y, (1+l):lx)) + dot(view(x, 1:(lx - l)), view(y, (1 + l):lx)) else - dot(view(x, (1-l):lx), view(y, 1:(lx+l))) + dot(view(x, (1 - l):lx), 
view(y, 1:(lx + l))) end end @@ -246,7 +259,9 @@ three-dimensional array of size `(length(lags), size(x, 2), size(y, 2))`. The output is not normalized. See [`crosscor!`](@ref) for a function with normalization. """ -function crosscov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, + y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) lx = length(x) m = length(lags) (length(y) == lx && length(r) == m) || throw(DimensionMismatch()) @@ -256,13 +271,15 @@ function crosscov!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::Abst zx::Vector{T} = demean ? x .- mean(x) : x S = typeof(zero(eltype(y)) / 1) zy::Vector{S} = demean ? y .- mean(y) : y - for k = 1 : m # foreach lag value + for k in 1:m # foreach lag value r[k] = _crossdot(zx, zy, lx, lags[k]) / lx end return r end -function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, + y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) lx = size(x, 1) ns = size(x, 2) m = length(lags) @@ -273,16 +290,18 @@ function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::Abst zx = Vector{T}(undef, lx) S = typeof(zero(eltype(y)) / 1) zy::Vector{S} = demean ? y .- mean(y) : y - for j = 1 : ns + for j in 1:ns demean_col!(zx, x, j, demean) - for k = 1 : m - r[k,j] = _crossdot(zx, zy, lx, lags[k]) / lx + for k in 1:m + r[k, j] = _crossdot(zx, zy, lx, lags[k]) / lx end end return r end -function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, + y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) lx = length(x) ns = size(y, 2) m = length(lags) @@ -293,16 +312,18 @@ function crosscov!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::Abst zx::Vector{T} = demean ? 
x .- mean(x) : x S = typeof(zero(eltype(y)) / 1) zy = Vector{S}(undef, lx) - for j = 1 : ns + for j in 1:ns demean_col!(zy, y, j, demean) - for k = 1 : m - r[k,j] = _crossdot(zx, zy, lx, lags[k]) / lx + for k in 1:m + r[k, j] = _crossdot(zx, zy, lx, lags[k]) / lx end end return r end -function crosscov!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, + y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) lx = size(x, 1) nx = size(x, 2) ny = size(y, 2) @@ -314,11 +335,11 @@ function crosscov!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::Abs T = typeof(zero(eltype(x)) / 1) zxs = Vector{T}[] sizehint!(zxs, nx) - for j = 1 : nx - xj = x[:,j] + for j in 1:nx + xj = x[:, j] if demean mv = mean(xj) - for i = 1 : lx + for i in 1:lx xj[i] -= mv end end @@ -327,19 +348,18 @@ function crosscov!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::Abs S = typeof(zero(eltype(y)) / 1) zy = Vector{S}(undef, lx) - for j = 1 : ny + for j in 1:ny demean_col!(zy, y, j, demean) - for i = 1 : nx + for i in 1:nx zx = zxs[i] - for k = 1 : m - r[k,i,j] = _crossdot(zx, zy, lx, lags[k]) / lx + for k in 1:m + r[k, i, j] = _crossdot(zx, zy, lx, lags[k]) / lx end end end return r end - """ crosscov(x, y, [lags]; demean=true) @@ -356,29 +376,35 @@ When left unspecified, the lags used are the integers from The output is not normalized. See [`crosscor`](@ref) for a function with normalization. """ -function crosscov(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscov(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) out = Vector{float(Base.promote_eltype(x, y))}(undef, length(lags)) - crosscov!(out, x, y, lags; demean=demean) + return crosscov!(out, x, y, lags; demean=demean) end -function crosscov(x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(x,2)) - crosscov!(out, x, y, lags; demean=demean) +function crosscov(x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) + out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(x, 2)) + return crosscov!(out, x, y, lags; demean=demean) end -function crosscov(x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(y,2)) - crosscov!(out, x, y, lags; demean=demean) +function crosscov(x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) + out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(y, 2)) + return crosscov!(out, x, y, lags; demean=demean) end -function crosscov(x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Array{float(Base.promote_eltype(x, y)),3}(undef, length(lags), size(x,2), size(y,2)) - crosscov!(out, x, y, lags; demean=demean) +function crosscov(x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) + out = Array{float(Base.promote_eltype(x, y)),3}(undef, length(lags), size(x, 2), + size(y, 2)) + return crosscov!(out, 
x, y, lags; demean=demean) end -crosscov(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; demean::Bool=true) = - crosscov(x, y, default_crosslags(size(x,1)); demean=demean) - +function crosscov(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; + demean::Bool=true) + return crosscov(x, y, default_crosslags(size(x, 1)); demean=demean) +end ## crosscor """ @@ -397,7 +423,9 @@ three-dimensional array of size `(length(lags), size(x, 2), size(y, 2))`. The output is normalized by `sqrt(var(x)*var(y))`. See [`crosscov!`](@ref) for the unnormalized form. """ -function crosscor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, + y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) lx = length(x) m = length(lags) (length(y) == lx && length(r) == m) || throw(DimensionMismatch()) @@ -408,13 +436,15 @@ function crosscor!(r::AbstractVector{<:Real}, x::AbstractVector{<:Real}, y::Abst S = typeof(zero(eltype(y)) / 1) zy::Vector{S} = demean ? y .- mean(y) : y sc = sqrt(dot(zx, zx) * dot(zy, zy)) - for k = 1 : m # foreach lag value + for k in 1:m # foreach lag value r[k] = _crossdot(zx, zy, lx, lags[k]) / sc end return r end -function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, + y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) lx = size(x, 1) ns = size(x, 2) m = length(lags) @@ -426,17 +456,19 @@ function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractMatrix{<:Real}, y::Abst S = typeof(zero(eltype(y)) / 1) zy::Vector{S} = demean ? 
y .- mean(y) : y yy = dot(zy, zy) - for j = 1 : ns + for j in 1:ns demean_col!(zx, x, j, demean) sc = sqrt(dot(zx, zx) * yy) - for k = 1 : m - r[k,j] = _crossdot(zx, zy, lx, lags[k]) / sc + for k in 1:m + r[k, j] = _crossdot(zx, zy, lx, lags[k]) / sc end end return r end -function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, + y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) lx = length(x) ns = size(y, 2) m = length(lags) @@ -448,17 +480,19 @@ function crosscor!(r::AbstractMatrix{<:Real}, x::AbstractVector{<:Real}, y::Abst S = typeof(zero(eltype(y)) / 1) zy = Vector{S}(undef, lx) xx = dot(zx, zx) - for j = 1 : ns + for j in 1:ns demean_col!(zy, y, j, demean) sc = sqrt(xx * dot(zy, zy)) - for k = 1 : m - r[k,j] = _crossdot(zx, zy, lx, lags[k]) / sc + for k in 1:m + r[k, j] = _crossdot(zx, zy, lx, lags[k]) / sc end end return r end -function crosscor!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, + y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; + demean::Bool=true) lx = size(x, 1) nx = size(x, 2) ny = size(y, 2) @@ -472,11 +506,11 @@ function crosscor!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::Abs sizehint!(zxs, nx) xxs = Vector{T}(undef, nx) - for j = 1 : nx - xj = x[:,j] + for j in 1:nx + xj = x[:, j] if demean mv = mean(xj) - for i = 1 : lx + for i in 1:lx xj[i] -= mv end end @@ -486,21 +520,20 @@ function crosscor!(r::AbstractArray{<:Real,3}, x::AbstractMatrix{<:Real}, y::Abs S = typeof(zero(eltype(y)) / 1) zy = Vector{S}(undef, lx) - for j = 1 : ny + for j in 1:ny demean_col!(zy, y, j, demean) yy = dot(zy, zy) - for i = 1 : nx + for i in 1:nx zx = zxs[i] sc = sqrt(xxs[i] * yy) - for k = 1 : m - r[k,i,j] = _crossdot(zx, zy, lx, lags[k]) / sc + for k in 1:m + r[k, i, j] = _crossdot(zx, zy, lx, lags[k]) / sc end end end return r end - """ crosscor(x, y, [lags]; demean=true) @@ -517,29 +550,35 @@ When left unspecified, the lags used are the integers from The output is normalized by `sqrt(var(x)*var(y))`. See [`crosscov`](@ref) for the unnormalized form. 
""" -function crosscor(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) +function crosscor(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) out = Vector{float(Base.promote_eltype(x, y))}(undef, length(lags)) - crosscor!(out, x, y, lags; demean=demean) + return crosscor!(out, x, y, lags; demean=demean) end -function crosscor(x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(x,2)) - crosscor!(out, x, y, lags; demean=demean) +function crosscor(x::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) + out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(x, 2)) + return crosscor!(out, x, y, lags; demean=demean) end -function crosscor(x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(y,2)) - crosscor!(out, x, y, lags; demean=demean) +function crosscor(x::AbstractVector{<:Real}, y::AbstractMatrix{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) + out = Matrix{float(Base.promote_eltype(x, y))}(undef, length(lags), size(y, 2)) + return crosscor!(out, x, y, lags; demean=demean) end -function crosscor(x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; demean::Bool=true) - out = Array{float(Base.promote_eltype(x, y)),3}(undef, length(lags), size(x,2), size(y,2)) - crosscor!(out, x, y, lags; demean=demean) +function crosscor(x::AbstractMatrix{<:Real}, y::AbstractMatrix{<:Real}, + lags::AbstractVector{<:Integer}; demean::Bool=true) + out = Array{float(Base.promote_eltype(x, y)),3}(undef, length(lags), size(x, 2), + size(y, 2)) + return crosscor!(out, x, y, lags; demean=demean) end -crosscor(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; demean::Bool=true) = - crosscor(x, y, default_crosslags(size(x,1)); demean=demean) - +function crosscor(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; + demean::Bool=true) + return crosscor(x, y, default_crosslags(size(x, 1)); demean=demean) +end ####################################### # @@ -549,36 +588,39 @@ crosscor(x::AbstractVecOrMat{<:Real}, y::AbstractVecOrMat{<:Real}; demean::Bool= # ####################################### -function pacf_regress!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}, mk::Integer) +function pacf_regress!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{<:Real}, + lags::AbstractVector{<:Integer}, mk::Integer) lx = size(X, 1) tmpX = ones(eltype(X), lx, mk + 1) - for j = 1 : size(X,2) - for l = 1 : mk - for i = 1+l:lx - tmpX[i,l+1] = X[i-l,j] + for j in 1:size(X, 2) + for l in 1:mk + for i in (1 + l):lx + tmpX[i, l+1] = X[i-l, j] end end - for i = 1 : length(lags) + for i in 1:length(lags) l = lags[i] - sX = view(tmpX, 1+l:lx, 1:l+1) - r[i,j] = l == 0 ? 1 : (cholesky!(sX'sX, Val(false)) \ (sX'view(X, 1+l:lx, j)))[end] + sX = view(tmpX, (1 + l):lx, 1:(l + 1)) + r[i, j] = l == 0 ? 
1 : + (cholesky!(sX'sX, Val(false)) \ (sX'view(X, (1 + l):lx, j)))[end] end end - r + return r end -function pacf_yulewalker!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{T}, lags::AbstractVector{<:Integer}, mk::Integer) where T<:Union{Float32, Float64} +function pacf_yulewalker!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{T}, + lags::AbstractVector{<:Integer}, + mk::Integer) where {T<:Union{Float32,Float64}} tmp = Vector{T}(undef, mk) - for j = 1 : size(X,2) - acfs = autocor(X[:,j], 1:mk) - for i = 1 : length(lags) + for j in 1:size(X, 2) + acfs = autocor(X[:, j], 1:mk) + for i in 1:length(lags) l = lags[i] - r[i,j] = l == 0 ? 1 : l == 1 ? acfs[i] : -durbin!(view(acfs, 1:l), tmp)[l] + r[i, j] = l == 0 ? 1 : l == 1 ? acfs[i] : -durbin!(view(acfs, 1:l), tmp)[l] end end end - """ pacf!(r, X, lags; method=:regression) @@ -590,12 +632,14 @@ using the Yule-Walker equations. `r` must be a matrix of size `(length(lags), size(x, 2))`. """ -function pacf!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{T}, lags::AbstractVector{<:Integer}; method::Symbol=:regression) where T<:Union{Float32, Float64} +function pacf!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{T}, + lags::AbstractVector{<:Integer}; + method::Symbol=:regression) where {T<:Union{Float32,Float64}} lx = size(X, 1) m = length(lags) minlag, maxlag = extrema(lags) (0 <= minlag && 2maxlag < lx) || error("Invalid lag value.") - size(r) == (m, size(X,2)) || throw(DimensionMismatch()) + size(r) == (m, size(X, 2)) || throw(DimensionMismatch()) if method == :regression pacf_regress!(r, X, lags, maxlag) @@ -607,7 +651,6 @@ function pacf!(r::AbstractMatrix{<:Real}, X::AbstractMatrix{T}, lags::AbstractVe return r end - """ pacf(X, lags; method=:regression) @@ -621,11 +664,13 @@ If `x` is a vector, return a vector of the same length as `lags`. If `x` is a matrix, return a matrix of size `(length(lags), size(x, 2))`, where each column in the result corresponds to a column in `x`. 
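# Examples

An illustrative sketch (a random walk is used only as a conveniently autocorrelated input; any real-valued series works):

```julia
using StatsBase
x = accumulate(+, randn(500))     # strongly autocorrelated series
pacf(x, 0:5)                      # 6-element vector, one value per lag
pacf(x, 0:5; method=:yulewalker)  # same shape, Yule-Walker estimates
```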
""" -function pacf(X::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; method::Symbol=:regression) - out = Matrix{float(eltype(X))}(undef, length(lags), size(X,2)) - pacf!(out, float(X), lags; method=method) +function pacf(X::AbstractMatrix{<:Real}, lags::AbstractVector{<:Integer}; + method::Symbol=:regression) + out = Matrix{float(eltype(X))}(undef, length(lags), size(X, 2)) + return pacf!(out, float(X), lags; method=method) end -function pacf(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; method::Symbol=:regression) - vec(pacf(reshape(x, length(x), 1), lags, method=method)) +function pacf(x::AbstractVector{<:Real}, lags::AbstractVector{<:Integer}; + method::Symbol=:regression) + return vec(pacf(reshape(x, length(x), 1), lags; method=method)) end diff --git a/src/statmodels.jl b/src/statmodels.jl index b487a30e3..9cf9509a4 100644 --- a/src/statmodels.jl +++ b/src/statmodels.jl @@ -6,7 +6,7 @@ struct PValue <: Real v::Real function PValue(v::Real) 0 <= v <= 1 || isnan(v) || error("p-values must be in [0; 1]") - new(v) + return new(v) end end PValue(p::PValue) = p @@ -14,11 +14,11 @@ PValue(p::PValue) = p function show(io::IO, pv::PValue) v = pv.v if isnan(v) - @printf(io,"%d", v) + @printf(io, "%d", v) elseif v >= 1e-4 - @printf(io,"%.4f", v) + @printf(io, "%.4f", v) else - @printf(io,"<1e%2.2d", ceil(Integer, max(nextfloat(log10(v)), -99))) + @printf(io, "<1e%2.2d", ceil(Integer, max(nextfloat(log10(v)), -99))) end end @@ -30,28 +30,29 @@ end show(io::IO, x::TestStat) = @printf(io, "%.2f", x.v) TestStat(x::TestStat) = x -float(x::Union{TestStat, PValue}) = float(x.v) +float(x::Union{TestStat,PValue}) = float(x.v) for op in [:(==), :<, :≤, :(isless), :(isequal)] # isless and < to place nice with NaN @eval begin - Base.$op(x::Union{TestStat, PValue}, y::Real) = $op(x.v, y) - Base.$op(y::Real, x::Union{TestStat, PValue}) = $op(y, x.v) - Base.$op(x1::Union{TestStat, PValue}, x2::Union{TestStat, PValue}) = $op(x1.v, x2.v) + Base.$op(x::Union{TestStat,PValue}, y::Real) = $op(x.v, y) + Base.$op(y::Real, x::Union{TestStat,PValue}) = $op(y, x.v) + Base.$op(x1::Union{TestStat,PValue}, x2::Union{TestStat,PValue}) = $op(x1.v, x2.v) end end -Base.hash(x::Union{TestStat, PValue}, h::UInt) = hash(x.v, h) +Base.hash(x::Union{TestStat,PValue}, h::UInt) = hash(x.v, h) # necessary to avoid a method ambiguity with isless(::TestStat, NaN) -Base.isless(x::Union{TestStat, PValue}, y::AbstractFloat) = isless(x.v, y) -Base.isless(y::AbstractFloat, x::Union{TestStat, PValue},) = isless(y, x.v) -Base.isequal(y::AbstractFloat, x::Union{TestStat, PValue}) = isequal(y, x.v) -Base.isequal(x::Union{TestStat, PValue}, y::AbstractFloat) = isequal(x.v, y) - -Base.isapprox(x::Union{TestStat, PValue}, y::Real; kwargs...) = isapprox(x.v, y; kwargs...) -Base.isapprox(y::Real, x::Union{TestStat, PValue}; kwargs...) = isapprox(y, x.v; kwargs...) -Base.isapprox(x1::Union{TestStat, PValue}, x2::Union{TestStat, PValue}; kwargs...) = isapprox(x1.v, x2.v; kwargs...) - +Base.isless(x::Union{TestStat,PValue}, y::AbstractFloat) = isless(x.v, y) +Base.isless(y::AbstractFloat, x::Union{TestStat,PValue}) = isless(y, x.v) +Base.isequal(y::AbstractFloat, x::Union{TestStat,PValue}) = isequal(y, x.v) +Base.isequal(x::Union{TestStat,PValue}, y::AbstractFloat) = isequal(x.v, y) + +Base.isapprox(x::Union{TestStat,PValue}, y::Real; kwargs...) = isapprox(x.v, y; kwargs...) +Base.isapprox(y::Real, x::Union{TestStat,PValue}; kwargs...) = isapprox(y, x.v; kwargs...) 
+function Base.isapprox(x1::Union{TestStat,PValue}, x2::Union{TestStat,PValue}; kwargs...) + return isapprox(x1.v, x2.v; kwargs...) +end """Wrap a string so that show omits quotes""" struct NoQuote @@ -60,7 +61,6 @@ end show(io::IO, n::NoQuote) = print(io, n.s) - ## coefficient tables with specialized show method mutable struct CoefTable @@ -69,44 +69,48 @@ mutable struct CoefTable rownms::Vector pvalcol::Int teststatcol::Int - function CoefTable(cols::Vector,colnms::Vector,rownms::Vector, - pvalcol::Int=0,teststatcol::Int=0) + function CoefTable(cols::Vector, colnms::Vector, rownms::Vector, + pvalcol::Int=0, teststatcol::Int=0) nc = length(cols) - nrs = map(length,cols) + nrs = map(length, cols) nr = nrs[1] - length(colnms) in [0,nc] || throw(ArgumentError("colnms should have length 0 or $nc")) - length(rownms) in [0,nr] || throw(ArgumentError("rownms should have length 0 or $nr")) - all(nrs .== nr) || throw(ArgumentError("Elements of cols should have equal lengths, but got $nrs")) + length(colnms) in [0, nc] || + throw(ArgumentError("colnms should have length 0 or $nc")) + length(rownms) in [0, nr] || + throw(ArgumentError("rownms should have length 0 or $nr")) + all(nrs .== nr) || + throw(ArgumentError("Elements of cols should have equal lengths, but got $nrs")) pvalcol in 0:nc || throw(ArgumentError("pvalcol should be between 0 and $nc")) - teststatcol in 0:nc || throw(ArgumentError("teststatcol should be between 0 and $nc")) - new(cols,colnms,rownms,pvalcol,teststatcol) + teststatcol in 0:nc || + throw(ArgumentError("teststatcol should be between 0 and $nc")) + return new(cols, colnms, rownms, pvalcol, teststatcol) end - function CoefTable(mat::Matrix,colnms::Vector,rownms::Vector, - pvalcol::Int=0,teststatcol::Int=0) - nc = size(mat,2) + function CoefTable(mat::Matrix, colnms::Vector, rownms::Vector, + pvalcol::Int=0, teststatcol::Int=0) + nc = size(mat, 2) cols = Any[mat[:, i] for i in 1:nc] - CoefTable(cols,colnms,rownms,pvalcol,teststatcol) + return CoefTable(cols, colnms, rownms, pvalcol, teststatcol) end end Base.length(ct::CoefTable) = length(ct.cols[1]) function Base.eltype(ct::CoefTable) names = isempty(ct.rownms) ? - tuple(Symbol.(ct.colnms)...) : - tuple(Symbol("Name"), Symbol.(ct.colnms)...) + tuple(Symbol.(ct.colnms)...) : + tuple(Symbol("Name"), Symbol.(ct.colnms)...) types = isempty(ct.rownms) ? - Tuple{eltype.(ct.cols)...} : - Tuple{eltype(ct.rownms), eltype.(ct.cols)...} - NamedTuple{names, types} + Tuple{eltype.(ct.cols)...} : + Tuple{eltype(ct.rownms),eltype.(ct.cols)...} + return NamedTuple{names,types} end function Base.iterate(ct::CoefTable, i::Integer=1) if i in 1:length(ct) cols = getindex.(ct.cols, Ref(i)) nt = isempty(ct.rownms) ? - eltype(ct)(tuple(cols...)) : - eltype(ct)(tuple(ct.rownms[i], cols...)) + eltype(ct)(tuple(cols...)) : + eltype(ct)(tuple(ct.rownms[i], cols...)) (nt, i+1) else nothing @@ -114,17 +118,19 @@ function Base.iterate(ct::CoefTable, i::Integer=1) end function show(io::IO, ::MIME"text/plain", ct::CoefTable) - cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms; + cols = ct.cols; + rownms = ct.rownms; + colnms = ct.colnms; nc = length(cols) nr = length(cols[1]) if length(rownms) == 0 - rownms = [lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr] + rownms = [lpad("[$i]", floor(Integer, log10(nr))+3) for i in 1:nr] end mat = [j == 1 ? NoQuote(rownms[i]) : j-1 == ct.pvalcol ? NoQuote(sprint(show, PValue(cols[j-1][i]))) : j-1 in ct.teststatcol ? TestStat(cols[j-1][i]) : cols[j-1][i] isa AbstractString ? 
NoQuote(cols[j-1][i]) : cols[j-1][i] - for i in 1:nr, j in 1:nc+1] + for i in 1:nr, j in 1:(nc + 1)] # Code inspired by print_matrix in Base io = IOContext(io, :compact=>true, :limit=>false) A = Base.alignment(io, mat, 1:size(mat, 1), 1:size(mat, 2), @@ -144,21 +150,23 @@ function show(io::IO, ::MIME"text/plain", ct::CoefTable) i != size(mat, 1) && println(io) end print(io, '\n', repeat('─', totwidth)) - nothing + return nothing end function show(io::IO, ::MIME"text/markdown", ct::CoefTable) - cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms; + cols = ct.cols; + rownms = ct.rownms; + colnms = ct.colnms; nc = length(cols) nr = length(cols[1]) if length(rownms) == 0 - rownms = [lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr] + rownms = [lpad("[$i]", floor(Integer, log10(nr))+3) for i in 1:nr] end mat = [j == 1 ? NoQuote(rownms[i]) : j-1 == ct.pvalcol ? NoQuote(sprint(show, PValue(cols[j-1][i]))) : j-1 in ct.teststatcol ? TestStat(cols[j-1][i]) : cols[j-1][i] isa AbstractString ? NoQuote(cols[j-1][i]) : cols[j-1][i] - for i in 1:nr, j in 1:nc+1] + for i in 1:nr, j in 1:(nc + 1)] # Code inspired by print_matrix in Base io = IOContext(io, :compact=>true, :limit=>false) A = Base.alignment(io, mat, 1:size(mat, 1), 1:size(mat, 2), @@ -191,7 +199,7 @@ function show(io::IO, ::MIME"text/markdown", ct::CoefTable) i != size(mat, 1) && println(io) end - nothing + return nothing end """ @@ -206,7 +214,8 @@ struct ConvergenceException{T<:Real} <: Exception lastchange::T tol::T msg::String - function ConvergenceException{T}(iters, lastchange::T, tol::T, msg::String) where T<:Real + function ConvergenceException{T}(iters, lastchange::T, tol::T, + msg::String) where {T<:Real} if tol > lastchange throw(ArgumentError("Change must be greater than tol.")) else @@ -215,9 +224,10 @@ struct ConvergenceException{T<:Real} <: Exception end end -ConvergenceException(iters, lastchange::T=NaN, tol::T=NaN, - msg::AbstractString="") where {T<:Real} = - ConvergenceException{T}(iters, lastchange, tol, String(msg)) +function ConvergenceException(iters, lastchange::T=NaN, tol::T=NaN, + msg::AbstractString="") where {T<:Real} + return ConvergenceException{T}(iters, lastchange, tol, String(msg)) +end function Base.showerror(io::IO, ce::ConvergenceException) print(io, "failure to converge after $(ce.iters) iterations.") diff --git a/src/toeplitzsolvers.jl b/src/toeplitzsolvers.jl index 19146bf7a..e2e3fcb27 100644 --- a/src/toeplitzsolvers.jl +++ b/src/toeplitzsolvers.jl @@ -1,66 +1,75 @@ # Symmetric Toeplitz solver -function durbin!(r::AbstractVector{T}, y::AbstractVector{T}) where T<:BlasReal +function durbin!(r::AbstractVector{T}, y::AbstractVector{T}) where {T<:BlasReal} n = length(r) - n <= length(y) || throw(DimensionMismatch("Auxiliary vector cannot be shorter than data vector")) + n <= length(y) || + throw(DimensionMismatch("Auxiliary vector cannot be shorter than data vector")) y[1] = -r[1] β = one(T) α = -r[1] - for k = 1:n-1 + for k in 1:(n - 1) β *= one(T) - α*α α = -r[k+1] - for j = 1:k + for j in 1:k α -= r[k-j+1]*y[j] end α /= β - for j = 1:div(k,2) + for j in 1:div(k, 2) tmp = y[j] y[j] += α*y[k-j+1] y[k-j+1] += α*tmp end - if isodd(k) y[div(k,2)+1] *= one(T) + α end + if isodd(k) + y[div(k, 2)+1] *= one(T) + α + end y[k+1] = α end return y end durbin(r::AbstractVector{T}) where {T<:BlasReal} = durbin!(r, zeros(T, length(r))) -function levinson!(r::AbstractVector{T}, b::AbstractVector{T}, x::AbstractVector{T}) where T<:BlasReal +function levinson!(r::AbstractVector{T}, b::AbstractVector{T}, + 
x::AbstractVector{T}) where {T<:BlasReal} n = length(b) n == length(r) || throw(DimensionMismatch("Vectors must have same length")) - n <= length(x) || throw(DimensionMismatch("Auxiliary vector cannot be shorter than data vector")) + n <= length(x) || + throw(DimensionMismatch("Auxiliary vector cannot be shorter than data vector")) x[1] = b[1] b[1] = -r[2]/r[1] β = one(T) α = -r[2]/r[1] - for k = 1:n-1 + for k in 1:(n - 1) β *= one(T) - α*α μ = b[k+1] - for j = 2:k+1 + for j in 2:(k + 1) μ -= r[j]/r[1]*x[k-j+2] end μ /= β - for j = 1:k + for j in 1:k x[j] += μ*b[k-j+1] end x[k+1] = μ if k < n - 1 α = -r[k+2] - for j = 2:k+1 + for j in 2:(k + 1) α -= r[j]*b[k-j+2] end α /= β*r[1] - for j = 1:div(k,2) + for j in 1:div(k, 2) tmp = b[j] b[j] += α*b[k-j+1] b[k-j+1] += α*tmp end - if isodd(k) b[div(k,2)+1] *= one(T) + α end + if isodd(k) + b[div(k, 2)+1] *= one(T) + α + end b[k+1] = α end end - for i = 1:n + for i in 1:n x[i] /= r[1] end return x end -levinson(r::AbstractVector{T}, b::AbstractVector{T}) where {T<:BlasReal} = levinson!(r, copy(b), zeros(T, length(b))) +function levinson(r::AbstractVector{T}, b::AbstractVector{T}) where {T<:BlasReal} + return levinson!(r, copy(b), zeros(T, length(b))) +end diff --git a/src/transformations.jl b/src/transformations.jl index 387aa2bfa..8d819d28d 100644 --- a/src/transformations.jl +++ b/src/transformations.jl @@ -8,20 +8,22 @@ abstract type AbstractDataTransform end Apply transformation `t` to vector or matrix `x` in place. """ -transform!(t::AbstractDataTransform, x::AbstractMatrix{<:Real}) = - transform!(x, t, x) -transform!(t::AbstractDataTransform, x::AbstractVector{<:Real}) = - (transform!(t, reshape(x, :, 1)); x) +transform!(t::AbstractDataTransform, x::AbstractMatrix{<:Real}) = transform!(x, t, x) +function transform!(t::AbstractDataTransform, x::AbstractVector{<:Real}) + return (transform!(t, reshape(x, :, 1)); x) +end """ transform(t::AbstractDataTransform, x) Return a standardized copy of vector or matrix `x` using transformation `t`. """ -transform(t::AbstractDataTransform, x::AbstractMatrix{<:Real}) = - transform!(similar(x), t, x) -transform(t::AbstractDataTransform, x::AbstractVector{<:Real}) = - vec(transform(t, reshape(x, :, 1))) +function transform(t::AbstractDataTransform, x::AbstractMatrix{<:Real}) + return transform!(similar(x), t, x) +end +function transform(t::AbstractDataTransform, x::AbstractVector{<:Real}) + return vec(transform(t, reshape(x, :, 1))) +end # reconstruct the original data from transformed values """ @@ -30,10 +32,10 @@ transform(t::AbstractDataTransform, x::AbstractVector{<:Real}) = Perform an in-place reconstruction into an original data scale from a transformed vector or matrix `y` using transformation `t`. """ -reconstruct!(t::AbstractDataTransform, y::AbstractMatrix{<:Real}) = - reconstruct!(y, t, y) -reconstruct!(t::AbstractDataTransform, y::AbstractVector{<:Real}) = - (reconstruct!(t, reshape(y, :, 1)); y) +reconstruct!(t::AbstractDataTransform, y::AbstractMatrix{<:Real}) = reconstruct!(y, t, y) +function reconstruct!(t::AbstractDataTransform, y::AbstractVector{<:Real}) + return (reconstruct!(t, reshape(y, :, 1)); y) +end """ reconstruct(t::AbstractDataTransform, y) @@ -41,28 +43,31 @@ reconstruct!(t::AbstractDataTransform, y::AbstractVector{<:Real}) = Return a reconstruction of an originally scaled data from a transformed vector or matrix `y` using transformation `t`. 
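# Examples

A minimal round-trip sketch using a `ZScoreTransform` (the data values are arbitrary):

```julia
using StatsBase
X = [1.0 2.0; 3.0 4.0; 5.0 6.0]
t = fit(ZScoreTransform, X; dims=1)
Y = transform(t, X)      # standardized copy of X
X ≈ reconstruct(t, Y)    # true: the original scale is recovered
```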
""" -reconstruct(t::AbstractDataTransform, y::AbstractMatrix{<:Real}) = - reconstruct!(similar(y), t, y) -reconstruct(t::AbstractDataTransform, y::AbstractVector{<:Real}) = - vec(reconstruct(t, reshape(y, :, 1))) +function reconstruct(t::AbstractDataTransform, y::AbstractMatrix{<:Real}) + return reconstruct!(similar(y), t, y) +end +function reconstruct(t::AbstractDataTransform, y::AbstractVector{<:Real}) + return vec(reconstruct(t, reshape(y, :, 1))) +end """ ZScoreTransform <: AbstractDataTransform Standardization (Z-score transformation) """ -struct ZScoreTransform{T<:Real, U<:AbstractVector{T}} <: AbstractDataTransform +struct ZScoreTransform{T<:Real,U<:AbstractVector{T}} <: AbstractDataTransform len::Int dims::Int mean::U scale::U - function ZScoreTransform(l::Int, dims::Int, m::U, s::U) where {T<:Real, U<:AbstractVector{T}} + function ZScoreTransform(l::Int, dims::Int, m::U, + s::U) where {T<:Real,U<:AbstractVector{T}} lenm = length(m) lens = length(s) lenm == l || lenm == 0 || throw(DimensionMismatch("Inconsistent dimensions.")) lens == l || lens == 0 || throw(DimensionMismatch("Inconsistent dimensions.")) - new{T, U}(l, dims, m, s) + return new{T,U}(l, dims, m, s) end end @@ -126,24 +131,27 @@ function fit(::Type{ZScoreTransform}, X::AbstractMatrix{<:Real}; throw(DomainError(dims, "fit only accept dims to be 1 or 2.")) end return ZScoreTransform(l, dims, (center ? vec(m) : similar(m, 0)), - (scale ? vec(s) : similar(s, 0))) + (scale ? vec(s) : similar(s, 0))) end function fit(::Type{ZScoreTransform}, X::AbstractVector{<:Real}; dims::Integer=1, center::Bool=true, scale::Bool=true) if dims != 1 - throw(DomainError(dims, "fit only accepts dims=1 over a vector. Try fit(t, x, dims=1).")) + throw(DomainError(dims, + "fit only accepts dims=1 over a vector. 
Try fit(t, x, dims=1).")) end return fit(ZScoreTransform, reshape(X, :, 1); dims=dims, center=center, scale=scale) end -function transform!(y::AbstractMatrix{<:Real}, t::ZScoreTransform, x::AbstractMatrix{<:Real}) +function transform!(y::AbstractMatrix{<:Real}, t::ZScoreTransform, + x::AbstractMatrix{<:Real}) if t.dims == 1 l = t.len - size(x,2) == size(y,2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) - n = size(y,1) - size(x,1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) + size(x, 2) == size(y, 2) == l || + throw(DimensionMismatch("Inconsistent dimensions.")) + n = size(y, 1) + size(x, 1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) m = t.mean s = t.scale @@ -160,7 +168,7 @@ function transform!(y::AbstractMatrix{<:Real}, t::ZScoreTransform, x::AbstractMa if isempty(s) broadcast!(-, y, x, m') else - broadcast!((x,m,s)->(x-m)/s, y, x, m', s') + broadcast!((x, m, s)->(x-m)/s, y, x, m', s') end end elseif t.dims == 2 @@ -170,12 +178,14 @@ return y end -function reconstruct!(x::AbstractMatrix{<:Real}, t::ZScoreTransform, y::AbstractMatrix{<:Real}) +function reconstruct!(x::AbstractMatrix{<:Real}, t::ZScoreTransform, + y::AbstractMatrix{<:Real}) if t.dims == 1 l = t.len - size(x,2) == size(y,2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) - n = size(y,1) - size(x,1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) + size(x, 2) == size(y, 2) == l || + throw(DimensionMismatch("Inconsistent dimensions.")) + n = size(y, 1) + size(x, 1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) m = t.mean s = t.scale @@ -192,7 +202,7 @@ function reconstruct!(x::AbstractMatrix{<:Real}, t::ZScoreTransform, y::Abstract if isempty(s) broadcast!(+, x, y, m') else - broadcast!((y,m,s)->y*s+m, x, y, m', s') + broadcast!((y, m, s)->y*s+m, x, y, m', s') end end elseif t.dims == 2 @@ -207,19 +217,20 @@ end Unit range normalization """ -struct UnitRangeTransform{T<:Real, U<:AbstractVector} <: AbstractDataTransform +struct UnitRangeTransform{T<:Real,U<:AbstractVector} <: AbstractDataTransform len::Int dims::Int unit::Bool min::U scale::U - function UnitRangeTransform(l::Int, dims::Int, unit::Bool, min::U, max::U) where {T, U<:AbstractVector{T}} + function UnitRangeTransform(l::Int, dims::Int, unit::Bool, min::U, + max::U) where {T,U<:AbstractVector{T}} lenmin = length(min) lenmax = length(max) lenmin == l || lenmin == 0 || throw(DimensionMismatch("Inconsistent dimensions.")) lenmax == l || lenmax == 0 || throw(DimensionMismatch("Inconsistent dimensions.")) - new{T, U}(l, dims, unit, min, max) + return new{T,U}(l, dims, unit, min, max) end end @@ -291,25 +302,28 @@ end function fit(::Type{UnitRangeTransform}, X::AbstractVector{<:Real}; dims::Integer=1, unit::Bool=true) if dims != 1 - throw(DomainError(dims, "fit only accept dims=1 over a vector. Try fit(t, x, dims=1).")) + throw(DomainError(dims, + "fit only accepts dims=1 over a vector. 
Try fit(t, x, dims=1).")) end tmin, tmax = extrema(X) tmax = 1 / (tmax - tmin) return UnitRangeTransform(1, dims, unit, [tmin], [tmax]) end -function transform!(y::AbstractMatrix{<:Real}, t::UnitRangeTransform, x::AbstractMatrix{<:Real}) +function transform!(y::AbstractMatrix{<:Real}, t::UnitRangeTransform, + x::AbstractMatrix{<:Real}) if t.dims == 1 l = t.len - size(x,2) == size(y,2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) - n = size(x,1) - size(y,1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) + size(x, 2) == size(y, 2) == l || + throw(DimensionMismatch("Inconsistent dimensions.")) + n = size(x, 1) + size(y, 1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) tmin = t.min tscale = t.scale if t.unit - broadcast!((x,s,m)->(x-m)*s, y, x, tscale', tmin') + broadcast!((x, s, m)->(x-m)*s, y, x, tscale', tmin') else broadcast!(*, y, x, tscale') end @@ -320,18 +334,20 @@ function transform!(y::AbstractMatrix{<:Real}, t::UnitRangeTransform, x::Abstrac return y end -function reconstruct!(x::AbstractMatrix{<:Real}, t::UnitRangeTransform, y::AbstractMatrix{<:Real}) +function reconstruct!(x::AbstractMatrix{<:Real}, t::UnitRangeTransform, + y::AbstractMatrix{<:Real}) if t.dims == 1 l = t.len - size(x,2) == size(y,2) == l || throw(DimensionMismatch("Inconsistent dimensions.")) - n = size(y,1) - size(x,1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) + size(x, 2) == size(y, 2) == l || + throw(DimensionMismatch("Inconsistent dimensions.")) + n = size(y, 1) + size(x, 1) == n || throw(DimensionMismatch("Inconsistent dimensions.")) tmin = t.min tscale = t.scale if t.unit - broadcast!((y,s,m)->y/s+m, x, y, tscale', tmin') + broadcast!((y, s, m)->y/s+m, x, y, tscale', tmin') else broadcast!(/, x, y, tscale') end @@ -367,6 +383,7 @@ julia> standardize(UnitRangeTransform, [0.0 -0.5 0.5; 0.0 1.0 2.0], dims=2) 0.0 0.5 1.0 ``` """ -function standardize(::Type{DT}, X::AbstractVecOrMat{<:Real}; kwargs...) where {DT <: AbstractDataTransform} +function standardize(::Type{DT}, X::AbstractVecOrMat{<:Real}; + kwargs...) where {DT<:AbstractDataTransform} return transform(fit(DT, X; kwargs...), X) end diff --git a/src/weights.jl b/src/weights.jl index c7164b33d..9d7be485e 100644 --- a/src/weights.jl +++ b/src/weights.jl @@ -1,5 +1,5 @@ ##### Weight vector ##### -abstract type AbstractWeights{S<:Real, T<:Real, V<:AbstractVector{T}} <: AbstractVector{T} end +abstract type AbstractWeights{S<:Real,T<:Real,V<:AbstractVector{T}} <: AbstractVector{T} end """ @weights name @@ -9,15 +9,19 @@ and stores the `values` (`V<:AbstractVector{<:Real}`) and `sum` (`S<:Real`). 
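For example, the package's own weight types are generated with this macro (see `@weights AnalyticWeights` further below). A generated type is constructed from a value vector, with the sum computed once and stored:

```julia
w = AnalyticWeights([0.2, 0.3, 0.5])
sum(w)   # 1.0, taken from the stored sum rather than recomputed
```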
""" macro weights(name) return quote - mutable struct $name{S<:Real, T<:Real, V<:AbstractVector{T}} <: AbstractWeights{S, T, V} + mutable struct $name{S<:Real,T<:Real,V<:AbstractVector{T}} <: AbstractWeights{S,T,V} values::V sum::S - function $(esc(name)){S, T, V}(values, sum) where {S<:Real, T<:Real, V<:AbstractVector{T}} - isfinite(sum) || throw(ArgumentError("weights cannot contain Inf or NaN values")) - return new{S, T, V}(values, sum) + function $(esc(name)){S,T,V}(values, + sum) where {S<:Real,T<:Real,V<:AbstractVector{T}} + isfinite(sum) || + throw(ArgumentError("weights cannot contain Inf or NaN values")) + return new{S,T,V}(values, sum) end end - $(esc(name))(values::AbstractVector{T}, sum::S) where {S<:Real, T<:Real} = $(esc(name)){S, T, typeof(values)}(values, sum) + function $(esc(name))(values::AbstractVector{T}, sum::S) where {S<:Real,T<:Real} + return $(esc(name)){S,T,typeof(values)}(values, sum) + end $(esc(name))(values::AbstractVector{<:Real}) = $(esc(name))(values, sum(values)) end end @@ -36,16 +40,17 @@ Base.convert(::Type{Vector}, wv::AbstractWeights) = convert(Vector, wv.values) @propagate_inbounds function Base.getindex(wv::AbstractWeights, i::Integer) @boundscheck checkbounds(wv, i) - wv.values[i] + return wv.values[i] end -@propagate_inbounds function Base.getindex(wv::W, i::AbstractArray) where W <: AbstractWeights +@propagate_inbounds function Base.getindex(wv::W, + i::AbstractArray) where {W<:AbstractWeights} @boundscheck checkbounds(wv, i) v = wv.values[i] - W(v, sum(v)) + return W(v, sum(v)) end -Base.getindex(wv::W, ::Colon) where {W <: AbstractWeights} = W(copy(wv.values), sum(wv)) +Base.getindex(wv::W, ::Colon) where {W<:AbstractWeights} = W(copy(wv.values), sum(wv)) @propagate_inbounds function Base.setindex!(wv::AbstractWeights, v::Real, i::Int) s = v - wv[i] @@ -53,7 +58,7 @@ Base.getindex(wv::W, ::Colon) where {W <: AbstractWeights} = W(copy(wv.values), isfinite(sum) || throw(ArgumentError("weights cannot contain Inf or NaN values")) wv.values[i] = v wv.sum = sum - v + return v end """ @@ -97,7 +102,7 @@ if `corrected=true`. @inline function varcorrection(w::Weights, corrected::Bool=false) corrected && throw(ArgumentError("Weights type does not support bias correction: " * "use FrequencyWeights, AnalyticWeights or ProbabilityWeights if applicable.")) - 1 / w.sum + return 1 / w.sum end @weights AnalyticWeights @@ -279,20 +284,22 @@ function eweights(t::AbstractArray{<:Integer}, λ::Real; kwargs...) end eweights(n::Integer, λ::Real; kwargs...) = _eweights(1:n, λ, n; kwargs...) -eweights(t::AbstractVector, r::AbstractRange, λ::Real; kwargs...) = - _eweights(something.(indexin(t, r)), λ, length(r); kwargs...) +function eweights(t::AbstractVector, r::AbstractRange, λ::Real; kwargs...) + return _eweights(something.(indexin(t, r)), λ, length(r); kwargs...) +end -function _eweights(t::AbstractArray{<:Integer}, λ::Real, n::Integer; scale::Union{Bool, Nothing}=nothing) +function _eweights(t::AbstractArray{<:Integer}, λ::Real, n::Integer; + scale::Union{Bool,Nothing}=nothing) 0 < λ <= 1 || throw(ArgumentError("Smoothing factor must be between 0 and 1")) f = depcheck(:eweights, :scale, scale) ? 
_scaled_eweight : _unscaled_eweight w0 = map(t) do i i > 0 || throw(ArgumentError("Time indices must be non-zero positive integers")) - f(i, λ, n) + return f(i, λ, n) end s = sum(w0) - Weights(w0, s) + return Weights(w0, s) end _unscaled_eweight(i, λ, n) = λ * (1 - λ)^(1 - i) @@ -300,7 +307,7 @@ _scaled_eweight(i, λ, n) = (1 - λ)^(n - i) # NOTE: no variance correction is implemented for exponential weights -struct UnitWeights{T<:Real} <: AbstractWeights{Int, T, V where V<:Vector{T}} +struct UnitWeights{T<:Real} <: AbstractWeights{Int,T,V where V<:Vector{T}} len::Int end @@ -311,7 +318,7 @@ Construct a `UnitWeights` vector with length `s` and weight elements of type `T` All weight elements are identically one. """ UnitWeights -sum(wv::UnitWeights{T}) where T = convert(T, length(wv)) +sum(wv::UnitWeights{T}) where {T} = convert(T, length(wv)) isempty(wv::UnitWeights) = iszero(wv.len) length(wv::UnitWeights) = wv.len size(wv::UnitWeights) = tuple(length(wv)) @@ -320,19 +327,20 @@ Base.axes(wv::UnitWeights) = tuple(Base.OneTo(length(wv))) Base.dataids(::UnitWeights) = () Base.convert(::Type{Vector}, wv::UnitWeights{T}) where {T} = ones(T, length(wv)) -@propagate_inbounds function Base.getindex(wv::UnitWeights{T}, i::Integer) where T +@propagate_inbounds function Base.getindex(wv::UnitWeights{T}, i::Integer) where {T} @boundscheck checkbounds(wv, i) - one(T) + return one(T) end -@propagate_inbounds function Base.getindex(wv::UnitWeights{T}, i::AbstractArray{<:Int}) where T +@propagate_inbounds function Base.getindex(wv::UnitWeights{T}, + i::AbstractArray{<:Int}) where {T} @boundscheck checkbounds(wv, i) - UnitWeights{T}(length(i)) + return UnitWeights{T}(length(i)) end -function Base.getindex(wv::UnitWeights{T}, i::AbstractArray{Bool}) where T - length(wv) == length(i) || throw(DimensionMismatch()) - UnitWeights{T}(count(i)) +function Base.getindex(wv::UnitWeights{T}, i::AbstractArray{Bool}) where {T} + length(wv) == length(i) || throw(DimensionMismatch()) + return UnitWeights{T}(count(i)) end Base.getindex(wv::UnitWeights{T}, ::Colon) where {T} = UnitWeights{T}(wv.len) @@ -359,7 +367,7 @@ julia> uweights(Float64, 3) 1.0 ``` """ -uweights(s::Int) = UnitWeights{Int}(s) +uweights(s::Int) = UnitWeights{Int}(s) uweights(::Type{T}, s::Int) where {T<:Real} = UnitWeights{T}(s) """ @@ -371,7 +379,7 @@ uweights(::Type{T}, s::Int) where {T<:Real} = UnitWeights{T}(s) This definition is equivalent to the correction applied to unweighted data. """ @inline function varcorrection(w::UnitWeights, corrected::Bool=false) - corrected ? (1 / (w.len - 1)) : (1 / w.len) + return corrected ? 
(1 / (w.len - 1)) : (1 / w.len) end #### Equality tests ##### @@ -379,15 +387,15 @@ end for w in (AnalyticWeights, FrequencyWeights, ProbabilityWeights, Weights) @eval begin Base.isequal(x::$w, y::$w) = isequal(x.sum, y.sum) && isequal(x.values, y.values) - Base.:(==)(x::$w, y::$w) = (x.sum == y.sum) && (x.values == y.values) + Base.:(==)(x::$w, y::$w) = (x.sum == y.sum) && (x.values == y.values) end end Base.isequal(x::UnitWeights, y::UnitWeights) = isequal(x.len, y.len) -Base.:(==)(x::UnitWeights, y::UnitWeights) = (x.len == y.len) +Base.:(==)(x::UnitWeights, y::UnitWeights) = (x.len == y.len) Base.isequal(x::AbstractWeights, y::AbstractWeights) = false -Base.:(==)(x::AbstractWeights, y::AbstractWeights) = false +Base.:(==)(x::AbstractWeights, y::AbstractWeights) = false Base.allequal(wv::AbstractWeights) = allequal(wv.values) Base.allequal(::UnitWeights) = true @@ -410,7 +418,8 @@ wsum(v::AbstractArray, w::AbstractVector, dims::Colon=:) = transpose(w) * vec(v) for W in (AnalyticWeights, FrequencyWeights, ProbabilityWeights, Weights) @eval begin function wsum(v::AbstractArray, w::$W, dims::Colon) - length(w) == length(v) || throw(DimensionMismatch("Inconsistent array lengths.")) + length(w) == length(v) || + throw(DimensionMismatch("Inconsistent array lengths.")) return transpose(w.values) * vec(v) end end @@ -442,16 +451,17 @@ end ## general Cartesian-based weighted sum across dimensions @generated function _wsum_general!(R::AbstractArray{RT}, f::supertype(typeof(abs)), - A::AbstractArray{T,N}, w::AbstractVector{WT}, dim::Int, init::Bool) where {T,RT,WT,N} + A::AbstractArray{T,N}, w::AbstractVector{WT}, dim::Int, + init::Bool) where {T,RT,WT,N} quote init && fill!(R, zero(RT)) wi = zero(WT) if dim == 1 - @nextract $N sizeR d->size(R,d) + @nextract $N sizeR d->size(R, d) sizA1 = size(A, 1) - @nloops $N i d->(d>1 ? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @nloops $N i d->(d>1 ? (1:size(A, d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin r = (@nref $N R j) - for i_1 = 1:sizA1 + for i_1 in 1:sizA1 r += f(@nref $N A i) * w[i_1] end (@nref $N R j) = r @@ -475,12 +485,12 @@ end init && fill!(R, zero(RT)) wi = zero(WT) if dim == 1 - @nextract $N sizeR d->size(R,d) + @nextract $N sizeR d->size(R, d) sizA1 = size(A, 1) - @nloops $N i d->(d>1 ? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @nloops $N i d->(d>1 ? (1:size(A, d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin r = (@nref $N R j) m = (@nref $N means j) - for i_1 = 1:sizA1 + for i_1 in 1:sizA1 r += f((@nref $N A i) - m) * w[i_1] end (@nref $N R j) = r @@ -491,14 +501,16 @@ end j_d = 1 else j_d = i_d - end) (@nref $N R j) += f((@nref $N A i) - (@nref $N means j)) * wi + end) (@nref $N R j) += f((@nref $N A i) - (@nref $N means j)) * + wi end return R end end -_wsum!(R::AbstractArray, A::AbstractArray, w::AbstractVector, dim::Int, init::Bool) = - _wsum_general!(R, identity, A, w, dim, init) +function _wsum!(R::AbstractArray, A::AbstractArray, w::AbstractVector, dim::Int, init::Bool) + return _wsum_general!(R, identity, A, w, dim, init) +end ## wsum! and wsum @@ -513,22 +525,24 @@ Compute the weighted sum of `A` with weights `w` over the dimension `dim` and store the result in `R`. If `init=false`, the sum is added to `R` rather than starting from zero. 
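# Examples

A small sketch of the in-place form (`R` must have the reduced shape along `dim`; the values here are arbitrary):

```julia
using StatsBase
A = [1.0 2.0; 3.0 4.0]
w = [0.5, 0.5]
R = zeros(1, 2)
wsum!(R, A, w, 1)   # R == [2.0 3.0]
```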
""" -function wsum!(R::AbstractArray, A::AbstractArray{T,N}, w::AbstractVector, dim::Int; init::Bool=true) where {T,N} +function wsum!(R::AbstractArray, A::AbstractArray{T,N}, w::AbstractVector, dim::Int; + init::Bool=true) where {T,N} 1 <= dim <= N || error("dim should be within [1, $N]") ndims(R) <= N || error("ndims(R) should not exceed $N") - length(w) == size(A,dim) || throw(DimensionMismatch("Inconsistent array dimension.")) + length(w) == size(A, dim) || throw(DimensionMismatch("Inconsistent array dimension.")) # TODO: more careful examination of R's size - _wsum!(R, A, w, dim, init) + return _wsum!(R, A, w, dim, init) end function wsum(A::AbstractArray{T}, w::AbstractVector{W}, dim::Int) where {T<:Number,W<:Real} - length(w) == size(A,dim) || throw(DimensionMismatch("Inconsistent array dimension.")) - _wsum!(similar(A, wsumtype(T,W), Base.reduced_indices(axes(A), dim)), A, w, dim, true) + length(w) == size(A, dim) || throw(DimensionMismatch("Inconsistent array dimension.")) + return _wsum!(similar(A, wsumtype(T, W), Base.reduced_indices(axes(A), dim)), A, w, dim, + true) end function wsum(A::AbstractArray{<:Number}, w::UnitWeights, dim::Int) size(A, dim) != length(w) && throw(DimensionMismatch("Inconsistent array dimension.")) - return sum(A, dims=dim) + return sum(A; dims=dim) end ## extended sum! and wsum @@ -542,8 +556,10 @@ Compute the weighted sum of `A` with weights `w` over the dimension `dim` and st the result in `R`. If `init=false`, the sum is added to `R` rather than starting from zero. """ -Base.sum!(R::AbstractArray, A::AbstractArray, w::AbstractWeights{<:Real}, dim::Int; init::Bool=true) = - wsum!(R, A, w, dim; init=init) +function Base.sum!(R::AbstractArray, A::AbstractArray, w::AbstractWeights{<:Real}, dim::Int; + init::Bool=true) + return wsum!(R, A, w, dim; init=init) +end """ sum(v::AbstractArray, w::AbstractWeights{<:Real}; [dims]) @@ -551,14 +567,15 @@ Base.sum!(R::AbstractArray, A::AbstractArray, w::AbstractWeights{<:Real}, dim::I Compute the weighted sum of an array `v` with weights `w`, optionally over the dimension `dims`. """ -Base.sum(A::AbstractArray, w::AbstractWeights{<:Real}; dims::Union{Colon,Int}=:) = - wsum(A, w, dims) +function Base.sum(A::AbstractArray, w::AbstractWeights{<:Real}; dims::Union{Colon,Int}=:) + return wsum(A, w, dims) +end ##### Weighted means ##### function wmean(v::AbstractArray{<:Number}, w::AbstractVector) Base.depwarn("wmean is deprecated, use mean(v, weights(w)) instead.", :wmean) - mean(v, weights(w)) + return mean(v, weights(w)) end """ @@ -567,14 +584,20 @@ end Compute the weighted mean of array `A` with weight vector `w` (of type `AbstractWeights`) along dimension `dims`, and write results to `R`. 
""" -mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights; dims::Union{Nothing,Int}=nothing) = - _mean!(R, A, w, dims) -_mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Nothing) = +function mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights; + dims::Union{Nothing,Int}=nothing) + return _mean!(R, A, w, dims) +end +function _mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Nothing) throw(ArgumentError("dims argument must be provided")) -_mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Int) = - rmul!(Base.sum!(R, A, w, dims), inv(sum(w))) +end +function _mean!(R::AbstractArray, A::AbstractArray, w::AbstractWeights, dims::Int) + return rmul!(Base.sum!(R, A, w, dims), inv(sum(w))) +end -wmeantype(::Type{T}, ::Type{W}) where {T,W} = typeof((zero(T)*zero(W) + zero(T)*zero(W)) / one(W)) +function wmeantype(::Type{T}, ::Type{W}) where {T,W} + return typeof((zero(T)*zero(W) + zero(T)*zero(W)) / one(W)) +end """ mean(A::AbstractArray, w::AbstractWeights[, dims::Int]) @@ -591,17 +614,17 @@ w = rand(n) mean(x, weights(w)) ``` """ -mean(A::AbstractArray, w::AbstractWeights; dims::Union{Colon,Int}=:) = - _mean(A, w, dims) -_mean(A::AbstractArray, w::AbstractWeights, dims::Colon) = - sum(A, w) / sum(w) -_mean(A::AbstractArray{T}, w::AbstractWeights{W}, dims::Int) where {T,W} = - _mean!(similar(A, wmeantype(T, W), Base.reduced_indices(axes(A), dims)), A, w, dims) +mean(A::AbstractArray, w::AbstractWeights; dims::Union{Colon,Int}=:) = _mean(A, w, dims) +_mean(A::AbstractArray, w::AbstractWeights, dims::Colon) = sum(A, w) / sum(w) +function _mean(A::AbstractArray{T}, w::AbstractWeights{W}, dims::Int) where {T,W} + return _mean!(similar(A, wmeantype(T, W), Base.reduced_indices(axes(A), dims)), A, w, + dims) +end function mean(A::AbstractArray, w::UnitWeights; dims::Union{Colon,Int}=:) a = (dims === :) ? length(A) : size(A, dims) a != length(w) && throw(DimensionMismatch("Inconsistent array dimension.")) - return mean(A, dims=dims) + return mean(A; dims=dims) end ##### Weighted quantile ##### @@ -626,7 +649,8 @@ is strictly superior to ``h``. The weighted ``p`` quantile is given by ``v_k + with ``γ = (h - S_k)/(S_{k+1} - S_k)``. In particular, when all weights are equal, the function returns the same result as the unweighted `quantile`. 
""" -function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector{<:Real}) where {V, W<:Real} +function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, + p::AbstractVector{<:Real}) where {V,W<:Real} # checks isempty(v) && throw(ArgumentError("quantile of an empty array is undefined")) isempty(p) && throw(ArgumentError("empty quantile array")) @@ -634,8 +658,9 @@ function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector all(x -> 0 <= x <= 1, p) || throw(ArgumentError("input probability out of [0,1] range")) w.sum == 0 && throw(ArgumentError("weight vector cannot sum to zero")) - length(v) == length(w) || throw(ArgumentError("data and weight vectors must be the same size," * - "got $(length(v)) and $(length(w))")) + length(v) == length(w) || + throw(ArgumentError("data and weight vectors must be the same size," * + "got $(length(v)) and $(length(w))")) for x in w.values x < 0 && throw(ArgumentError("weight vector cannot contain negative entries")) end @@ -684,8 +709,8 @@ function quantile(v::AbstractVector{V}, w::AbstractWeights{W}, p::AbstractVector while Sk <= h k += 1 if k > N - # out was initialized with maximum v - return out + # out was initialized with maximum v + return out end Skold, vkold = Sk, vk vk, wk = vw[k] diff --git a/test/counts.jl b/test/counts.jl index cb16abe3f..7e141def8 100644 --- a/test/counts.jl +++ b/test/counts.jl @@ -11,40 +11,39 @@ n = 5000 w0 = deepcopy(w) c0 = Int[count(v->v == i, x) for i in 1:5] - @test counts(x, 5) == c0 - @test counts(x .+ 1, 2:6) == c0 - @test proportions(x, 1:5) ≈ (c0 ./ n) + @test counts(x, 5) == c0 + @test counts(x .+ 1, 2:6) == c0 + @test proportions(x, 1:5) ≈ (c0 ./ n) @test counts(reshape(x, 10, 50, 10), 5) == c0 - @test counts(x) == c0 - @test proportions(x) ≈ (c0 ./ n) + @test counts(x) == c0 + @test proportions(x) ≈ (c0 ./ n) @test counts(reshape(x, 10, 50, 10)) == c0 c0 = reshape(c0, 1, 5) - @test addcounts!(fill(0, 1, 5), x, 1:5) == c0 + @test addcounts!(fill(0, 1, 5), x, 1:5) == c0 @test addcounts!(fill(0, 1, 5), reshape(x, 10, 50, 10), 1:5) == c0 c0 = Float64[sum(w.values[x .== i]) for i in 1:5] - @test counts(x, 5, w) ≈ c0 - @test counts(x .+ 1, 2:6, w) ≈ c0 - @test proportions(x, 1:5, w) ≈ (c0 ./ sum(w)) + @test counts(x, 5, w) ≈ c0 + @test counts(x .+ 1, 2:6, w) ≈ c0 + @test proportions(x, 1:5, w) ≈ (c0 ./ sum(w)) @test counts(reshape(x, 10, 50, 10), 5, w) ≈ c0 # Perhaps this should not be allowed - @test counts(x, w) ≈ c0 - @test counts(x .+ 1, 2:6, w) ≈ c0 - @test proportions(x, w) ≈ (c0 ./ sum(w)) + @test counts(x, w) ≈ c0 + @test counts(x .+ 1, 2:6, w) ≈ c0 + @test proportions(x, w) ≈ (c0 ./ sum(w)) @test counts(reshape(x, 10, 50, 10), w) ≈ c0 # Perhaps this should not be allowed #addcounts! 
to row matrix c0 = reshape(c0, 1, 5) - @test addcounts!(fill(0.0, 1, 5), x, 1:5, w) ≈ c0 + @test addcounts!(fill(0.0, 1, 5), x, 1:5, w) ≈ c0 @test addcounts!(fill(0.0, 1, 5), reshape(x, 10, 50, 10), 1:5, w) ≈ c0 # Perhaps this should not be allowed @test x == x0 @test w == w0 end - @testset "2D integer counts" begin x = rand(1:4, n) y = rand(1:5, n) @@ -53,26 +52,26 @@ end y0 = deepcopy(y) w0 = deepcopy(w) - c0 = Int[count(t->t != 0, (x .== i) .& (y .== j)) for i in 1:4, j in 1:5] - @test counts(x, y, (4, 5)) == c0 - @test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0 - @test proportions(x, y, (1:4, 1:5)) ≈ (c0 ./ n) + c0 = Int[count(t->t != 0, (x .== i) .& (y .== j)) for i in 1:4, j in 1:5] + @test counts(x, y, (4, 5)) == c0 + @test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0 + @test proportions(x, y, (1:4, 1:5)) ≈ (c0 ./ n) @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10), (4, 5)) == c0 - @test counts(x, y) == c0 - @test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0 - @test proportions(x, y,) ≈ (c0 ./ n) + @test counts(x, y) == c0 + @test counts(x .+ 2, y .+ 3, (3:6, 4:8)) == c0 + @test proportions(x, y) ≈ (c0 ./ n) @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10)) == c0 c0 = Float64[sum(w.values[(x .== i) .& (y .== j)]) for i in 1:4, j in 1:5] - @test counts(x, y, (4, 5), w) ≈ c0 - @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0 - @test proportions(x, y, (1:4, 1:5), w) ≈ (c0 ./ sum(w)) + @test counts(x, y, (4, 5), w) ≈ c0 + @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0 + @test proportions(x, y, (1:4, 1:5), w) ≈ (c0 ./ sum(w)) @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10), (4, 5), w) ≈ c0 # Perhaps this should not be allowed - @test counts(x, y, w) ≈ c0 - @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0 - @test proportions(x, y, w) ≈ (c0 ./ sum(w)) + @test counts(x, y, w) ≈ c0 + @test counts(x .+ 2, y .+ 3, (3:6, 4:8), w) ≈ c0 + @test proportions(x, y, w) ≈ (c0 ./ sum(w)) @test counts(reshape(x, 10, 50, 10), reshape(y, 10, 50, 10), w) ≈ c0 # Perhaps this should not be allowed @test x == x0 @@ -93,15 +92,14 @@ end cm_missing = countmap(skipmissing(x)) cm_any_itr = countmap((i for i in x)) @test cm_missing == cm_any_itr == cm - @test cm_missing isa Dict{String, Int} - @test cm_any_itr isa Dict{Any, Int} + @test cm_missing isa Dict{String,Int} + @test cm_any_itr isa Dict{Any,Int} pm = proportionmap(x) @test pm["a"] ≈ (1/2) @test pm["b"] ≈ (1/3) @test pm["c"] ≈ (1/6) - # testing the radixsort branch of countmap xx = repeat([6, 1, 3, 1], outer=100_000) cm = countmap(xx) @@ -109,7 +107,7 @@ end # with iterator cm_missing = countmap(skipmissing(xx)) - @test cm_missing isa Dict{Int, Int} + @test cm_missing isa Dict{Int,Int} @test cm_missing == cm cm_any_itr = countmap((i for i in xx)) @@ -118,33 +116,33 @@ end # with multidimensional array @test countmap(reshape(xx, 20, 100, 20, 10); alg=:radixsort) == cm - @test countmap(reshape(xx, 20, 100, 20, 10); alg=:dict) == cm + @test countmap(reshape(xx, 20, 100, 20, 10); alg=:dict) == cm # with empty array - @test countmap(Int[]) == Dict{Int, Int}() + @test countmap(Int[]) == Dict{Int,Int}() # testing the radixsort-based addcounts xx = repeat([6, 1, 3, 1], outer=100_000) - cm = Dict{Int, Int}() - StatsBase.addcounts_radixsort!(cm,xx) + cm = Dict{Int,Int}() + StatsBase.addcounts_radixsort!(cm, xx) @test cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000) xx2 = repeat([7, 1, 3, 1], outer=100_000) - StatsBase.addcounts_radixsort!(cm,xx2) + StatsBase.addcounts_radixsort!(cm, xx2) @test cm == Dict(1 => 400_000, 3 => 200_000, 6 => 
100_000, 7 => 100_000) # with iterator - cm_missing = Dict{Int, Int}() - StatsBase.addcounts_radixsort!(cm_missing,skipmissing(xx)) + cm_missing = Dict{Int,Int}() + StatsBase.addcounts_radixsort!(cm_missing, skipmissing(xx)) @test cm_missing == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000) - StatsBase.addcounts_radixsort!(cm_missing,skipmissing(xx2)) + StatsBase.addcounts_radixsort!(cm_missing, skipmissing(xx2)) @test cm_missing == Dict(1 => 400_000, 3 => 200_000, 6 => 100_000, 7 => 100_000) # testing the Dict-based addcounts - cm = Dict{Int, Int}() - cm_itr = Dict{Int, Int}() - StatsBase.addcounts_dict!(cm,xx) - StatsBase.addcounts_dict!(cm_itr,skipmissing(xx)) + cm = Dict{Int,Int}() + cm_itr = Dict{Int,Int}() + StatsBase.addcounts_dict!(cm, xx) + StatsBase.addcounts_dict!(cm_itr, skipmissing(xx)) @test cm_itr == cm == Dict(1 => 200_000, 3 => 100_000, 6 => 100_000) - @test cm_itr isa Dict{Int, Int} + @test cm_itr isa Dict{Int,Int} cm = countmap(x, weights(w)) @test cm["a"] == 5.5 @@ -163,27 +161,28 @@ end bx = [true, false, true, true, false] cm_bx_missing = countmap(skipmissing(bx)) @test cm_bx_missing == countmap(bx) == Dict(true => 3, false => 2) - @test cm_bx_missing isa Dict{Bool, Int} + @test cm_bx_missing isa Dict{Bool,Int} for T in [UInt8, UInt16, Int8, Int16] tx = T[typemin(T), 8, typemax(T), 19, 8] tx_missing = skipmissing(T[typemin(T), 8, typemax(T), 19, 8]) cm_tx_missing = countmap(tx_missing) - @test cm_tx_missing == countmap(tx) == Dict(typemin(T) => 1, typemax(T) => 1, 8 => 2, 19 => 1) - @test cm_tx_missing isa Dict{T, Int} + @test cm_tx_missing == countmap(tx) == + Dict(typemin(T) => 1, typemax(T) => 1, 8 => 2, 19 => 1) + @test cm_tx_missing isa Dict{T,Int} end # -0.0 and NaN @test countmap([0.0, -0.0, 0.0, -0.0, -0.0], alg=:dict) == - countmap([0.0, -0.0, 0.0, -0.0, -0.0], alg=:radixsort) == - Dict(0.0 => 2, -0.0 => 3) + countmap([0.0, -0.0, 0.0, -0.0, -0.0], alg=:radixsort) == + Dict(0.0 => 2, -0.0 => 3) @test countmap([NaN, NaN], alg=:dict) == - countmap([NaN, NaN], alg=:radixsort) == - Dict(NaN => 2) + countmap([NaN, NaN], alg=:radixsort) == + Dict(NaN => 2) end @testset "views" begin - X = view([1,1,1,2,2], 1:5) + X = view([1, 1, 1, 2, 2], 1:5) @test countmap(X) == countmap(copy(X)) end @@ -197,13 +196,13 @@ end zw = weights(OffsetArray(w, -2n)) # proportions calls counts which calls addcounts! - @test proportions(x) == proportions(y) == proportions(z) - @test proportions(x, xw) == proportions(y, yw) == proportions(z, zw) - @test proportionmap(x) == proportionmap(y) == proportionmap(z) + @test proportions(x) == proportions(y) == proportions(z) + @test proportions(x, xw) == proportions(y, yw) == proportions(z, zw) + @test proportionmap(x) == proportionmap(y) == proportionmap(z) @test proportionmap(x, xw) == proportionmap(y, yw) == proportionmap(z, zw) - @test countmap(x) == countmap(x; alg = :dict) == countmap(x; alg = :radixsort) == - countmap(y) == countmap(y; alg = :dict) == countmap(y; alg = :radixsort) == - countmap(z) == countmap(z; alg = :dict) == countmap(z; alg = :radixsort) + @test countmap(x) == countmap(x; alg=:dict) == countmap(x; alg=:radixsort) == + countmap(y) == countmap(y; alg=:dict) == countmap(y; alg=:radixsort) == + countmap(z) == countmap(z; alg=:dict) == countmap(z; alg=:radixsort) @test proportionmap(x, xw) == proportionmap(y, yw) == proportionmap(z, zw) # countmap and proportionmap only support the :dict algorithm for weighted sums. 
end diff --git a/test/cov.jl b/test/cov.jl index 27543b31e..33c21e132 100644 --- a/test/cov.jl +++ b/test/cov.jl @@ -4,330 +4,376 @@ using LinearAlgebra, Random, Test struct EmptyCovarianceEstimator <: CovarianceEstimator end @testset "StatsBase.Covariance" begin -weight_funcs = (weights, aweights, fweights, pweights) - -function test_isapprox_preserves_symherm_structure(f::F, x::AbstractMatrix, y::AbstractMatrix, args...) where F - for wrapper in (identity, x -> Symmetric(x, :U), x -> Symmetric(x, :L), x -> Hermitian(x, :U), x -> Hermitian(x, :L)) - A = wrapper(copy(x)) - fA = @inferred(f(A, args...)) - @test fA ≈ y - if f === StatsBase.cov2cor! || f === StatsBase.cor2cov! - @test fA === A - if A isa Union{Symmetric,Hermitian} - @test parent(fA) != fA # only active triangle is written to - end - else - @test fA !== A - if A isa Union{Symmetric,Hermitian} - @test fA isa (A isa Symmetric ? Symmetric : Hermitian) - @test fA.uplo == A.uplo - @test parent(fA) != fA # only active triangle is written to + weight_funcs = (weights, aweights, fweights, pweights) + + function test_isapprox_preserves_symherm_structure(f::F, x::AbstractMatrix, + y::AbstractMatrix, args...) where {F} + for wrapper in + (identity, x -> Symmetric(x, :U), x -> Symmetric(x, :L), x -> Hermitian(x, :U), + x -> Hermitian(x, :L)) + A = wrapper(copy(x)) + fA = @inferred(f(A, args...)) + @test fA ≈ y + if f === StatsBase.cov2cor! || f === StatsBase.cor2cov! + @test fA === A + if A isa Union{Symmetric,Hermitian} + @test parent(fA) != fA # only active triangle is written to + end + else + @test fA !== A + if A isa Union{Symmetric,Hermitian} + @test fA isa (A isa Symmetric ? Symmetric : Hermitian) + @test fA.uplo == A.uplo + @test parent(fA) != fA # only active triangle is written to + end end end end -end - -@testset "$f" for f in weight_funcs - X = randn(3, 8) - - Z1 = X .- mean(X, dims = 1) - Z2 = X .- mean(X, dims = 2) - - w1 = rand(3) - w2 = rand(8) - - # varcorrection is negative if sum of weights is smaller than 1 - if f === fweights - w1[1] += 1 - w2[1] += 1 - end - - wv1 = f(w1) - wv2 = f(w2) - - Z1w = X .- mean(X, wv1, dims=1) - Z2w = X .- mean(X, wv2, dims=2) - - ## reference results - - S1 = Z1'Z1 - S2 = Z2 * Z2' - - Sz1 = X'X - Sz2 = X * X' - - S1w = Z1w' * Matrix(Diagonal(w1)) * Z1w - S2w = Z2w * Matrix(Diagonal(w2)) * Z2w' - Sz1w = X' * Matrix(Diagonal(w1)) * X - Sz2w = X * Matrix(Diagonal(w2)) * X' + @testset "$f" for f in weight_funcs + X = randn(3, 8) - @testset "Scattermat" begin - @test scattermat(X) ≈ S1 - @test scattermat(X, dims=2) ≈ S2 + Z1 = X .- mean(X, dims=1) + Z2 = X .- mean(X, dims=2) - @test StatsBase.scattermat(X, mean=0) ≈ Sz1 - @test StatsBase.scattermat(X, mean=0, dims=2) ≈ Sz2 + w1 = rand(3) + w2 = rand(8) - @test StatsBase.scattermat(X, mean=mean(X, dims=1)) ≈ S1 - @test StatsBase.scattermat(X, mean=mean(X, dims=2), dims=2) ≈ S2 - - @test StatsBase.scattermat(X, mean=zeros(1,8)) ≈ Sz1 - @test StatsBase.scattermat(X, mean=zeros(3), dims=2) ≈ Sz2 + # varcorrection is negative if sum of weights is smaller than 1 + if f === fweights + w1[1] += 1 + w2[1] += 1 + end - @testset "Weighted" begin - @test scattermat(X, wv1) ≈ S1w - @test scattermat(X, wv2, dims=2) ≈ S2w + wv1 = f(w1) + wv2 = f(w2) - @test StatsBase.scattermat(X, wv1, mean=0) ≈ Sz1w - @test StatsBase.scattermat(X, wv2, mean=0, dims=2) ≈ Sz2w + Z1w = X .- mean(X, wv1, dims=1) + Z2w = X .- mean(X, wv2, dims=2) - @test StatsBase.scattermat(X, wv1, mean=mean(X, wv1, dims=1)) ≈ S1w - @test StatsBase.scattermat(X, wv2, mean=mean(X, wv2, dims=2), 
dims=2) ≈ S2w + ## reference results - @test StatsBase.scattermat(X, wv1, mean=zeros(1,8)) ≈ Sz1w - @test StatsBase.scattermat(X, wv2, mean=zeros(3), dims=2) ≈ Sz2w - end - end + S1 = Z1'Z1 + S2 = Z2 * Z2' - @testset "Uncorrected" begin - @testset "Weighted Covariance" begin - @test cov(X, wv1; corrected=false) ≈ S1w ./ sum(wv1) - @test cov(X, wv2, 2; corrected=false) ≈ S2w ./ sum(wv2) + Sz1 = X'X + Sz2 = X * X' - @test StatsBase.covm(X, 0, wv1, 1; corrected=false) ≈ Sz1w ./ sum(wv1) - @test StatsBase.covm(X, 0, wv2, 2; corrected=false) ≈ Sz2w ./ sum(wv2) - - @test StatsBase.covm(X, mean(X, wv1, dims=1), wv1, 1; corrected=false) ≈ S1w ./ sum(wv1) - @test StatsBase.covm(X, mean(X, wv2, dims=2), wv2, 2; corrected=false) ≈ S2w ./ sum(wv2) - - @test StatsBase.covm(X, zeros(1,8), wv1, 1; corrected=false) ≈ Sz1w ./ sum(wv1) - @test StatsBase.covm(X, zeros(3), wv2, 2; corrected=false) ≈ Sz2w ./ sum(wv2) - end + S1w = Z1w' * Matrix(Diagonal(w1)) * Z1w + S2w = Z2w * Matrix(Diagonal(w2)) * Z2w' - @testset "Mean and covariance" begin - (m, C) = mean_and_cov(X; corrected=false) - @test m == mean(X, dims=1) - @test C == cov(X, dims=1, corrected=false) + Sz1w = X' * Matrix(Diagonal(w1)) * X + Sz2w = X * Matrix(Diagonal(w2)) * X' - (m, C) = mean_and_cov(X, 1; corrected=false) - @test m == mean(X, dims=1) - @test C == cov(X, dims=1, corrected = false) + @testset "Scattermat" begin + @test scattermat(X) ≈ S1 + @test scattermat(X, dims=2) ≈ S2 - (m, C) = mean_and_cov(X, 2; corrected=false) - @test m == mean(X, dims=2) - @test C == cov(X, dims=2, corrected = false) + @test StatsBase.scattermat(X, mean=0) ≈ Sz1 + @test StatsBase.scattermat(X, mean=0, dims=2) ≈ Sz2 - (m, C) = mean_and_cov(X, wv1; corrected=false) - @test m == mean(X, wv1, dims=1) - @test C == cov(X, wv1, 1, corrected=false) + @test StatsBase.scattermat(X, mean=mean(X, dims=1)) ≈ S1 + @test StatsBase.scattermat(X, mean=mean(X, dims=2), dims=2) ≈ S2 - (m, C) = mean_and_cov(X, wv1, 1; corrected=false) - @test m == mean(X, wv1, dims=1) - @test C == cov(X, wv1, 1, corrected=false) + @test StatsBase.scattermat(X, mean=zeros(1, 8)) ≈ Sz1 + @test StatsBase.scattermat(X, mean=zeros(3), dims=2) ≈ Sz2 - (m, C) = mean_and_cov(X, wv2, 2; corrected=false) - @test m == mean(X, wv2, dims=2) - @test C == cov(X, wv2, 2, corrected=false) - end - @testset "Conversions" begin - std1 = std(X, wv1, 1; corrected=false) - std2 = std(X, wv2, 2; corrected=false) + @testset "Weighted" begin + @test scattermat(X, wv1) ≈ S1w + @test scattermat(X, wv2, dims=2) ≈ S2w - cov1 = cov(X, wv1, 1; corrected=false) - cov2 = cov(X, wv2, 2; corrected=false) + @test StatsBase.scattermat(X, wv1, mean=0) ≈ Sz1w + @test StatsBase.scattermat(X, wv2, mean=0, dims=2) ≈ Sz2w - cor1 = cor(X, wv1, 1) - cor2 = cor(X, wv2, 2) + @test StatsBase.scattermat(X, wv1, mean=mean(X, wv1, dims=1)) ≈ S1w + @test StatsBase.scattermat(X, wv2, mean=mean(X, wv2, dims=2), dims=2) ≈ S2w - @testset "cov2cor" begin - test_isapprox_preserves_symherm_structure(cov2cor, cov(X, dims = 1), cor(X, dims = 1), std(X, dims = 1)) - test_isapprox_preserves_symherm_structure(cov2cor, cov(X, dims = 2), cor(X, dims = 2), std(X, dims = 2)) - test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1) - test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2) - test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1, std1) - test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2, std2) - end - @testset "StatsBase.cov2cor!" 
begin - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov(X, dims = 1), cor(X, dims = 1), std(X, dims = 1)) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov(X, dims = 2), cor(X, dims = 2), std(X, dims = 2)) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1, std1) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2, std2) - end - @testset "cor2cov" begin - test_isapprox_preserves_symherm_structure(cor2cov, cor(X, dims = 1), cov(X, dims = 1), std(X, dims = 1)) - test_isapprox_preserves_symherm_structure(cor2cov, cor(X, dims = 2), cov(X, dims = 2), std(X, dims = 2)) - test_isapprox_preserves_symherm_structure(cor2cov, cor1, cov1, std1) - test_isapprox_preserves_symherm_structure(cor2cov, cor2, cov2, std2) - end - @testset "StatsBase.cor2cov!" begin - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor(X, dims = 1), cov(X, dims = 1), std(X, dims = 1)) - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor(X, dims = 2), cov(X, dims = 2), std(X, dims = 2)) - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor1, cov1, std1) - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor2, cov2, std2) + @test StatsBase.scattermat(X, wv1, mean=zeros(1, 8)) ≈ Sz1w + @test StatsBase.scattermat(X, wv2, mean=zeros(3), dims=2) ≈ Sz2w end end - end - - @testset "Corrected" begin - @testset "Weighted Covariance" begin - if isa(wv1, Weights) - @test_throws ArgumentError cov(X, wv1; corrected=true) - else - var_corr1 = StatsBase.varcorrection(wv1, true) - var_corr2 = StatsBase.varcorrection(wv2, true) - @test cov(X, wv1; corrected=true) ≈ S1w .* var_corr1 - @test cov(X, wv2, 2; corrected=true) ≈ S2w .* var_corr2 + @testset "Uncorrected" begin + @testset "Weighted Covariance" begin + @test cov(X, wv1; corrected=false) ≈ S1w ./ sum(wv1) + @test cov(X, wv2, 2; corrected=false) ≈ S2w ./ sum(wv2) - @test StatsBase.covm(X, 0, wv1, 1; corrected=true) ≈ Sz1w .* var_corr1 - @test StatsBase.covm(X, 0, wv2, 2; corrected=true) ≈ Sz2w .* var_corr2 + @test StatsBase.covm(X, 0, wv1, 1; corrected=false) ≈ Sz1w ./ sum(wv1) + @test StatsBase.covm(X, 0, wv2, 2; corrected=false) ≈ Sz2w ./ sum(wv2) - @test StatsBase.covm(X, mean(X, wv1, dims=1), wv1, 1; corrected=true) ≈ S1w .* var_corr1 - @test StatsBase.covm(X, mean(X, wv2, dims=2), wv2, 2; corrected=true) ≈ S2w .* var_corr2 + @test StatsBase.covm(X, mean(X, wv1, dims=1), wv1, 1; corrected=false) ≈ + S1w ./ sum(wv1) + @test StatsBase.covm(X, mean(X, wv2, dims=2), wv2, 2; corrected=false) ≈ + S2w ./ sum(wv2) - @test StatsBase.covm(X, zeros(1,8), wv1, 1; corrected=true) ≈ Sz1w .* var_corr1 - @test StatsBase.covm(X, zeros(3), wv2, 2; corrected=true) ≈ Sz2w .* var_corr2 + @test StatsBase.covm(X, zeros(1, 8), wv1, 1; corrected=false) ≈ + Sz1w ./ sum(wv1) + @test StatsBase.covm(X, zeros(3), wv2, 2; corrected=false) ≈ + Sz2w ./ sum(wv2) end - end - @testset "Mean and covariance" begin - (m, C) = mean_and_cov(X; corrected=true) - @test m == mean(X, dims=1) - @test C == cov(X, dims=1, corrected = true) - (m, C) = mean_and_cov(X, 1; corrected=true) - @test m == mean(X, dims=1) - @test C == cov(X, dims=1, corrected = true) + @testset "Mean and covariance" begin + (m, C) = mean_and_cov(X; corrected=false) + @test m == mean(X, dims=1) + @test C == cov(X, dims=1, corrected=false) - (m, C) = mean_and_cov(X, 2; 
corrected=true) - @test m == mean(X, dims=2) - @test C == cov(X, dims=2, corrected = true) + (m, C) = mean_and_cov(X, 1; corrected=false) + @test m == mean(X, dims=1) + @test C == cov(X, dims=1, corrected=false) - if isa(wv1, Weights) - @test_throws ArgumentError mean_and_cov(X, wv1; corrected=true) - else - (m, C) = mean_and_cov(X, wv1; corrected=true) + (m, C) = mean_and_cov(X, 2; corrected=false) + @test m == mean(X, dims=2) + @test C == cov(X, dims=2, corrected=false) + + (m, C) = mean_and_cov(X, wv1; corrected=false) @test m == mean(X, wv1, dims=1) - @test C == cov(X, wv1, 1; corrected=true) + @test C == cov(X, wv1, 1, corrected=false) - (m, C) = mean_and_cov(X, wv1, 1; corrected=true) + (m, C) = mean_and_cov(X, wv1, 1; corrected=false) @test m == mean(X, wv1, dims=1) - @test C == cov(X, wv1, 1; corrected=true) + @test C == cov(X, wv1, 1, corrected=false) - (m, C) = mean_and_cov(X, wv2, 2; corrected=true) + (m, C) = mean_and_cov(X, wv2, 2; corrected=false) @test m == mean(X, wv2, dims=2) - @test C == cov(X, wv2, 2; corrected=true) + @test C == cov(X, wv2, 2, corrected=false) end - end - @testset "Conversions" begin - if !isa(wv1, Weights) - std1 = std(X, wv1, 1; corrected=true) - std2 = std(X, wv2, 2; corrected=true) + @testset "Conversions" begin + std1 = std(X, wv1, 1; corrected=false) + std2 = std(X, wv2, 2; corrected=false) - cov1 = cov(X, wv1, 1; corrected=true) - cov2 = cov(X, wv2, 2; corrected=true) + cov1 = cov(X, wv1, 1; corrected=false) + cov2 = cov(X, wv2, 2; corrected=false) cor1 = cor(X, wv1, 1) cor2 = cor(X, wv2, 2) @testset "cov2cor" begin + test_isapprox_preserves_symherm_structure(cov2cor, cov(X, dims=1), + cor(X, dims=1), + std(X, dims=1)) + test_isapprox_preserves_symherm_structure(cov2cor, cov(X, dims=2), + cor(X, dims=2), + std(X, dims=2)) test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1) test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2) test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1, std1) test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2, std2) end @testset "StatsBase.cov2cor!" begin - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, cor1, std1) - test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, cor2, std2) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, + cov(X, dims=1), + cor(X, dims=1), + std(X, dims=1)) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, + cov(X, dims=2), + cor(X, dims=2), + std(X, dims=2)) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, + cor1) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, + cor2) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, + cor1, std1) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, + cor2, std2) end @testset "cor2cov" begin + test_isapprox_preserves_symherm_structure(cor2cov, cor(X, dims=1), + cov(X, dims=1), + std(X, dims=1)) + test_isapprox_preserves_symherm_structure(cor2cov, cor(X, dims=2), + cov(X, dims=2), + std(X, dims=2)) test_isapprox_preserves_symherm_structure(cor2cov, cor1, cov1, std1) test_isapprox_preserves_symherm_structure(cor2cov, cor2, cov2, std2) end @testset "StatsBase.cor2cov!" 
begin - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor1, cov1, std1) - test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor2, cov2, std2) + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, + cor(X, dims=1), + cov(X, dims=1), + std(X, dims=1)) + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, + cor(X, dims=2), + cov(X, dims=2), + std(X, dims=2)) + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor1, + cov1, std1) + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor2, + cov2, std2) end end end - end - @testset "Correlation" begin - @test cor(X, f(ones(3)), 1) ≈ cor(X, dims = 1) - @test cor(X, f(ones(8)), 2) ≈ cor(X, dims = 2) + @testset "Corrected" begin + @testset "Weighted Covariance" begin + if isa(wv1, Weights) + @test_throws ArgumentError cov(X, wv1; corrected=true) + else + var_corr1 = StatsBase.varcorrection(wv1, true) + var_corr2 = StatsBase.varcorrection(wv2, true) + + @test cov(X, wv1; corrected=true) ≈ S1w .* var_corr1 + @test cov(X, wv2, 2; corrected=true) ≈ S2w .* var_corr2 + + @test StatsBase.covm(X, 0, wv1, 1; corrected=true) ≈ Sz1w .* var_corr1 + @test StatsBase.covm(X, 0, wv2, 2; corrected=true) ≈ Sz2w .* var_corr2 + + @test StatsBase.covm(X, mean(X, wv1, dims=1), wv1, 1; corrected=true) ≈ + S1w .* var_corr1 + @test StatsBase.covm(X, mean(X, wv2, dims=2), wv2, 2; corrected=true) ≈ + S2w .* var_corr2 + + @test StatsBase.covm(X, zeros(1, 8), wv1, 1; corrected=true) ≈ + Sz1w .* var_corr1 + @test StatsBase.covm(X, zeros(3), wv2, 2; corrected=true) ≈ + Sz2w .* var_corr2 + end + end + @testset "Mean and covariance" begin + (m, C) = mean_and_cov(X; corrected=true) + @test m == mean(X, dims=1) + @test C == cov(X, dims=1, corrected=true) + + (m, C) = mean_and_cov(X, 1; corrected=true) + @test m == mean(X, dims=1) + @test C == cov(X, dims=1, corrected=true) + + (m, C) = mean_and_cov(X, 2; corrected=true) + @test m == mean(X, dims=2) + @test C == cov(X, dims=2, corrected=true) + + if isa(wv1, Weights) + @test_throws ArgumentError mean_and_cov(X, wv1; corrected=true) + else + (m, C) = mean_and_cov(X, wv1; corrected=true) + @test m == mean(X, wv1, dims=1) + @test C == cov(X, wv1, 1; corrected=true) + + (m, C) = mean_and_cov(X, wv1, 1; corrected=true) + @test m == mean(X, wv1, dims=1) + @test C == cov(X, wv1, 1; corrected=true) + + (m, C) = mean_and_cov(X, wv2, 2; corrected=true) + @test m == mean(X, wv2, dims=2) + @test C == cov(X, wv2, 2; corrected=true) + end + end + @testset "Conversions" begin + if !isa(wv1, Weights) + std1 = std(X, wv1, 1; corrected=true) + std2 = std(X, wv2, 2; corrected=true) + + cov1 = cov(X, wv1, 1; corrected=true) + cov2 = cov(X, wv2, 2; corrected=true) + + cor1 = cor(X, wv1, 1) + cor2 = cor(X, wv2, 2) + + @testset "cov2cor" begin + test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1) + test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2) + test_isapprox_preserves_symherm_structure(cov2cor, cov1, cor1, std1) + test_isapprox_preserves_symherm_structure(cov2cor, cov2, cor2, std2) + end + @testset "StatsBase.cov2cor!" 
begin + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, + cor1) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, + cor2) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov1, + cor1, std1) + test_isapprox_preserves_symherm_structure(StatsBase.cov2cor!, cov2, + cor2, std2) + end + @testset "cor2cov" begin + test_isapprox_preserves_symherm_structure(cor2cov, cor1, cov1, std1) + test_isapprox_preserves_symherm_structure(cor2cov, cor2, cov2, std2) + end + @testset "StatsBase.cor2cov!" begin + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor1, + cov1, std1) + test_isapprox_preserves_symherm_structure(StatsBase.cor2cov!, cor2, + cov2, std2) + end + end + end + end - cov1 = cov(X, wv1, 1; corrected=false) - std1 = std(X, wv1, 1; corrected=false) - cov2 = cov(X, wv2, 2; corrected=false) - std2 = std(X, wv2, 2; corrected=false) - expected_cor1 = StatsBase.cov2cor!(cov1, std1) - expected_cor2 = StatsBase.cov2cor!(cov2, std2) + @testset "Correlation" begin + @test cor(X, f(ones(3)), 1) ≈ cor(X, dims=1) + @test cor(X, f(ones(8)), 2) ≈ cor(X, dims=2) - @test cor(X, wv1, 1) ≈ expected_cor1 - @test cor(X, wv2, 2) ≈ expected_cor2 - end + cov1 = cov(X, wv1, 1; corrected=false) + std1 = std(X, wv1, 1; corrected=false) + cov2 = cov(X, wv2, 2; corrected=false) + std2 = std(X, wv2, 2; corrected=false) + expected_cor1 = StatsBase.cov2cor!(cov1, std1) + expected_cor2 = StatsBase.cov2cor!(cov2, std2) - @testset "Abstract covariance estimation" begin - Xm1 = mean(X, dims=1) - Xm2 = mean(X, dims=2) + @test cor(X, wv1, 1) ≈ expected_cor1 + @test cor(X, wv2, 2) ≈ expected_cor2 + end - for corrected ∈ (false, true) - scc = SimpleCovariance(corrected=corrected) - @test_throws ArgumentError cov(scc, X, dims=0) - @test_throws ArgumentError cov(scc, X, wv1, dims=0) - @test cov(scc, X) ≈ cov(X, corrected=corrected) - @test cov(scc, X, mean=Xm1) ≈ StatsBase.covm(X, Xm1, corrected=corrected) - @test cov(scc, X, mean=Xm2, dims=2) ≈ StatsBase.covm(X, Xm2, 2, corrected=corrected) - if f !== weights || corrected === false - @test cov(scc, X, wv1, dims=1) ≈ cov(X, wv1, 1, corrected=corrected) - @test cov(scc, X, wv2, dims=2) ≈ cov(X, wv2, 2, corrected=corrected) - @test cov(scc, X, wv1, mean=Xm1) ≈ StatsBase.covm(X, Xm1, wv1, corrected=corrected) - @test cov(scc, X, wv2, mean=Xm2, dims=2) ≈ StatsBase.covm(X, Xm2, wv2, 2, corrected=corrected) + @testset "Abstract covariance estimation" begin + Xm1 = mean(X, dims=1) + Xm2 = mean(X, dims=2) + + for corrected in (false, true) + scc = SimpleCovariance(corrected=corrected) + @test_throws ArgumentError cov(scc, X, dims=0) + @test_throws ArgumentError cov(scc, X, wv1, dims=0) + @test cov(scc, X) ≈ cov(X, corrected=corrected) + @test cov(scc, X, mean=Xm1) ≈ StatsBase.covm(X, Xm1, corrected=corrected) + @test cov(scc, X, mean=Xm2, dims=2) ≈ + StatsBase.covm(X, Xm2, 2, corrected=corrected) + if f !== weights || corrected === false + @test cov(scc, X, wv1, dims=1) ≈ cov(X, wv1, 1, corrected=corrected) + @test cov(scc, X, wv2, dims=2) ≈ cov(X, wv2, 2, corrected=corrected) + @test cov(scc, X, wv1, mean=Xm1) ≈ + StatsBase.covm(X, Xm1, wv1, corrected=corrected) + @test cov(scc, X, wv2, mean=Xm2, dims=2) ≈ + StatsBase.covm(X, Xm2, wv2, 2, corrected=corrected) + end end end end -end - -@testset "Abstract covariance estimation" begin - est = EmptyCovarianceEstimator() - wv = fweights(rand(2)) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0]) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv) - 
@test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], dims = 2) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, dims = 2) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], mean = nothing) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, mean = nothing) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], dims = 2, mean = nothing) - @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, dims = 2, mean = nothing) - @test_throws ErrorException cov(est, [1.0, 2.0], [3.0, 4.0]) - @test_throws ErrorException cov(est, [1.0, 2.0]) - - x = rand(8) - y = rand(8) - wv = fweights(rand(8)) - X = hcat(x, y) - - for corrected ∈ (false, true) - @test_throws MethodError SimpleCovariance(corrected) - scc = SimpleCovariance(corrected=corrected) - @test cov(scc, x) ≈ cov(x; corrected=corrected) - @test cov(scc, x, y) ≈ cov(x, y; corrected=corrected) - @test cov(scc, X) ≈ cov(X; corrected=corrected) - @test cov(scc, X, wv) ≈ cov(X, wv; corrected=corrected) - - @test var(scc, x) ≈ var(x; corrected=corrected) - @test std(scc, x) ≈ std(x; corrected=corrected) - - # NB That we should get the same correlation regardless of `corrected`, since it - # only affects the overall scale of the covariance. This cancels out when turning - # it into a correlation matrix. - @test cor(scc, x, y) ≈ cor(x, y) - @test cor(scc, X) ≈ cor(X) - @test cor(scc, X, wv) ≈ cor(X, wv) + + @testset "Abstract covariance estimation" begin + est = EmptyCovarianceEstimator() + wv = fweights(rand(2)) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0]) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], dims=2) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, dims=2) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], mean=nothing) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, mean=nothing) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], dims=2, mean=nothing) + @test_throws ErrorException cov(est, [1.0 2.0; 3.0 4.0], wv, dims=2, mean=nothing) + @test_throws ErrorException cov(est, [1.0, 2.0], [3.0, 4.0]) + @test_throws ErrorException cov(est, [1.0, 2.0]) + + x = rand(8) + y = rand(8) + wv = fweights(rand(8)) + X = hcat(x, y) + + for corrected in (false, true) + @test_throws MethodError SimpleCovariance(corrected) + scc = SimpleCovariance(corrected=corrected) + @test cov(scc, x) ≈ cov(x; corrected=corrected) + @test cov(scc, x, y) ≈ cov(x, y; corrected=corrected) + @test cov(scc, X) ≈ cov(X; corrected=corrected) + @test cov(scc, X, wv) ≈ cov(X, wv; corrected=corrected) + + @test var(scc, x) ≈ var(x; corrected=corrected) + @test std(scc, x) ≈ std(x; corrected=corrected) + + # Note that we should get the same correlation regardless of `corrected`, since it + # only affects the overall scale of the covariance. This cancels out when turning + # it into a correlation matrix.
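+            # Illustrative sketch of that cancellation (editorial aside, not part of
+            # the original test suite; it reuses the 8x2 `X` and the exported
+            # `cov2cor` already in scope, and the factor 2.0 is arbitrary):
+            # scaling a covariance matrix by c > 0 scales the standard deviations
+            # by sqrt(c), and `cov2cor` divides entry (i, j) by s[i] * s[j], so the
+            # common factor c drops out of the resulting correlation matrix:
+            #
+            #     using LinearAlgebra: diag
+            #     C = cov(X; corrected=true)
+            #     s = sqrt.(diag(C))
+            #     @assert cov2cor(2.0 .* C, sqrt(2.0) .* s) ≈ cov2cor(C, s)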
+ @test cor(scc, x, y) ≈ cor(x, y) + @test cor(scc, X) ≈ cor(X) + @test cor(scc, X, wv) ≈ cor(X, wv) + end end -end end # @testset "StatsBase.Covariance" diff --git a/test/deviation.jl b/test/deviation.jl index 4ace5846d..a3acfc8aa 100644 --- a/test/deviation.jl +++ b/test/deviation.jl @@ -1,9 +1,13 @@ using StatsBase, OffsetArrays using Test -@testset "counting (arrays with element types $T1 and $T2)" for T1 in (Int, Float32, Float64), T2 in (Int, Float32, Float64) +@testset "counting (arrays with element types $T1 and $T2)" for T1 in + (Int, Float32, Float64), + T2 in + (Int, Float32, Float64) + a = T1[1, 2, 3, 4, 5, 6, 7] - b = T2[1, 3, 3, 4, 6, 7, 8] + b = T2[1, 3, 3, 4, 6, 7, 8] a_offset = OffsetArray(a, -5:1) b_offset = OffsetArray(b, -5:1) for (a, b) in ((a, b), (a_offset, b_offset)) @@ -23,13 +27,15 @@ using Test # Inconsistent lengths err = DimensionMismatch("Inconsistent array lengths.") - for (a, b) in ((a, empty_b), (empty_a, b), (a_offset, empty_b_offset), (empty_a_offset, b_offset)) + for (a, b) in + ((a, empty_b), (empty_a, b), (a_offset, empty_b_offset), (empty_a_offset, b_offset)) @test_throws err counteq(a, b) @test_throws err countne(a, b) end end -@testset "deviation (arrays with element types $T1 and $T2)" for T1 in (Float32, Float64), T2 in (Float32, Float64) +@testset "deviation (arrays with element types $T1 and $T2)" for T1 in (Float32, Float64), + T2 in (Float32, Float64) T = promote_type(T1, T2) a = rand(T1, 5, 6) b = rand(T2, 5, 6) @@ -37,18 +43,19 @@ end b_offset = OffsetArray(b, 5, -10) for (a, b) in ((a, b), (a_offset, b_offset)) @test @inferred(sqL2dist(a, b))::T ≈ sum(abs2.(a - b)) - @test @inferred(L2dist(a, b))::T ≈ sqrt(sqL2dist(a, b)) - @test @inferred(L1dist(a, b))::T ≈ sum(abs.(a - b)) + @test @inferred(L2dist(a, b))::T ≈ sqrt(sqL2dist(a, b)) + @test @inferred(L1dist(a, b))::T ≈ sum(abs.(a - b)) @test @inferred(Linfdist(a, b))::T ≈ maximum(abs.(a - b)) @test @inferred(gkldiv(a, b))::T ≈ sum(a .* log.(a ./ b) - a + b) - @test @inferred(meanad(a, b))::T ≈ mean(abs.(a - b)) - @test @inferred(maxad(a, b))::T ≈ maximum(abs.(a - b)) - @test @inferred(msd(a, b))::T ≈ mean(abs2.(a - b)) - @test @inferred(rmsd(a, b))::T ≈ sqrt(msd(a, b)) - @test @inferred(rmsd(a, b; normalize=true))::T ≈ rmsd(a, b) / (maximum(a) - minimum(a)) + @test @inferred(meanad(a, b))::T ≈ mean(abs.(a - b)) + @test @inferred(maxad(a, b))::T ≈ maximum(abs.(a - b)) + @test @inferred(msd(a, b))::T ≈ mean(abs2.(a - b)) + @test @inferred(rmsd(a, b))::T ≈ sqrt(msd(a, b)) + @test @inferred(rmsd(a, b; normalize=true))::T ≈ + rmsd(a, b) / (maximum(a) - minimum(a)) for T2 in (Int, Float32, Float64) S = promote_type(T, T2) - @test @inferred(psnr(a, b, T2(2)))::S ≈ 10 * log10(4 / msd(a, b)) + @test @inferred(psnr(a, b, T2(2)))::S ≈ 10 * log10(4 / msd(a, b)) end end @@ -75,7 +82,8 @@ end end err = DimensionMismatch("Inconsistent array lengths.") - for (a, b) in ((a, empty_b), (empty_a, b), (a_offset, empty_b_offset), (empty_a_offset, b_offset)) + for (a, b) in + ((a, empty_b), (empty_a, b), (a_offset, empty_b_offset), (empty_a_offset, b_offset)) @test_throws err sqL2dist(a, b) @test_throws err L2dist(a, b) @test_throws err L1dist(a, b) diff --git a/test/empirical.jl b/test/empirical.jl index 6af076611..eefc741c8 100644 --- a/test/empirical.jl +++ b/test/empirical.jl @@ -5,14 +5,15 @@ using Test x = randn(10000000) fnecdf = ecdf(x) y = [-1.96, -1.644854, -1.281552, -0.6744898, 0, 0.6744898, 1.281552, 1.644854, 1.96] - @test isapprox(fnecdf(y), [0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975], 
atol=1e-3) + @test isapprox(fnecdf(y), [0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975], + atol=1e-3) @test isapprox(fnecdf(1.96), 0.975, atol=1e-3) @test fnecdf(y) ≈ map(fnecdf, y) @test extrema(fnecdf) == (minimum(fnecdf), maximum(fnecdf)) == extrema(x) fnecdf = ecdf([0.5]) @test fnecdf([zeros(5000); ones(5000)]) == [zeros(5000); ones(5000)] @test extrema(fnecdf) == (minimum(fnecdf), maximum(fnecdf)) == (0.5, 0.5) - @test isnan(ecdf([1,2,3])(NaN)) + @test isnan(ecdf([1, 2, 3])(NaN)) @test_throws ArgumentError ecdf([1, NaN]) end @@ -27,7 +28,8 @@ end @test fnecdf.weights != w1 # check that w wasn't accidentally modified in place @test fnecdfalt.weights != w2 y = [-1.96, -1.644854, -1.281552, -0.6744898, 0, 0.6744898, 1.281552, 1.644854, 1.96] - @test isapprox(fnecdf(y), [0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975], atol=1e-3) + @test isapprox(fnecdf(y), [0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975], + atol=1e-3) @test isapprox(fnecdf(1.96), 0.975, atol=1e-3) @test fnecdf(y) ≈ map(fnecdf, y) @test extrema(fnecdf) == (minimum(fnecdf), maximum(fnecdf)) == extrema(x) diff --git a/test/hist.jl b/test/hist.jl index 5d82fe7f6..c89044d7d 100644 --- a/test/hist.jl +++ b/test/hist.jl @@ -2,270 +2,278 @@ using StatsBase using LinearAlgebra, Random, Test @testset "StatsBase.Histogram" begin - - -@testset "Histogram binindex and binvolume" begin - edg1 = -2:0.5:9 - edg1f0 = -2:0.5f0:9 - edg2 = [-2, -1, 2, 7, 19] - h1 = Histogram(edg1) - h2 = Histogram((edg1, edg2)) - h3 = Histogram((edg1f0, edg2)) - - @test h1 == Histogram(edg1, :left, false) - - @test @inferred StatsBase.binindex(h1, -0.5) == 4 - @test @inferred StatsBase.binindex(h2, (1.5, 2)) == (8, 3) - - @test [StatsBase.binvolume(h1, i) for i in axes(h1.weights, 1)] ≈ diff(edg1) - @test [StatsBase.binvolume(h2, (i,j)) for i in axes(h2.weights, 1), j in axes(h2.weights, 2)] ≈ diff(edg1) * diff(edg2)' - - @test typeof(@inferred(StatsBase.binvolume(h2, (1,1)))) == Float64 - @test typeof(@inferred(StatsBase.binvolume(h3, (1,1)))) == Float32 - @test typeof(@inferred(StatsBase.binvolume(Float64, h3, (1,1)))) == Float64 -end - - -@testset "Histogram append" begin - h = Histogram(0:20:100, Float64, :left, false) - @test @inferred(append!(h, 0:0.5:99.99)) == h - @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99).weights ≈ [40,40,40,40,40] - @test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99).weights ≈ [2,2,2,2,2] - @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99, fill(2, 200)).weights ≈ [80,80,80,80,80] - @test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99, fill(2, 200)).weights ≈ [4,4,4,4,4] -end - - -@testset "Histogram fit" begin - @test sum(fit(Histogram,[1,2,3]).weights) == 3 - @test fit(Histogram,Int[]).weights == Int[] - @test fit(Histogram,[1]).weights == [1] - @test fit(Histogram,[1,2,3],[0,2,4]) == Histogram([0,2,4],[1,2], :left) - @test fit(Histogram,[1,2,3],[0,2,4]) != Histogram([0,2,4],[1,1], :left) - @test fit(Histogram,[1,2,3],0:2:4) == Histogram(0:2:4,[1,2], :left) - @test all(fit(Histogram,[0:99;]/100,0.0:0.01:1.0).weights .==1) - @test fit(Histogram,[1,1,1,1,1]).weights[1] == 5 - @test sum(fit(Histogram,(rand(100),rand(100))).weights) == 100 - @test fit(Histogram,1:100,nbins=5,closed=:right).weights == [20,20,20,20,20] - @test fit(Histogram,1:100,nbins=5,closed=:left).weights == [19,20,20,20,20,1] - @test fit(Histogram,0:99,nbins=5,closed=:right).weights == [1,20,20,20,20,19] - @test fit(Histogram,0:99,nbins=5,closed=:left).weights == 
[20,20,20,20,20] - - @test fit(Histogram,(0:99,0:99),nbins=5).weights == Matrix(Diagonal([20,20,20,20,20])) - @test fit(Histogram,(0:99,0:99),nbins=(5,5)).weights == Matrix(Diagonal([20,20,20,20,20])) - - @test fit(Histogram,0:99,weights(ones(100)),nbins=5).weights == [20,20,20,20,20] - @test fit(Histogram,0:99,weights(2*ones(100)),nbins=5).weights == [40,40,40,40,40] - @test fit(Histogram{Int32},0:99,weights(2*ones(100)),nbins=5).weights == [40,40,40,40,40] - @test fit(Histogram{Float32},0:99,weights(2*ones(100)),nbins=5).weights == [40,40,40,40,40] - - d = collect(0:99) - v = view(d, fill(true, 100)) - @test fit(Histogram{Float32},v,weights(2*ones(100)),nbins=5).weights == [40,40,40,40,40] -end - - -@testset "Histogram element type" begin - @test eltype(@inferred(fit(Histogram,1:100,weights(ones(Int,100)),nbins=5)).weights) == Int - @test eltype(@inferred(fit(Histogram{Float32},1:100,weights(ones(Int,100)),nbins=5)).weights) == Float32 - @test eltype(@inferred(fit(Histogram,1:100,weights(ones(Float64,100)),nbins=5)).weights) == Float64 - @test eltype(@inferred(fit(Histogram{Float32},1:100,weights(ones(Float64,100)),nbins=5)).weights) == Float32 -end - - -@testset "histrange" begin - # Note: atm histrange must be qualified - @test @inferred(StatsBase.histrange(Float64[], 0, :left)) == 0.0:1.0:0.0 - @test StatsBase.histrange(Float64[1:5;], 1, :left) == 0.0:5.0:10.0 - @test StatsBase.histrange(Float64[1:10;], 1, :left) == 0.0:10.0:20.0 - @test StatsBase.histrange(1.0, 10.0, 1, :left) == 0.0:10.0:20.0 - - @test StatsBase.histrange([0.201,0.299], 10, :left) == 0.2:0.01:0.3 - @test StatsBase.histrange([0.2,0.299], 10, :left) == 0.2:0.01:0.3 - @test StatsBase.histrange([0.2,0.3], 10, :left) == 0.2:0.01:0.31 - @test StatsBase.histrange(0.2, 0.3, 10, :left) == 0.2:0.01:0.31 - @test StatsBase.histrange([0.2,0.3], 10, :right) == 0.19:0.01:0.3 - @test StatsBase.histrange(0.2, 0.3, 10, :right) == 0.19:0.01:0.3 - - @test StatsBase.histrange([200.1,299.9], 10, :left) == 200.0:10.0:300.0 - @test StatsBase.histrange([200.0,299.9], 10, :left) == 200.0:10.0:300.0 - @test StatsBase.histrange([200.0,300.0], 10, :left) == 200.0:10.0:310.0 - @test StatsBase.histrange([200.0,300.0], 10, :right) == 190.0:10.0:300.0 - - @test @inferred(StatsBase.histrange(Int64[1:5;], 1, :left)) == 0:5:10 - @test StatsBase.histrange(Int64[1:10;], 1, :left) == 0:10:20 - - @test StatsBase.histrange([0, 1, 2, 3], 4, :left) == 0.0:1.0:4.0 - @test StatsBase.histrange([0, 1, 1, 3], 4, :left) == 0.0:1.0:4.0 - @test StatsBase.histrange([0, 9], 4, :left) == 0.0:5.0:10.0 - @test StatsBase.histrange([0, 19], 4, :left) == 0.0:5.0:20.0 - @test StatsBase.histrange([0, 599], 4, :left) == 0.0:200.0:600.0 - @test StatsBase.histrange([-1, -1000], 4, :left) == -1000.0:500.0:0.0 - - # Base issue #13326 - l,h = extrema(StatsBase.histrange([typemin(Int),typemax(Int)], 10, :left)) - @test l <= typemin(Int) - @test h >= typemax(Int) - - # Issue 616/667 - @test StatsBase.histrange([1.0 for i in 1:100], 10, :left) == 1.0:1.0:2.0 - @test StatsBase.histrange([1.05 for i in 1:100], 10, :left) == 1.05:1.0:2.05 - - @test_throws ArgumentError StatsBase.histrange([1, 10], 0, :left) - @test_throws ArgumentError StatsBase.histrange([1, 10], -1, :left) - @test_throws ArgumentError StatsBase.histrange([1.0, 10.0], 0, :left) - @test_throws ArgumentError StatsBase.histrange([1.0, 10.0], -1, :left) - @test_throws ArgumentError StatsBase.histrange(Float64[],-1, :left) - @test_throws ArgumentError StatsBase.histrange([0.], 0, :left) -end - - -@testset "Histogram show" 
begin - # hist show - show_h = sprint(show, fit(Histogram,[0,1,2])) - @test occursin("edges:\n 0.0:1.0:3.0", show_h) - @test occursin("weights: $([1,1,1])", show_h) - @test occursin("closed: left", show_h) - @test occursin("isdensity: false", show_h) -end - - -@testset "Histogram norm and normalize" begin - rng = MersenneTwister(345678) - edges = ( - cumsum(rand(rng) * rand(rng, 9)), - cumsum(rand(rng, 1:10) * rand(rng, 1:100, 11)), - cumsum(5 * rand(rng) * rand(rng, 14)) - ) - - n = 100000 - - data = ( - maximum(edges[1]) .* (randn(rng, n) ./ 6 .+ 0.5), - rand(rng, 1:maximum(edges[2]), n), - maximum(edges[3]) .* rand(rng, n) - ) - - h = fit(Histogram, data, edges, closed = :left) - - weight_sum = sum(h.weights) - bin_vols = [ x * y * z for x in diff(edges[1]), y in diff(edges[2]), z in diff(edges[3])] - - @test norm(h) ≈ sum(h.weights .* bin_vols) - - @test @inferred(normalize(h, mode = :none)) == h - - - h_pdf = normalize(h, mode = :pdf) - @test h_pdf.weights ≈ h.weights ./ bin_vols ./ weight_sum - @test h_pdf.isdensity == true - @test @inferred(norm(h_pdf)) ≈ 1 -# @test @inferred(normalize(h_pdf, mode = :pdf)) == h_pdf - @test @inferred(normalize(h_pdf, mode = :density)) == h_pdf -# @test @inferred(normalize(h_pdf, mode = :probability)) == h_pdf - - h_density = normalize(h, mode = :density) - @test h_density.weights ≈ h.weights ./ bin_vols - @test h_density.isdensity == true - @test @inferred(norm(h_density)) ≈ weight_sum - @test @inferred(normalize(h_density, mode = :pdf)) == - Histogram(h_density.edges, h_density.weights .* (1/norm(h_density)), h_density.closed, true) - @test normalize(h_density, mode = :pdf).weights ≈ h_pdf.weights - @test normalize(h_density, mode = :density) == h_density - @test normalize(h_density, mode = :probability).weights ≈ h_pdf.weights - - h_fraction = normalize(h, mode = :probability) - @test sum(h_fraction.weights) ≈ 1 - @test h_fraction.isdensity == false - @test normalize(h_fraction, mode = :pdf).weights ≈ h_pdf.weights - @test normalize(h_fraction, mode = :density).weights ≈ h_pdf.weights - @test normalize(h_fraction, mode = :probability).weights ≈ h_fraction.weights - - h_copy = deepcopy(float(h)) - @test @inferred(normalize!(h_copy, mode = :density)) == h_copy - - h2 = deepcopy(float(h)) - mod_h2 = normalize!(h2, mode = :density) - @test mod_h2 === h2 && mod_h2.weights === h2.weights - @test h2.weights == h_density.weights - - aux_weights = sqrt.(h.weights) - divor0 = (a,b) -> (a == 0 && b == 0) ? 0 : a/b - divor0_cmp = (a_n, a_d, b_n, b_d) -> maximum(abs.(map(divor0, a_n, a_d) - map(divor0, b_n, b_d))) < 1e-10 - - h_pdf2, h_pdf2_aux = normalize(float(h), aux_weights, mode = :pdf) - @test divor0_cmp(h_pdf2_aux, aux_weights, h_pdf2.weights, h.weights) - - h_density2, h_density2_aux = normalize(float(h), aux_weights, mode = :density) - @test divor0_cmp(h_density2_aux, aux_weights, h_density2.weights, h.weights) - - h_density3, h_density3_aux = normalize(h_density2, h_density2_aux, mode = :pdf) - @test divor0_cmp(h_density3_aux, h_density2_aux, h_density3.weights, h_density2.weights) -end - - -@testset "Histogram zero" begin - h = fit(Histogram, (rand(100), rand(100))) - h2 = @inferred zero(h) - @test all(x -> x≈0, h2.weights) - @test !(h.weights === h2.weights) - @test h.edges == h2.edges - @test h.closed == h2.closed - @test h.isdensity == h2.isdensity -end - - -@testset "Histogram merge" begin - histograms = [fit(Histogram, (rand(100), 10 * rand(100)), (0:0.1:1, 0:1:10)) for _ in 1:10] - h = zero(histograms[1]) - merge!(h, histograms ...) 
- @test h.weights == (+).((x->x.weights).(histograms)...) - @test (@inferred merge(histograms...)) == h -end - -@testset "midpoints" begin - @test StatsBase.midpoints([1, 2, 4]) == [1.5, 3.0] - @test StatsBase.midpoints(range(0, stop = 1, length = 5)) == 0.125:0.25:0.875 -end - -@testset "histogram with -0.0" begin - @test fit(Histogram, [-0.0, 1.0]) == fit(Histogram, [0.0, 1.0]) - @test fit(Histogram, [-0.0, 1.0], closed=:right) == - fit(Histogram, [0.0, 1.0], closed=:right) - @test fit(Histogram, [-0.0, -1.0]) == fit(Histogram, [0.0, -1.0]) - @test fit(Histogram, [-0.0, -1.0], closed=:right) == - fit(Histogram, [0.0, -1.0], closed=:right) - - @test fit(Histogram, [-0.0, 1.0], [-0.0, 0.5]) == - fit(Histogram, [0.0, 1.0], [0.0, 0.5]) == - fit(Histogram, [-0.0, 1.0], [0.0, 0.5]) == - fit(Histogram, [0.0, 1.0], [-0.0, 0.5]) == - fit(Histogram, [0.0, 1.0], 0.0:0.5:0.5) == - fit(Histogram, [-0.0, 1.0], 0.0:0.5:0.5) - @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0]) == - fit(Histogram, [0.0, 1.0], [-0.5, -0.0]) == - fit(Histogram, [-0.0, 1.0], [-0.5, 0.0]) == - fit(Histogram, [0.0, 1.0], [-0.5, 0.0]) == - fit(Histogram, [-0.0, 1.0], -0.5:0.5:0.0) == - fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0) - @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], [-0.5, 0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0, closed=:right) - @test fit(Histogram, [-0.0, 1.0], [-0.0, 0.5], closed=:right) == - fit(Histogram, [0.0, 1.0], [0.0, 0.5], closed=:right) == - fit(Histogram, [0.0, 1.0], [-0.0, 0.5], closed=:right) == - fit(Histogram, [-0.0, 1.0], [0.0, 0.5], closed=:right) == - fit(Histogram, [0.0, 1.0], 0.0:0.5:0.5, closed=:right) == - fit(Histogram, [-0.0, 1.0], 0.0:0.5:0.5, closed=:right) - @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], [-0.5, 0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], [-0.5, -0.0], closed=:right) == - fit(Histogram, [-0.0, 1.0], [-0.5, 0.0], closed=:right) == - fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0, closed=:right) == - fit(Histogram, [-0.0, 1.0], -0.5:0.5:0.0, closed=:right) - - @test_throws ArgumentError fit(Histogram, [-0.5], LinRange(-1.0, -0.0, 3)) - @test_throws ArgumentError fit(Histogram, [-0.5], UnitRange(-0.0, 1.0)) -end - + @testset "Histogram binindex and binvolume" begin + edg1 = -2:0.5:9 + edg1f0 = -2:0.5f0:9 + edg2 = [-2, -1, 2, 7, 19] + h1 = Histogram(edg1) + h2 = Histogram((edg1, edg2)) + h3 = Histogram((edg1f0, edg2)) + + @test h1 == Histogram(edg1, :left, false) + + @test @inferred StatsBase.binindex(h1, -0.5) == 4 + @test @inferred StatsBase.binindex(h2, (1.5, 2)) == (8, 3) + + @test [StatsBase.binvolume(h1, i) for i in axes(h1.weights, 1)] ≈ diff(edg1) + @test [StatsBase.binvolume(h2, (i, j)) + for i in axes(h2.weights, 1), j in axes(h2.weights, 2)] ≈ + diff(edg1) * diff(edg2)' + + @test typeof(@inferred(StatsBase.binvolume(h2, (1, 1)))) == Float64 + @test typeof(@inferred(StatsBase.binvolume(h3, (1, 1)))) == Float32 + @test typeof(@inferred(StatsBase.binvolume(Float64, h3, (1, 1)))) == Float64 + end + + @testset "Histogram append" begin + h = Histogram(0:20:100, Float64, :left, false) + @test @inferred(append!(h, 0:0.5:99.99)) == h + @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99).weights ≈ + [40, 40, 40, 40, 40] + @test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99).weights ≈ + [2, 2, 2, 2, 2] + @test append!(Histogram(0:20:100, Float64, :left, false), 0:0.5:99.99, + fill(2, 200)).weights ≈ [80, 80, 80, 80, 80] + 
@test append!(Histogram(0:20:100, Float64, :left, true), 0:0.5:99.99, + fill(2, 200)).weights ≈ [4, 4, 4, 4, 4] + end + + @testset "Histogram fit" begin + @test sum(fit(Histogram, [1, 2, 3]).weights) == 3 + @test fit(Histogram, Int[]).weights == Int[] + @test fit(Histogram, [1]).weights == [1] + @test fit(Histogram, [1, 2, 3], [0, 2, 4]) == Histogram([0, 2, 4], [1, 2], :left) + @test fit(Histogram, [1, 2, 3], [0, 2, 4]) != Histogram([0, 2, 4], [1, 1], :left) + @test fit(Histogram, [1, 2, 3], 0:2:4) == Histogram(0:2:4, [1, 2], :left) + @test all(fit(Histogram, [0:99;]/100, 0.0:0.01:1.0).weights .== 1) + @test fit(Histogram, [1, 1, 1, 1, 1]).weights[1] == 5 + @test sum(fit(Histogram, (rand(100), rand(100))).weights) == 100 + @test fit(Histogram, 1:100, nbins=5, closed=:right).weights == [20, 20, 20, 20, 20] + @test fit(Histogram, 1:100, nbins=5, closed=:left).weights == + [19, 20, 20, 20, 20, 1] + @test fit(Histogram, 0:99, nbins=5, closed=:right).weights == + [1, 20, 20, 20, 20, 19] + @test fit(Histogram, 0:99, nbins=5, closed=:left).weights == [20, 20, 20, 20, 20] + + @test fit(Histogram, (0:99, 0:99), nbins=5).weights == + Matrix(Diagonal([20, 20, 20, 20, 20])) + @test fit(Histogram, (0:99, 0:99), nbins=(5, 5)).weights == + Matrix(Diagonal([20, 20, 20, 20, 20])) + + @test fit(Histogram, 0:99, weights(ones(100)), nbins=5).weights == + [20, 20, 20, 20, 20] + @test fit(Histogram, 0:99, weights(2*ones(100)), nbins=5).weights == + [40, 40, 40, 40, 40] + @test fit(Histogram{Int32}, 0:99, weights(2*ones(100)), nbins=5).weights == + [40, 40, 40, 40, 40] + @test fit(Histogram{Float32}, 0:99, weights(2*ones(100)), nbins=5).weights == + [40, 40, 40, 40, 40] + + d = collect(0:99) + v = view(d, fill(true, 100)) + @test fit(Histogram{Float32}, v, weights(2*ones(100)), nbins=5).weights == + [40, 40, 40, 40, 40] + end + + @testset "Histogram element type" begin + @test eltype(@inferred(fit(Histogram, 1:100, weights(ones(Int, 100)), nbins=5)).weights) == + Int + @test eltype(@inferred(fit(Histogram{Float32}, 1:100, weights(ones(Int, 100)), + nbins=5)).weights) == Float32 + @test eltype(@inferred(fit(Histogram, 1:100, weights(ones(Float64, 100)), nbins=5)).weights) == + Float64 + @test eltype(@inferred(fit(Histogram{Float32}, 1:100, weights(ones(Float64, 100)), + nbins=5)).weights) == Float32 + end + + @testset "histrange" begin + # Note: atm histrange must be qualified + @test @inferred(StatsBase.histrange(Float64[], 0, :left)) == 0.0:1.0:0.0 + @test StatsBase.histrange(Float64[1:5;], 1, :left) == 0.0:5.0:10.0 + @test StatsBase.histrange(Float64[1:10;], 1, :left) == 0.0:10.0:20.0 + @test StatsBase.histrange(1.0, 10.0, 1, :left) == 0.0:10.0:20.0 + + @test StatsBase.histrange([0.201, 0.299], 10, :left) == 0.2:0.01:0.3 + @test StatsBase.histrange([0.2, 0.299], 10, :left) == 0.2:0.01:0.3 + @test StatsBase.histrange([0.2, 0.3], 10, :left) == 0.2:0.01:0.31 + @test StatsBase.histrange(0.2, 0.3, 10, :left) == 0.2:0.01:0.31 + @test StatsBase.histrange([0.2, 0.3], 10, :right) == 0.19:0.01:0.3 + @test StatsBase.histrange(0.2, 0.3, 10, :right) == 0.19:0.01:0.3 + + @test StatsBase.histrange([200.1, 299.9], 10, :left) == 200.0:10.0:300.0 + @test StatsBase.histrange([200.0, 299.9], 10, :left) == 200.0:10.0:300.0 + @test StatsBase.histrange([200.0, 300.0], 10, :left) == 200.0:10.0:310.0 + @test StatsBase.histrange([200.0, 300.0], 10, :right) == 190.0:10.0:300.0 + + @test @inferred(StatsBase.histrange(Int64[1:5;], 1, :left)) == 0:5:10 + @test StatsBase.histrange(Int64[1:10;], 1, :left) == 0:10:20 + + @test 
StatsBase.histrange([0, 1, 2, 3], 4, :left) == 0.0:1.0:4.0 + @test StatsBase.histrange([0, 1, 1, 3], 4, :left) == 0.0:1.0:4.0 + @test StatsBase.histrange([0, 9], 4, :left) == 0.0:5.0:10.0 + @test StatsBase.histrange([0, 19], 4, :left) == 0.0:5.0:20.0 + @test StatsBase.histrange([0, 599], 4, :left) == 0.0:200.0:600.0 + @test StatsBase.histrange([-1, -1000], 4, :left) == -1000.0:500.0:0.0 + + # Base issue #13326 + l, h = extrema(StatsBase.histrange([typemin(Int), typemax(Int)], 10, :left)) + @test l <= typemin(Int) + @test h >= typemax(Int) + + # Issue 616/667 + @test StatsBase.histrange([1.0 for i in 1:100], 10, :left) == 1.0:1.0:2.0 + @test StatsBase.histrange([1.05 for i in 1:100], 10, :left) == 1.05:1.0:2.05 + + @test_throws ArgumentError StatsBase.histrange([1, 10], 0, :left) + @test_throws ArgumentError StatsBase.histrange([1, 10], -1, :left) + @test_throws ArgumentError StatsBase.histrange([1.0, 10.0], 0, :left) + @test_throws ArgumentError StatsBase.histrange([1.0, 10.0], -1, :left) + @test_throws ArgumentError StatsBase.histrange(Float64[], -1, :left) + @test_throws ArgumentError StatsBase.histrange([0.0], 0, :left) + end + + @testset "Histogram show" begin + # hist show + show_h = sprint(show, fit(Histogram, [0, 1, 2])) + @test occursin("edges:\n 0.0:1.0:3.0", show_h) + @test occursin("weights: $([1,1,1])", show_h) + @test occursin("closed: left", show_h) + @test occursin("isdensity: false", show_h) + end + + @testset "Histogram norm and normalize" begin + rng = MersenneTwister(345678) + edges = (cumsum(rand(rng) * rand(rng, 9)), + cumsum(rand(rng, 1:10) * rand(rng, 1:100, 11)), + cumsum(5 * rand(rng) * rand(rng, 14))) + + n = 100000 + + data = (maximum(edges[1]) .* (randn(rng, n) ./ 6 .+ 0.5), + rand(rng, 1:maximum(edges[2]), n), + maximum(edges[3]) .* rand(rng, n)) + + h = fit(Histogram, data, edges, closed=:left) + + weight_sum = sum(h.weights) + bin_vols = [x * y * z + for x in diff(edges[1]), y in diff(edges[2]), z in diff(edges[3])] + + @test norm(h) ≈ sum(h.weights .* bin_vols) + + @test @inferred(normalize(h, mode=:none)) == h + + h_pdf = normalize(h, mode=:pdf) + @test h_pdf.weights ≈ h.weights ./ bin_vols ./ weight_sum + @test h_pdf.isdensity == true + @test @inferred(norm(h_pdf)) ≈ 1 + # @test @inferred(normalize(h_pdf, mode = :pdf)) == h_pdf + @test @inferred(normalize(h_pdf, mode=:density)) == h_pdf + # @test @inferred(normalize(h_pdf, mode = :probability)) == h_pdf + + h_density = normalize(h, mode=:density) + @test h_density.weights ≈ h.weights ./ bin_vols + @test h_density.isdensity == true + @test @inferred(norm(h_density)) ≈ weight_sum + @test @inferred(normalize(h_density, mode=:pdf)) == + Histogram(h_density.edges, h_density.weights .* (1/norm(h_density)), + h_density.closed, true) + @test normalize(h_density, mode=:pdf).weights ≈ h_pdf.weights + @test normalize(h_density, mode=:density) == h_density + @test normalize(h_density, mode=:probability).weights ≈ h_pdf.weights + + h_fraction = normalize(h, mode=:probability) + @test sum(h_fraction.weights) ≈ 1 + @test h_fraction.isdensity == false + @test normalize(h_fraction, mode=:pdf).weights ≈ h_pdf.weights + @test normalize(h_fraction, mode=:density).weights ≈ h_pdf.weights + @test normalize(h_fraction, mode=:probability).weights ≈ h_fraction.weights + + h_copy = deepcopy(float(h)) + @test @inferred(normalize!(h_copy, mode=:density)) == h_copy + + h2 = deepcopy(float(h)) + mod_h2 = normalize!(h2, mode=:density) + @test mod_h2 === h2 && mod_h2.weights === h2.weights + @test h2.weights == h_density.weights + + 
aux_weights = sqrt.(h.weights) + divor0 = (a, b) -> (a == 0 && b == 0) ? 0 : a/b + divor0_cmp = (a_n, a_d, b_n, b_d) -> maximum(abs.(map(divor0, a_n, a_d) - + map(divor0, b_n, b_d))) < 1e-10 + + h_pdf2, h_pdf2_aux = normalize(float(h), aux_weights, mode=:pdf) + @test divor0_cmp(h_pdf2_aux, aux_weights, h_pdf2.weights, h.weights) + + h_density2, h_density2_aux = normalize(float(h), aux_weights, mode=:density) + @test divor0_cmp(h_density2_aux, aux_weights, h_density2.weights, h.weights) + + h_density3, h_density3_aux = normalize(h_density2, h_density2_aux, mode=:pdf) + @test divor0_cmp(h_density3_aux, h_density2_aux, h_density3.weights, + h_density2.weights) + end + + @testset "Histogram zero" begin + h = fit(Histogram, (rand(100), rand(100))) + h2 = @inferred zero(h) + @test all(x -> x≈0, h2.weights) + @test !(h.weights === h2.weights) + @test h.edges == h2.edges + @test h.closed == h2.closed + @test h.isdensity == h2.isdensity + end + + @testset "Histogram merge" begin + histograms = [fit(Histogram, (rand(100), 10 * rand(100)), (0:0.1:1, 0:1:10)) + for _ in 1:10] + h = zero(histograms[1]) + merge!(h, histograms ...) + @test h.weights == (+).((x->x.weights).(histograms)...) + @test (@inferred merge(histograms...)) == h + end + + @testset "midpoints" begin + @test StatsBase.midpoints([1, 2, 4]) == [1.5, 3.0] + @test StatsBase.midpoints(range(0, stop=1, length=5)) == 0.125:0.25:0.875 + end + + @testset "histogram with -0.0" begin + @test fit(Histogram, [-0.0, 1.0]) == fit(Histogram, [0.0, 1.0]) + @test fit(Histogram, [-0.0, 1.0], closed=:right) == + fit(Histogram, [0.0, 1.0], closed=:right) + @test fit(Histogram, [-0.0, -1.0]) == fit(Histogram, [0.0, -1.0]) + @test fit(Histogram, [-0.0, -1.0], closed=:right) == + fit(Histogram, [0.0, -1.0], closed=:right) + + @test fit(Histogram, [-0.0, 1.0], [-0.0, 0.5]) == + fit(Histogram, [0.0, 1.0], [0.0, 0.5]) == + fit(Histogram, [-0.0, 1.0], [0.0, 0.5]) == + fit(Histogram, [0.0, 1.0], [-0.0, 0.5]) == + fit(Histogram, [0.0, 1.0], 0.0:0.5:0.5) == + fit(Histogram, [-0.0, 1.0], 0.0:0.5:0.5) + @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0]) == + fit(Histogram, [0.0, 1.0], [-0.5, -0.0]) == + fit(Histogram, [-0.0, 1.0], [-0.5, 0.0]) == + fit(Histogram, [0.0, 1.0], [-0.5, 0.0]) == + fit(Histogram, [-0.0, 1.0], -0.5:0.5:0.0) == + fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0) + @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0], closed=:right) == + fit(Histogram, [0.0, 1.0], [-0.5, 0.0], closed=:right) == + fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0, closed=:right) + @test fit(Histogram, [-0.0, 1.0], [-0.0, 0.5], closed=:right) == + fit(Histogram, [0.0, 1.0], [0.0, 0.5], closed=:right) == + fit(Histogram, [0.0, 1.0], [-0.0, 0.5], closed=:right) == + fit(Histogram, [-0.0, 1.0], [0.0, 0.5], closed=:right) == + fit(Histogram, [0.0, 1.0], 0.0:0.5:0.5, closed=:right) == + fit(Histogram, [-0.0, 1.0], 0.0:0.5:0.5, closed=:right) + @test fit(Histogram, [-0.0, 1.0], [-0.5, -0.0], closed=:right) == + fit(Histogram, [0.0, 1.0], [-0.5, 0.0], closed=:right) == + fit(Histogram, [0.0, 1.0], [-0.5, -0.0], closed=:right) == + fit(Histogram, [-0.0, 1.0], [-0.5, 0.0], closed=:right) == + fit(Histogram, [0.0, 1.0], -0.5:0.5:0.0, closed=:right) == + fit(Histogram, [-0.0, 1.0], -0.5:0.5:0.0, closed=:right) + + @test_throws ArgumentError fit(Histogram, [-0.5], LinRange(-1.0, -0.0, 3)) + @test_throws ArgumentError fit(Histogram, [-0.5], UnitRange(-0.0, 1.0)) + end end # @testset "StatsBase.Histogram" diff --git a/test/jet.jl b/test/jet.jl index 78d06ebc7..53572ef7a 100644 --- a/test/jet.jl +++ 
b/test/jet.jl @@ -1,19 +1,20 @@ using StatsBase, Test -import JET +using JET: JET # JET has only experimental support for Julia 1.12 currently # It throws an internal `AssertionError` in the tests below if VERSION < v"1.12-" @testset "JET" begin # Check that there are no undefined global references and undefined field accesses - JET.test_package("StatsBase"; target_defined_modules = true, mode = :typo) + JET.test_package("StatsBase"; target_defined_modules=true, mode=:typo) # Default error analysis for common problems fails since JET errors on interface definitions # The (deprecated) `model_response(::StatisticalModel)` calls the interface # function `response(::StatisticalModel)` for which no method exists yet # Note: This analysis is not strict enough to guarantee that there are no runtime errors! # Ref https://github.com/aviatesk/JET.jl/issues/495 - res = JET.report_package("StatsBase"; target_defined_modules = true, mode = :basic, toplevel_logger = nothing) + res = JET.report_package("StatsBase"; target_defined_modules=true, mode=:basic, + toplevel_logger=nothing) println(res) reports = JET.get_reports(res) @test_broken isempty(reports) diff --git a/test/misc.jl index 288a76f40..4bb8c507e 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -37,9 +37,9 @@ b = [true, false, false, true, false, true, true, false] # indicatormat -II = [false true false false false; - true false false false true; - false false true true false] +II = [false true false false false; + true false false false true; + false false true true false] x = [2, 1, 3, 3, 2] @test indicatormat(x, 3) == II diff --git a/test/moments.jl index a3402b965..36b48fb80 100644 --- a/test/moments.jl +++ b/test/moments.jl @@ -3,482 +3,562 @@ using Statistics using Test @testset "StatsBase.Moments" begin -weight_funcs = (weights, aweights, fweights, pweights) - -##### weighted var & std - -x = [0.57, 0.10, 0.91, 0.72, 0.46, 0.0] -xf0 = Float32.(x) -w = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] -wf0 = Float32.(w) - -@testset "Uncorrected with $f (values of type $(eltype(x)), weights of type $(eltype(w)))" for f in weight_funcs, x in (x, xf0), w in (w, wf0) - TX = eltype(x) - T = promote_type(TX, eltype(w)) - wv = f(w) - m = @inferred(mean(x, wv))::T - - # expected uncorrected output - expected_var = sum(abs2.(x .- m), wv) / sum(wv) - expected_std = sqrt.(expected_var) - - @testset "Variance" begin - @test @inferred(var(x, wv; corrected=false))::T ≈ expected_var - @test @inferred(var(x, wv; mean=m, corrected=false))::T ≈ expected_var - @test @inferred(varm(x, wv, m; corrected=false))::T ≈ expected_var - end - - @testset "Standard Deviation" begin - @test @inferred(std(x, wv; corrected=false))::T ≈ expected_std - @test @inferred(std(x, wv; mean=m, corrected=false))::T ≈ expected_std - @test @inferred(stdm(x, wv, m; corrected=false))::T ≈ expected_std - end - - @testset "Mean and Variance" begin - (m, v) = @inferred(mean_and_var(x; corrected=false))::Tuple{TX,TX} - @test m == mean(x) - @test v == var(x; corrected=corrected=false) - - (m, v) = @inferred(mean_and_var(x, wv; corrected=false))::Tuple{T,T} - @test m == mean(x, wv) - @test v == var(x, wv; corrected=false) - end - - @testset "Mean and Standard Deviation" begin - (m, s) = @inferred(mean_and_std(x; corrected=false))::Tuple{TX,TX} - @test m == mean(x) - @test s == std(x; corrected=false) - - (m, s) = @inferred(mean_and_std(x, wv; corrected=false))::Tuple{T,T} - @test m == mean(x, wv) - @test s == std(x, wv; corrected=false) - end -end - -# expected corrected
output for (weights, aweights, fweights, pweights) -expected_var = [NaN, 0.0694434191182236, 0.05466601256158146, 0.06628969012045285] -expected_std = sqrt.(expected_var) - -@testset "Corrected with $(weight_funcs[i]) (values of type $(eltype(x)), weights of type $(eltype(w)))" for i in eachindex(weight_funcs), x in (x, xf0), w in (w, wf0) - TX = eltype(x) - TW = eltype(w) - T = promote_type(TX, TW) - TR = TX === Float32 || TW === Float32 ? Float32 : Float64 - wv = weight_funcs[i](w) - m = @inferred(mean(x, wv))::T - - @testset "Variance" begin - if isa(wv, Weights) - @test_throws ArgumentError var(x, wv; corrected=true) - else - @test @inferred(var(x, wv; corrected=true))::T ≈ TR(expected_var[i]) - @test @inferred(var(x, wv; mean=m, corrected=true))::T ≈ TR(expected_var[i]) - @test @inferred(varm(x, wv, m; corrected=true))::T ≈ TR(expected_var[i]) + weight_funcs = (weights, aweights, fweights, pweights) + + ##### weighted var & std + + x = [0.57, 0.10, 0.91, 0.72, 0.46, 0.0] + xf0 = Float32.(x) + w = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] + wf0 = Float32.(w) + + @testset "Uncorrected with $f (values of type $(eltype(x)), weights of type $(eltype(w)))" for f in + weight_funcs, + x in + (x, + xf0), + w in + (w, + wf0) + + TX = eltype(x) + T = promote_type(TX, eltype(w)) + wv = f(w) + m = @inferred(mean(x, wv))::T + + # expected uncorrected output + expected_var = sum(abs2.(x .- m), wv) / sum(wv) + expected_std = sqrt.(expected_var) + + @testset "Variance" begin + @test @inferred(var(x, wv; corrected=false))::T ≈ expected_var + @test @inferred(var(x, wv; mean=m, corrected=false))::T ≈ expected_var + @test @inferred(varm(x, wv, m; corrected=false))::T ≈ expected_var end - end - @testset "Standard Deviation" begin - if isa(wv, Weights) - @test_throws ArgumentError std(x, wv; corrected=true) - else - @test @inferred(std(x, wv; corrected=true))::T ≈ TR(expected_std[i]) - @test @inferred(std(x, wv; mean=m, corrected=true))::T ≈ TR(expected_std[i]) - @test @inferred(stdm(x, wv, m; corrected=true))::T ≈ TR(expected_std[i]) + @testset "Standard Deviation" begin + @test @inferred(std(x, wv; corrected=false))::T ≈ expected_std + @test @inferred(std(x, wv; mean=m, corrected=false))::T ≈ expected_std + @test @inferred(stdm(x, wv, m; corrected=false))::T ≈ expected_std end - @testset "Mean and Variance" begin - (m, v) = @inferred(mean_and_var(x; corrected=true))::Tuple{TX,TX} - @test m == mean(x) - @test v == var(x; corrected=true) + @testset "Mean and Variance" begin + (m, v) = @inferred(mean_and_var(x; corrected=false))::Tuple{TX,TX} + @test m == mean(x) + @test v == var(x; corrected=false) - if isa(wv, Weights) - @test_throws ArgumentError mean_and_var(x, wv; corrected=true) - else - (m, v) = @inferred(mean_and_var(x, wv; corrected=true))::Tuple{T,T} + (m, v) = @inferred(mean_and_var(x, wv; corrected=false))::Tuple{T,T} @test m == mean(x, wv) - @test v == var(x, wv; corrected=true) + @test v == var(x, wv; corrected=false) end - end - @testset "Mean and Standard Deviation" begin - (m, s) = @inferred(mean_and_std(x; corrected=true))::Tuple{TX,TX} - @test m == mean(x) - @test s == std(x; corrected=true) + @testset "Mean and Standard Deviation" begin + (m, s) = @inferred(mean_and_std(x; corrected=false))::Tuple{TX,TX} + @test m == mean(x) + @test s == std(x; corrected=false) - if isa(wv, Weights) - @test_throws ArgumentError mean_and_std(x, wv; corrected=true) - else - (m, s) = @inferred(mean_and_std(x, wv;
corrected=false))::Tuple{T,T} @test m == mean(x, wv) - @test s == std(x, wv; corrected=true) + @test s == std(x, wv; corrected=false) end end -end - -x = rand(5, 6) -xf0 = Float32.(x) -w1 = [0.57, 5.10, 0.91, 1.72, 0.0] -w1f0 = Float32.(w1) -w2 = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] -w2f0 = Float32.(w2) - -@testset "Uncorrected with $f (values of type $(eltype(x)), 1st weights of type $(eltype(w1)), 2nd weights of type $(eltype(w2)))" for f in weight_funcs, x in (x, xf0), w1 in (w1, w1f0), w2 in (w2, w2f0) - TX = eltype(x) - TW1 = eltype(w1) - TW2 = eltype(w2) - T1 = promote_type(TX, TW1) - T2 = promote_type(TX, TW2) - wv1 = f(w1) - wv2 = f(w2) - m1 = @inferred(mean(x, wv1, dims=1))::Matrix{T1} - m2 = @inferred(mean(x, wv2, dims=2))::Matrix{T2} - - expected_var1 = sum(abs2.(x .- m1) .* w1, dims = 1) ./ sum(wv1) - expected_var2 = sum(abs2.(x .- m2) .* w2', dims = 2) ./ sum(wv2) - expected_std1 = sqrt.(expected_var1) - expected_std2 = sqrt.(expected_var2) - - @testset "Variance" begin - @test @inferred(var(x, wv1, 1; corrected=false))::Matrix{T1} ≈ expected_var1 - @test @inferred(var(x, wv2, 2; corrected=false))::Matrix{T2} ≈ expected_var2 - @test @inferred(var(x, wv1, 1; mean=m1, corrected=false))::Matrix{T1} ≈ expected_var1 - @test @inferred(var(x, wv2, 2; mean=m2, corrected=false))::Matrix{T2} ≈ expected_var2 - @test @inferred(varm(x, wv1, m1, 1; corrected=false))::Matrix{T1} ≈ expected_var1 - @test @inferred(varm(x, wv2, m2, 2; corrected=false))::Matrix{T2} ≈ expected_var2 - end - @testset "Standard Deviation" begin - @test @inferred(std(x, wv1, 1; corrected=false))::Matrix{T1} ≈ expected_std1 - @test @inferred(std(x, wv2, 2; corrected=false))::Matrix{T2} ≈ expected_std2 - @test @inferred(std(x, wv1, 1; mean=m1, corrected=false))::Matrix{T1} ≈ expected_std1 - @test @inferred(std(x, wv2, 2; mean=m2, corrected=false))::Matrix{T2} ≈ expected_std2 - @test @inferred(stdm(x, wv1, m1, 1; corrected=false))::Matrix{T1} ≈ expected_std1 - @test @inferred(stdm(x, wv2, m2, 2; corrected=false))::Matrix{T2} ≈ expected_std2 - end + # expected corrected output for (weights, aweights, fweights, pweights) + expected_var = [NaN, 0.0694434191182236, 0.05466601256158146, 0.06628969012045285] + expected_std = sqrt.(expected_var) - @testset "Mean and Variance" begin - for d in 1:2 - (m, v) = @inferred(mean_and_var(x, d; corrected=false))::Tuple{Matrix{TX},Matrix{TX}} - @test m == mean(x, dims=d) - @test v == var(x, dims=d, corrected=false) + @testset "Corrected with $(weight_funcs[i]) (values of type $(eltype(x)), weights of type $(eltype(w)))" for i in + eachindex(weight_funcs), + x in + (x, + xf0), + w in + (w, + wf0) + + TX = eltype(x) + TW = eltype(w) + T = promote_type(TX, TW) + TR = TX === Float32 || TW === Float32 ? 
Float32 : Float64 + wv = weight_funcs[i](w) + m = @inferred(mean(x, wv))::T + + @testset "Variance" begin + if isa(wv, Weights) + @test_throws ArgumentError var(x, wv; corrected=true) + else + @test @inferred(var(x, wv; corrected=true))::T ≈ TR(expected_var[i]) + @test @inferred(var(x, wv; mean=m, corrected=true))::T ≈ TR(expected_var[i]) + @test @inferred(varm(x, wv, m; corrected=true))::T ≈ TR(expected_var[i]) + end end - (m, v) = @inferred(mean_and_var(x, wv1, 1; corrected=false))::Tuple{Matrix{T1},Matrix{T1}} - @test m == mean(x, wv1, dims=1) - @test v == var(x, wv1, 1; corrected=false) + @testset "Standard Deviation" begin + if isa(wv, Weights) + @test_throws ArgumentError std(x, wv; corrected=true) + else + @test @inferred(std(x, wv; corrected=true))::T ≈ TR(expected_std[i]) + @test @inferred(std(x, wv; mean=m, corrected=true))::T ≈ TR(expected_std[i]) + @test @inferred(stdm(x, wv, m; corrected=true))::T ≈ TR(expected_std[i]) + end + end - (m, v) = @inferred(mean_and_var(x, wv2, 2; corrected=false))::Tuple{Matrix{T2},Matrix{T2}} - @test m == mean(x, wv2, dims=2) - @test v == var(x, wv2, 2; corrected=false) - end + @testset "Mean and Variance" begin + (m, v) = @inferred(mean_and_var(x; corrected=true))::Tuple{TX,TX} + @test m == mean(x) + @test v == var(x; corrected=true) - @testset "Mean and Standard Deviation" begin - for d in 1:2 - (m, s) = @inferred(mean_and_std(x, d; corrected=false))::Tuple{Matrix{TX},Matrix{TX}} - @test m == mean(x, dims=d) - @test s == std(x, dims=d; corrected=false) + if isa(wv, Weights) + @test_throws ArgumentError mean_and_var(x, wv; corrected=true) + else + (m, v) = @inferred(mean_and_var(x, wv; corrected=true))::Tuple{T,T} + @test m == mean(x, wv) + @test v == var(x, wv; corrected=true) + end end - (m, s) = @inferred(mean_and_std(x, wv1, 1; corrected=false))::Tuple{Matrix{T1},Matrix{T1}} - @test m == mean(x, wv1, dims=1) - @test s == std(x, wv1, 1; corrected=false) + @testset "Mean and Standard Deviation" begin + (m, s) = @inferred(mean_and_std(x; corrected=true))::Tuple{TX,TX} + @test m == mean(x) + @test s == std(x; corrected=true) - (m, s) = @inferred(mean_and_std(x, wv2, 2; corrected=false))::Tuple{Matrix{T2},Matrix{T2}} - @test m == mean(x, wv2, dims=2) - @test s == std(x, wv2, 2; corrected=false) + if isa(wv, Weights) + @test_throws ArgumentError mean_and_std(x, wv; corrected=true) + else + (m, s) = @inferred(mean_and_std(x, wv; corrected=true))::Tuple{T,T} + @test m == mean(x, wv) + @test s == std(x, wv; corrected=true) + end + end end -end - -@testset "Corrected with $f (values of type $(eltype(x)), weights of type $(eltype(w1)))" for f in weight_funcs, x in (Float32.(x), Float64.(x)), (w1, w2) in ((Float32.(w1), Float32.(w2)), (Float64.(w1), Float64.(w2))) - TX = eltype(x) - TW1 = eltype(w1) - TW2 = eltype(w2) - T1 = promote_type(TX, TW1) - T2 = promote_type(TX, TW2) - wv1 = f(w1) - wv2 = f(w2) - m1 = @inferred(mean(x, wv1, dims=1))::Matrix{T1} - m2 = @inferred(mean(x, wv2, dims=2))::Matrix{T2} - - if !isa(wv1, Weights) - expected_var1 = sum(abs2.(x .- m1) .* w1, dims = 1) .* StatsBase.varcorrection(wv1, true) - expected_var2 = sum(abs2.(x .- m2) .* w2', dims = 2) .* StatsBase.varcorrection(wv2, true) + + x = rand(5, 6) + xf0 = Float32.(x) + w1 = [0.57, 5.10, 0.91, 1.72, 0.0] + w1f0 = Float32.(w1) + w2 = [3.84, 2.70, 8.29, 8.91, 9.71, 0.0] + w2f0 = Float32.(w2) + + @testset "Uncorrected with $f (values of type $(eltype(x)), 1st weights of type $(eltype(w1)), 2nd weights of type $(eltype(w2)))" for f in + weight_funcs, + x in + (x, + xf0), + w1 in 
+ (w1, + w1f0), + w2 in + (w2, + w2f0) + + TX = eltype(x) + TW1 = eltype(w1) + TW2 = eltype(w2) + T1 = promote_type(TX, TW1) + T2 = promote_type(TX, TW2) + wv1 = f(w1) + wv2 = f(w2) + m1 = @inferred(mean(x, wv1, dims=1))::Matrix{T1} + m2 = @inferred(mean(x, wv2, dims=2))::Matrix{T2} + + expected_var1 = sum(abs2.(x .- m1) .* w1, dims=1) ./ sum(wv1) + expected_var2 = sum(abs2.(x .- m2) .* w2', dims=2) ./ sum(wv2) expected_std1 = sqrt.(expected_var1) expected_std2 = sqrt.(expected_var2) - end - @testset "Variance" begin - if isa(wv1, Weights) - @test_throws ArgumentError var(x, wv1, 1; corrected=true) - else - @test @inferred(var(x, wv1, 1; corrected=true))::Matrix{T1} ≈ expected_var1 - @test @inferred(var(x, wv2, 2; corrected=true))::Matrix{T2} ≈ expected_var2 - @test @inferred(var(x, wv1, 1; mean=m1, corrected=true))::Matrix{T1} ≈ expected_var1 - @test @inferred(var(x, wv2, 2; mean=m2, corrected=true))::Matrix{T2} ≈ expected_var2 - @test @inferred(varm(x, wv1, m1, 1; corrected=true))::Matrix{T1} ≈ expected_var1 - @test @inferred(varm(x, wv2, m2, 2; corrected=true))::Matrix{T2} ≈ expected_var2 + @testset "Variance" begin + @test @inferred(var(x, wv1, 1; corrected=false))::Matrix{T1} ≈ expected_var1 + @test @inferred(var(x, wv2, 2; corrected=false))::Matrix{T2} ≈ expected_var2 + @test @inferred(var(x, wv1, 1; mean=m1, corrected=false))::Matrix{T1} ≈ + expected_var1 + @test @inferred(var(x, wv2, 2; mean=m2, corrected=false))::Matrix{T2} ≈ + expected_var2 + @test @inferred(varm(x, wv1, m1, 1; corrected=false))::Matrix{T1} ≈ + expected_var1 + @test @inferred(varm(x, wv2, m2, 2; corrected=false))::Matrix{T2} ≈ + expected_var2 end - end - @testset "Standard Deviation" begin - if isa(wv1, Weights) - @test_throws ArgumentError std(x, wv1, 1; corrected=true) - else - @test @inferred(std(x, wv1, 1; corrected=true))::Matrix{T1} ≈ expected_std1 - @test @inferred(std(x, wv2, 2; corrected=true))::Matrix{T2} ≈ expected_std2 - @test @inferred(std(x, wv1, 1; mean=m1, corrected=true))::Matrix{T1} ≈ expected_std1 - @test @inferred(std(x, wv2, 2; mean=m2, corrected=true))::Matrix{T2} ≈ expected_std2 - @test @inferred(stdm(x, wv1, m1, 1; corrected=true))::Matrix{T1} ≈ expected_std1 - @test @inferred(stdm(x, wv2, m2, 2; corrected=true))::Matrix{T2} ≈ expected_std2 + @testset "Standard Deviation" begin + @test @inferred(std(x, wv1, 1; corrected=false))::Matrix{T1} ≈ expected_std1 + @test @inferred(std(x, wv2, 2; corrected=false))::Matrix{T2} ≈ expected_std2 + @test @inferred(std(x, wv1, 1; mean=m1, corrected=false))::Matrix{T1} ≈ + expected_std1 + @test @inferred(std(x, wv2, 2; mean=m2, corrected=false))::Matrix{T2} ≈ + expected_std2 + @test @inferred(stdm(x, wv1, m1, 1; corrected=false))::Matrix{T1} ≈ + expected_std1 + @test @inferred(stdm(x, wv2, m2, 2; corrected=false))::Matrix{T2} ≈ + expected_std2 end - end - @testset "Mean and Variance" begin - for d in 1:2 - (m, v) = @inferred(mean_and_var(x, d; corrected=true))::Tuple{Matrix{TX},Matrix{TX}} - @test m == mean(x, dims=d) - @test v == var(x, dims=d, corrected=true) - end + @testset "Mean and Variance" begin + for d in 1:2 + (m, v) = @inferred(mean_and_var(x, d; corrected=false))::Tuple{Matrix{TX}, + Matrix{TX}} + @test m == mean(x, dims=d) + @test v == var(x, dims=d, corrected=false) + end - if isa(wv1, Weights) - @test_throws ArgumentError mean_and_var(x, wv1, 1; corrected=true) - else - (m, v) = @inferred(mean_and_var(x, wv1, 1; corrected=true))::Tuple{Matrix{T1},Matrix{T1}} + (m, v) = @inferred(mean_and_var(x, wv1, 1; corrected=false))::Tuple{Matrix{T1}, + 
Matrix{T1}} @test m == mean(x, wv1, dims=1) - @test v == var(x, wv1, 1; corrected=true) + @test v == var(x, wv1, 1; corrected=false) - (m, v) = @inferred(mean_and_var(x, wv2, 2; corrected=true))::Tuple{Matrix{T2},Matrix{T2}} + (m, v) = @inferred(mean_and_var(x, wv2, 2; corrected=false))::Tuple{Matrix{T2}, + Matrix{T2}} @test m == mean(x, wv2, dims=2) - @test v == var(x, wv2, 2; corrected=true) + @test v == var(x, wv2, 2; corrected=false) end - end - @testset "Mean and Standard Deviation" begin - for d in 1:2 - (m, s) = @inferred(mean_and_std(x, d; corrected=true))::Tuple{Matrix{TX},Matrix{TX}} - @test m == mean(x, dims=d) - @test s == std(x, dims=d, corrected=true) - end + @testset "Mean and Standard Deviation" begin + for d in 1:2 + (m, s) = @inferred(mean_and_std(x, d; corrected=false))::Tuple{Matrix{TX}, + Matrix{TX}} + @test m == mean(x, dims=d) + @test s == std(x, dims=d; corrected=false) + end - if isa(wv1, Weights) - @test_throws ArgumentError mean_and_std(x, wv1, 1; corrected=true) - else - (m, s) = @inferred(mean_and_std(x, wv1, 1; corrected=true))::Tuple{Matrix{T1},Matrix{T1}} + (m, s) = @inferred(mean_and_std(x, wv1, 1; corrected=false))::Tuple{Matrix{T1}, + Matrix{T1}} @test m == mean(x, wv1, dims=1) - @test s == std(x, wv1, 1; corrected=true) + @test s == std(x, wv1, 1; corrected=false) - (m, s) = @inferred(mean_and_std(x, wv2, 2; corrected=true))::Tuple{Matrix{T2},Matrix{T2}} + (m, s) = @inferred(mean_and_std(x, wv2, 2; corrected=false))::Tuple{Matrix{T2}, + Matrix{T2}} @test m == mean(x, wv2, dims=2) - @test s == std(x, wv2, 2; corrected=true) + @test s == std(x, wv2, 2; corrected=false) end end -end -@testset "Skewness and Kurtosis with $f" for f in weight_funcs - for T in (Int, Float32, Float64) - for v in (T(1):T(5), collect(T, 1:5)) - s = @inferred(skewness(v)) - @test s isa float(T) - @test iszero(s) + @testset "Corrected with $f (values of type $(eltype(x)), weights of type $(eltype(w1)))" for f in + weight_funcs, + x in + (Float32.(x), + Float64.(x)), + (w1, + w2) in + ((Float32.(w1), + Float32.(w2)), + (Float64.(w1), + Float64.(w2))) + + TX = eltype(x) + TW1 = eltype(w1) + TW2 = eltype(w2) + T1 = promote_type(TX, TW1) + T2 = promote_type(TX, TW2) + wv1 = f(w1) + wv2 = f(w2) + m1 = @inferred(mean(x, wv1, dims=1))::Matrix{T1} + m2 = @inferred(mean(x, wv2, dims=2))::Matrix{T2} + + if !isa(wv1, Weights) + expected_var1 = sum(abs2.(x .- m1) .* w1, dims=1) .* + StatsBase.varcorrection(wv1, true) + expected_var2 = sum(abs2.(x .- m2) .* w2', dims=2) .* + StatsBase.varcorrection(wv2, true) + expected_std1 = sqrt.(expected_var1) + expected_std2 = sqrt.(expected_var2) + end - k = @inferred(kurtosis(v)) - @test k isa float(T) - @test k ≈ oftype(k, -1.3) + @testset "Variance" begin + if isa(wv1, Weights) + @test_throws ArgumentError var(x, wv1, 1; corrected=true) + else + @test @inferred(var(x, wv1, 1; corrected=true))::Matrix{T1} ≈ expected_var1 + @test @inferred(var(x, wv2, 2; corrected=true))::Matrix{T2} ≈ expected_var2 + @test @inferred(var(x, wv1, 1; mean=m1, corrected=true))::Matrix{T1} ≈ + expected_var1 + @test @inferred(var(x, wv2, 2; mean=m2, corrected=true))::Matrix{T2} ≈ + expected_var2 + @test @inferred(varm(x, wv1, m1, 1; corrected=true))::Matrix{T1} ≈ + expected_var1 + @test @inferred(varm(x, wv2, m2, 2; corrected=true))::Matrix{T2} ≈ + expected_var2 + end end - v = T[1, 2, 2, 2, 5] - s = @inferred(skewness(v)) - @test s isa float(T) - @test s ≈ oftype(s, 1.1731251294063556) - - v = T[1, 4, 4, 4, 5] - s = @inferred(skewness(v)) - @test s isa float(T) - @test s ≈ 
oftype(s, -1.1731251294063556) - - v = T[1, 2, 3, 3, 2] - k = @inferred(kurtosis(v)) - @test k isa float(T) - @test k ≈ oftype(k, -1.1530612244897953) - - # Empty arrays - s = @inferred(skewness(T[])) - @test s isa float(T) - @test isnan(s) - k = @inferred(kurtosis(T[])) - @test k isa float(T) - @test isnan(k) - - for T2 in (Int, Float32, Float64) - wv = f(fill(T2(2), 5)) - v = T[1, 2, 2, 2, 5] - s = @inferred(skewness(v, wv)) - @test s isa float(promote_type(T, T2)) - @test s ≈ oftype(s, 1.1731251294063556) + @testset "Standard Deviation" begin + if isa(wv1, Weights) + @test_throws ArgumentError std(x, wv1, 1; corrected=true) + else + @test @inferred(std(x, wv1, 1; corrected=true))::Matrix{T1} ≈ expected_std1 + @test @inferred(std(x, wv2, 2; corrected=true))::Matrix{T2} ≈ expected_std2 + @test @inferred(std(x, wv1, 1; mean=m1, corrected=true))::Matrix{T1} ≈ + expected_std1 + @test @inferred(std(x, wv2, 2; mean=m2, corrected=true))::Matrix{T2} ≈ + expected_std2 + @test @inferred(stdm(x, wv1, m1, 1; corrected=true))::Matrix{T1} ≈ + expected_std1 + @test @inferred(stdm(x, wv2, m2, 2; corrected=true))::Matrix{T2} ≈ + expected_std2 + end + end - v = collect(T, 1:5) - k = @inferred(kurtosis(v, wv)) - @test k isa float(promote_type(T, T2)) - @test k ≈ oftype(k, -1.3) + @testset "Mean and Variance" begin + for d in 1:2 + (m, v) = @inferred(mean_and_var(x, d; corrected=true))::Tuple{Matrix{TX}, + Matrix{TX}} + @test m == mean(x, dims=d) + @test v == var(x, dims=d, corrected=true) + end - # Empty arrays - wv = f(T2[]) - s = @inferred(skewness(T[], wv)) - @test s isa float(promote_type(T, T2)) - @test isnan(s) - k = @inferred(kurtosis(T[], wv)) - @test k isa float(promote_type(T, T2)) - @test isnan(k) + if isa(wv1, Weights) + @test_throws ArgumentError mean_and_var(x, wv1, 1; corrected=true) + else + (m, v) = @inferred(mean_and_var(x, wv1, 1; corrected=true))::Tuple{Matrix{T1}, + Matrix{T1}} + @test m == mean(x, wv1, dims=1) + @test v == var(x, wv1, 1; corrected=true) + + (m, v) = @inferred(mean_and_var(x, wv2, 2; corrected=true))::Tuple{Matrix{T2}, + Matrix{T2}} + @test m == mean(x, wv2, dims=2) + @test v == var(x, wv2, 2; corrected=true) + end end - # Invalid arguments - v = collect(T, 1:5) - for n in (length(x) - 1, length(x) + 1) - @test_throws DimensionMismatch("Inconsistent array lengths.") kurtosis(v, f(ones(T, n))) - @test_throws DimensionMismatch("Inconsistent array lengths.") skewness(v, f(ones(T, n))) + @testset "Mean and Standard Deviation" begin + for d in 1:2 + (m, s) = @inferred(mean_and_std(x, d; corrected=true))::Tuple{Matrix{TX}, + Matrix{TX}} + @test m == mean(x, dims=d) + @test s == std(x, dims=d, corrected=true) + end + + if isa(wv1, Weights) + @test_throws ArgumentError mean_and_std(x, wv1, 1; corrected=true) + else + (m, s) = @inferred(mean_and_std(x, wv1, 1; corrected=true))::Tuple{Matrix{T1}, + Matrix{T1}} + @test m == mean(x, wv1, dims=1) + @test s == std(x, wv1, 1; corrected=true) + + (m, s) = @inferred(mean_and_std(x, wv2, 2; corrected=true))::Tuple{Matrix{T2}, + Matrix{T2}} + @test m == mean(x, wv2, dims=2) + @test s == std(x, wv2, 2; corrected=true) + end end end -end -@testset "General Moments with $f" for f in weight_funcs - for T in (Int, Float32, Float64) - x = collect(T, 2:8) - for k in 2:5 - momk = @inferred(moment(x, k)) - @test momk isa float(T) - @test momk ≈ sum((x .- 5).^k) / length(x) + @testset "Skewness and Kurtosis with $f" for f in weight_funcs + for T in (Int, Float32, Float64) + for v in (T(1):T(5), collect(T, 1:5)) + s = @inferred(skewness(v)) + @test 
s isa float(T) + @test iszero(s) - # Empty array - momk = @inferred(moment(T[], k)) - @test momk isa float(T) - @test isnan(momk) + k = @inferred(kurtosis(v)) + @test k isa float(T) + @test k ≈ oftype(k, -1.3) + end - for TM in (Int, Float32, Float64) - m = TM(4) - momk = @inferred(moment(x, k, m)) - @test momk isa float(promote_type(T, TM)) - @test momk ≈ sum((x .- 4).^k) / length(x) + v = T[1, 2, 2, 2, 5] + s = @inferred(skewness(v)) + @test s isa float(T) + @test s ≈ oftype(s, 1.1731251294063556) - # Empty array - momk = @inferred(moment(T[], k, zero(TM))) - @test momk isa float(promote_type(T, TM)) - @test isnan(momk) + v = T[1, 4, 4, 4, 5] + s = @inferred(skewness(v)) + @test s isa float(T) + @test s ≈ oftype(s, -1.1731251294063556) + + v = T[1, 2, 3, 3, 2] + k = @inferred(kurtosis(v)) + @test k isa float(T) + @test k ≈ oftype(k, -1.1530612244897953) + + # Empty arrays + s = @inferred(skewness(T[])) + @test s isa float(T) + @test isnan(s) + k = @inferred(kurtosis(T[])) + @test k isa float(T) + @test isnan(k) + + for T2 in (Int, Float32, Float64) + wv = f(fill(T2(2), 5)) + v = T[1, 2, 2, 2, 5] + s = @inferred(skewness(v, wv)) + @test s isa float(promote_type(T, T2)) + @test s ≈ oftype(s, 1.1731251294063556) + + v = collect(T, 1:5) + k = @inferred(kurtosis(v, wv)) + @test k isa float(promote_type(T, T2)) + @test k ≈ oftype(k, -1.3) + + # Empty arrays + wv = f(T2[]) + s = @inferred(skewness(T[], wv)) + @test s isa float(promote_type(T, T2)) + @test isnan(s) + k = @inferred(kurtosis(T[], wv)) + @test k isa float(promote_type(T, T2)) + @test isnan(k) + end + + # Invalid arguments + v = collect(T, 1:5) + for n in (length(x) - 1, length(x) + 1) + @test_throws DimensionMismatch("Inconsistent array lengths.") kurtosis(v, + f(ones(T, + n))) + @test_throws DimensionMismatch("Inconsistent array lengths.") skewness(v, + f(ones(T, + n))) end end + end - for T2 in (Int, Float32, Float64) - wv = f(T2[1, 1, 1, 1, 1, 0, 0]) - x2 = collect(T, 2:6) + @testset "General Moments with $f" for f in weight_funcs + for T in (Int, Float32, Float64) + x = collect(T, 2:8) for k in 2:5 - momk = @inferred(moment(x, k, wv)) - @test momk isa float(promote_type(T, T2)) - @test momk ≈ sum((x2 .- 4).^k) / 5 + momk = @inferred(moment(x, k)) + @test momk isa float(T) + @test momk ≈ sum((x .- 5) .^ k) / length(x) # Empty array - momk = @inferred(moment(T[], k, f(T2[]))) - @test momk isa float(promote_type(T, T2)) + momk = @inferred(moment(T[], k)) + @test momk isa float(T) @test isnan(momk) for TM in (Int, Float32, Float64) - m = TM(3) - momk = @inferred(moment(x, k, wv, m)) - @test momk isa float(promote_type(T, T2, TM)) - @test momk ≈ sum((x2 .- 3).^k) / 5 + m = TM(4) + momk = @inferred(moment(x, k, m)) + @test momk isa float(promote_type(T, TM)) + @test momk ≈ sum((x .- 4) .^ k) / length(x) # Empty array - momk = @inferred(moment(T[], k, f(T2[]), zero(TM))) - @test momk isa float(promote_type(T, T2, TM)) + momk = @inferred(moment(T[], k, zero(TM))) + @test momk isa float(promote_type(T, TM)) @test isnan(momk) end end - end - end -end - -@testset "Cumulants with $f" for f in weight_funcs - for T in (Int, Float32, Float64) - x = collect(T, 2:8) - for k in 1:6 - cumk = @inferred(cumulant(x, k)) - @test cumk isa float(T) - if k == 1 - @test cumk ≈ mean(x) - elseif k == 2 || k == 3 - @test cumk ≈ moment(x, k) - elseif k == 4 - @test cumk ≈ moment(x, 4) - 3*moment(x, 2)^2 - elseif k == 5 - @test cumk ≈ moment(x, 5) - 10*moment(x, 3)*moment(x, 2) - else - @assert k == 6 - @test cumk ≈ moment(x, 6) - 15*moment(x, 4)*moment(x, 2) 
- 10*moment(x, 3)^2 + 30*moment(x, 2)^3 + + for T2 in (Int, Float32, Float64) + wv = f(T2[1, 1, 1, 1, 1, 0, 0]) + x2 = collect(T, 2:6) + for k in 2:5 + momk = @inferred(moment(x, k, wv)) + @test momk isa float(promote_type(T, T2)) + @test momk ≈ sum((x2 .- 4) .^ k) / 5 + + # Empty array + momk = @inferred(moment(T[], k, f(T2[]))) + @test momk isa float(promote_type(T, T2)) + @test isnan(momk) + + for TM in (Int, Float32, Float64) + m = TM(3) + momk = @inferred(moment(x, k, wv, m)) + @test momk isa float(promote_type(T, T2, TM)) + @test momk ≈ sum((x2 .- 3) .^ k) / 5 + + # Empty array + momk = @inferred(moment(T[], k, f(T2[]), zero(TM))) + @test momk isa float(promote_type(T, T2, TM)) + @test isnan(momk) + end + end end end - cumks = @inferred(cumulant(x, 1:6)) - @test cumks isa Vector{float(T)} - @test cumks == [cumulant(x, i) for i in 1:6] + end - for TM in (Int, Float32, Float64) - m = TM(4) + @testset "Cumulants with $f" for f in weight_funcs + for T in (Int, Float32, Float64) + x = collect(T, 2:8) for k in 1:6 - cumk = @inferred(cumulant(x, k, m)) - @test cumk isa float(promote_type(T, TM)) + cumk = @inferred(cumulant(x, k)) + @test cumk isa float(T) if k == 1 - @test cumk ≈ m + @test cumk ≈ mean(x) elseif k == 2 || k == 3 - @test cumk ≈ moment(x, k, m) + @test cumk ≈ moment(x, k) elseif k == 4 - @test cumk ≈ moment(x, 4, m) - 3*moment(x, 2, m)^2 + @test cumk ≈ moment(x, 4) - 3*moment(x, 2)^2 elseif k == 5 - @test cumk ≈ moment(x, 5, m) - 10*moment(x, 3, m)*moment(x, 2, m) + @test cumk ≈ moment(x, 5) - 10*moment(x, 3)*moment(x, 2) else @assert k == 6 - @test cumk ≈ moment(x, 6, m) - 15*moment(x, 4, m)*moment(x, 2, m) - 10*moment(x, 3, m)^2 + 30*moment(x, 2, m)^3 + @test cumk ≈ + moment(x, 6) - 15*moment(x, 4)*moment(x, 2) - 10*moment(x, 3)^2 + + 30*moment(x, 2)^3 end end - cumks = @inferred(cumulant(x, 1:6, m)) - @test cumks isa Vector{float(promote_type(T, TM))} - @test cumks == [cumulant(x, i, m) for i in 1:6] - end - - for T2 in (Int, Float32, Float64) - wv = f(T2[1, 1, 1, 1, 1, 0, 0]) - x2 = collect(T, 2:6) - for k in 1:6 - cumk = @inferred(cumulant(x, k, wv)) - @test cumk isa float(promote_type(T, T2)) - @test cumk ≈ cumulant(x2, k) rtol = cbrt(eps(typeof(cumk))) - end - cumks = @inferred(cumulant(x, 1:6, wv)) - @test cumks isa Vector{float(promote_type(T, T2))} - @test cumks == [cumulant(x, i, wv) for i in 1:6] + cumks = @inferred(cumulant(x, 1:6)) + @test cumks isa Vector{float(T)} + @test cumks == [cumulant(x, i) for i in 1:6] for TM in (Int, Float32, Float64) - m = TM(3) + m = TM(4) for k in 1:6 - cumk = @inferred(cumulant(x, k, wv, m)) - @test cumk isa float(promote_type(T, T2, TM)) - @test cumk ≈ cumulant(x2, k, m) rtol = cbrt(eps(typeof(cumk))) + cumk = @inferred(cumulant(x, k, m)) + @test cumk isa float(promote_type(T, TM)) + if k == 1 + @test cumk ≈ m + elseif k == 2 || k == 3 + @test cumk ≈ moment(x, k, m) + elseif k == 4 + @test cumk ≈ moment(x, 4, m) - 3*moment(x, 2, m)^2 + elseif k == 5 + @test cumk ≈ moment(x, 5, m) - 10*moment(x, 3, m)*moment(x, 2, m) + else + @assert k == 6 + @test cumk ≈ + moment(x, 6, m) - 15*moment(x, 4, m)*moment(x, 2, m) - + 10*moment(x, 3, m)^2 + 30*moment(x, 2, m)^3 + end end - cumks = @inferred(cumulant(x, 1:6, wv, m)) - @test cumks isa Vector{float(promote_type(T, T2, TM))} - @test cumks == [cumulant(x, i, wv, m) for i in 1:6] + cumks = @inferred(cumulant(x, 1:6, m)) + @test cumks isa Vector{float(promote_type(T, TM))} + @test cumks == [cumulant(x, i, m) for i in 1:6] end - end - # Invalid arguments - @test_throws ArgumentError cumulant(x, 
-1) - @test_throws ArgumentError cumulant(x, 0) - @test_throws ArgumentError cumulant(x, 0:3) - @test_throws ArgumentError cumulant(x, -1:3) - @test_throws ArgumentError cumulant(x, 1:0) + for T2 in (Int, Float32, Float64) + wv = f(T2[1, 1, 1, 1, 1, 0, 0]) + x2 = collect(T, 2:6) + for k in 1:6 + cumk = @inferred(cumulant(x, k, wv)) + @test cumk isa float(promote_type(T, T2)) + @test cumk ≈ cumulant(x2, k) rtol = cbrt(eps(typeof(cumk))) + end + cumks = @inferred(cumulant(x, 1:6, wv)) + @test cumks isa Vector{float(promote_type(T, T2))} + @test cumks == [cumulant(x, i, wv) for i in 1:6] + + for TM in (Int, Float32, Float64) + m = TM(3) + for k in 1:6 + cumk = @inferred(cumulant(x, k, wv, m)) + @test cumk isa float(promote_type(T, T2, TM)) + @test cumk ≈ cumulant(x2, k, m) rtol = cbrt(eps(typeof(cumk))) + end + cumks = @inferred(cumulant(x, 1:6, wv, m)) + @test cumks isa Vector{float(promote_type(T, T2, TM))} + @test cumks == [cumulant(x, i, wv, m) for i in 1:6] + end + end - for n in (length(x) - 1, length(x) + 1), krange in (1, 1:3) - @test_throws DimensionMismatch("Inconsistent array lengths.") cumulant(x, krange, f(ones(n))) - @test_throws DimensionMismatch("Inconsistent array lengths.") cumulant(x, krange, f(ones(n)), 0.0) + # Invalid arguments + @test_throws ArgumentError cumulant(x, -1) + @test_throws ArgumentError cumulant(x, 0) + @test_throws ArgumentError cumulant(x, 0:3) + @test_throws ArgumentError cumulant(x, -1:3) + @test_throws ArgumentError cumulant(x, 1:0) + + for n in (length(x) - 1, length(x) + 1), krange in (1, 1:3) + @test_throws DimensionMismatch("Inconsistent array lengths.") cumulant(x, + krange, + f(ones(n))) + @test_throws DimensionMismatch("Inconsistent array lengths.") cumulant(x, + krange, + f(ones(n)), + 0.0) + end end end -end - end # @testset "StatsBase.Moments" diff --git a/test/pairwise.jl b/test/pairwise.jl index aad724660..843af6e87 100644 --- a/test/pairwise.jl +++ b/test/pairwise.jl @@ -33,14 +33,16 @@ arbitrary_fun(x, y) = cor(x, y) res2 = zeros(AbstractFloat, size(res)) @test pairwise!(f, res2, Any[[1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]]) === res2 @test res == res2 == - [f(xi, yi) for xi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]), - yi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0])] + [f(xi, yi) + for xi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]), + yi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0])] @test res isa Matrix{Float64} @inferred pairwise(f, x, y) @test_throws Union{ArgumentError,MethodError} pairwise(f, [Int[]], [Int[]]) - @test_throws Union{ArgumentError,MethodError} pairwise!(f, zeros(1, 1), [Int[]], [Int[]]) + @test_throws Union{ArgumentError,MethodError} pairwise!(f, zeros(1, 1), [Int[]], + [Int[]]) res = pairwise(f, [], []) @test size(res) == (0, 0) @@ -79,13 +81,13 @@ arbitrary_fun(x, y) = cor(x, y) res = pairwise(f, xm, ym) @test res isa Matrix{Missing} - res2 = zeros(Union{Float64, Missing}, size(res)) + res2 = zeros(Union{Float64,Missing}, size(res)) @test pairwise!(f, res2, xm, ym) === res2 @test res ≅ res2 ≅ [missing for xi in xm, yi in ym] res = pairwise(f, xm, ym, skipmissing=:pairwise) @test res isa Matrix{Float64} - res2 = zeros(Union{Float64, Missing}, size(res)) + res2 = zeros(Union{Float64,Missing}, size(res)) @test pairwise!(f, res2, xm, ym, skipmissing=:pairwise) === res2 @test res ≅ res2 @test isapprox(res, [f(collect.(skipmissings(xi, yi))...) 
for xi in xm, yi in ym], @@ -93,18 +95,18 @@ arbitrary_fun(x, y) = cor(x, y) res = pairwise(f, ym, zm, skipmissing=:pairwise) @test res isa Matrix{Float32} - res2 = zeros(Union{Float32, Missing}, size(res)) + res2 = zeros(Union{Float32,Missing}, size(res)) @test pairwise!(f, res2, ym, zm, skipmissing=:pairwise) === res2 @test res ≅ res2 @test isapprox(res, [f(collect.(skipmissings(yi, zi))...) for yi in ym, zi in zm], rtol=1e-6) nminds = mapreduce(x -> .!ismissing.(x), - (x, y) -> x .& y, - [xm; ym]) + (x, y) -> x .& y, + [xm; ym]) res = pairwise(f, xm, ym, skipmissing=:listwise) @test res isa Matrix{Float64} - res2 = zeros(Union{Float64, Missing}, size(res)) + res2 = zeros(Union{Float64,Missing}, size(res)) @test pairwise!(f, res2, xm, ym, skipmissing=:listwise) === res2 @test res ≅ res2 @test isapprox(res, [f(view(xi, nminds), view(yi, nminds)) for xi in xm, yi in ym], @@ -114,21 +116,29 @@ arbitrary_fun(x, y) = cor(x, y) # to check that pairwise itself is inferrable for skipmissing in (:none, :pairwise, :listwise) g(x, y=x) = pairwise((x, y) -> x[1] * y[1], x, y, skipmissing=skipmissing) - @test Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}}) == - Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}, - Vector{Vector{Union{Float64, Missing}}}}) == - Matrix{<: Union{Float64, Missing}} + @test Core.Compiler.return_type(g, + Tuple{Vector{Vector{Union{Float64,Missing}}}}) == + Core.Compiler.return_type(g, + Tuple{Vector{Vector{Union{Float64,Missing}}}, + Vector{Vector{Union{Float64,Missing}}}}) == + Matrix{<: Union{Float64,Missing}} if skipmissing in (:pairwise, :listwise) - @test_broken Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}}) == - Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}, - Vector{Vector{Union{Float64, Missing}}}}) == + @test_broken Core.Compiler.return_type(g, + Tuple{Vector{Vector{Union{Float64, + Missing}}}}) == + Core.Compiler.return_type(g, + Tuple{Vector{Vector{Union{Float64, + Missing}}}, + Vector{Vector{Union{Float64, + Missing}}}}) == Matrix{Float64} end end @test_throws ArgumentError pairwise(f, xm, ym, skipmissing=:something) - @test_throws ArgumentError pairwise!(f, zeros(Union{Float64, Missing}, - length(xm), length(ym)), xm, ym, + @test_throws ArgumentError pairwise!(f, + zeros(Union{Float64,Missing}, + length(xm), length(ym)), xm, ym, skipmissing=:something) # variable with only missings @@ -136,17 +146,21 @@ arbitrary_fun(x, y) = cor(x, y) ym = [rand(10), rand(10)] res = pairwise(f, xm, ym) - @test res isa Matrix{Union{Float64, Missing}} - res2 = zeros(Union{Float64, Missing}, size(res)) + @test res isa Matrix{Union{Float64,Missing}} + res2 = zeros(Union{Float64,Missing}, size(res)) @test pairwise!(f, res2, xm, ym) === res2 @test res ≅ res2 ≅ [f(xi, yi) for xi in xm, yi in ym] - @test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:pairwise) - @test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:listwise) + @test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, + skipmissing=:pairwise) + @test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, + skipmissing=:listwise) - res = zeros(Union{Float64, Missing}, length(xm), length(ym)) - @test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:pairwise) - @test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:listwise) + res = zeros(Union{Float64,Missing}, length(xm), length(ym)) + 
@test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, + skipmissing=:pairwise) + @test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, + skipmissing=:listwise) for sm in (:pairwise, :listwise) @test_throws ArgumentError pairwise(f, [[1, 2]], [1], skipmissing=sm) @@ -175,9 +189,9 @@ arbitrary_fun(x, y) = cor(x, y) y = (Iterators.drop(v, 1) for v in [rand(10) for _ in 1:4]) @test pairwise((x, y) -> f(collect(x), collect(y)), x, y) == - [f(collect(xi), collect(yi)) for xi in x, yi in y] + [f(collect(xi), collect(yi)) for xi in x, yi in y] @test pairwise((x, y) -> f(collect(x), collect(y)), x) == - [f(collect(xi1), collect(xi2)) for xi1 in x, xi2 in x] + [f(collect(xi1), collect(xi2)) for xi1 in x, xi2 in x] @test_throws ArgumentError pairwise((x, y) -> f(collect(x), collect(y)), x, y, skipmissing=:pairwise) @test_throws ArgumentError pairwise((x, y) -> f(collect(x), collect(y)), x, y, @@ -197,8 +211,8 @@ arbitrary_fun(x, y) = cor(x, y) y = [rand(10) for _ in 1:4] @test pairwise(f, x, x, symmetric=true) == - pairwise(f, x, symmetric=true) == - Symmetric(pairwise(f, x, x), :U) + pairwise(f, x, symmetric=true) == + Symmetric(pairwise(f, x, x), :U) res = zeros(4, 4) res2 = zeros(4, 4) @@ -225,12 +239,13 @@ arbitrary_fun(x, y) = cor(x, y) # missings are ignored for the diagonal res = pairwise(cor, [[1, 2, 7], [1, 5, missing]]) - @test res isa Matrix{Union{Float64, Missing}} + @test res isa Matrix{Union{Float64,Missing}} @test res ≅ [1.0 missing missing 1.0] - res = pairwise(cor, Vector{Union{Int, Missing}}[[missing, missing, missing], - [missing, missing, missing]]) - @test res isa Matrix{Union{Float64, Missing}} + res = pairwise(cor, + Vector{Union{Int,Missing}}[[missing, missing, missing], + [missing, missing, missing]]) + @test res isa Matrix{Union{Float64,Missing}} @test res ≅ [1.0 missing missing 1.0] # except when eltype is Missing @@ -245,8 +260,9 @@ arbitrary_fun(x, y) = cor(x, y) @test res isa Matrix{Float64} @test res ≅ [1.0 NaN NaN 1.0] - @test_throws ArgumentError pairwise(cor, [[missing, missing, missing], - [missing, missing, missing]], + @test_throws ArgumentError pairwise(cor, + [[missing, missing, missing], + [missing, missing, missing]], skipmissing=sm) end end @@ -254,25 +270,25 @@ arbitrary_fun(x, y) = cor(x, y) @testset "promote_type_union" begin @test StatsBase.promote_type_union(Int) === Int @test StatsBase.promote_type_union(Real) === Real - @test StatsBase.promote_type_union(Union{Int, Float64}) === Float64 - @test StatsBase.promote_type_union(Union{Int, Missing}) === Union{Int, Missing} - @test StatsBase.promote_type_union(Union{Int, String}) === Any + @test StatsBase.promote_type_union(Union{Int,Float64}) === Float64 + @test StatsBase.promote_type_union(Union{Int,Missing}) === Union{Int,Missing} + @test StatsBase.promote_type_union(Union{Int,String}) === Any @test StatsBase.promote_type_union(Vector) === Any @test StatsBase.promote_type_union(Union{}) === Union{} - @test StatsBase.promote_type_union(Tuple{Union{Int, Float64}}) === + @test StatsBase.promote_type_union(Tuple{Union{Int,Float64}}) === Tuple{Real} end @testset "type-unstable corner case (#771)" begin - v = [rand(5) for _=1:10] + v = [rand(5) for _ in 1:10] function f(v) pairwise(v) do x, y - (x[1] < 0 ? nothing : - x[1] > y[1] ? 1 : 1.5, - 0) + return (x[1] < 0 ? nothing : + x[1] > y[1] ? 
1 : 1.5, + 0) end end res = f(v) - @test res isa Matrix{Tuple{Real, Int}} + @test res isa Matrix{Tuple{Real,Int}} end -end \ No newline at end of file +end diff --git a/test/partialcor.jl b/test/partialcor.jl index 77ae3cba7..41313bfa9 100644 --- a/test/partialcor.jl +++ b/test/partialcor.jl @@ -1,50 +1,48 @@ using StatsBase using Test -wechsler = Float32[ - 7 5 9 8 - 8 8 5 6 - 16 18 11 9 - 8 3 7 9 - 6 3 13 9 - 11 8 10 10 - 12 7 9 8 - 8 11 9 3 - 14 12 11 4 - 13 13 13 6 - 13 9 9 9 - 13 10 15 7 - 14 11 12 8 - 15 11 11 10 - 13 10 15 9 - 10 5 8 6 - 10 3 7 7 - 17 13 13 7 - 10 6 10 7 - 10 10 15 8 - 14 7 11 5 - 16 11 12 11 - 10 7 14 6 - 10 10 9 6 - 10 7 10 10 - 7 6 5 9 - 15 12 10 6 - 17 15 15 8 - 16 13 16 9 - 13 10 17 8 - 13 10 17 10 - 19 12 16 10 - 19 15 17 11 - 13 10 7 8 - 15 11 12 8 - 16 9 11 11 - 14 13 14 9 -] +wechsler = Float32[7 5 9 8 + 8 8 5 6 + 16 18 11 9 + 8 3 7 9 + 6 3 13 9 + 11 8 10 10 + 12 7 9 8 + 8 11 9 3 + 14 12 11 4 + 13 13 13 6 + 13 9 9 9 + 13 10 15 7 + 14 11 12 8 + 15 11 11 10 + 13 10 15 9 + 10 5 8 6 + 10 3 7 7 + 17 13 13 7 + 10 6 10 7 + 10 10 15 8 + 14 7 11 5 + 16 11 12 11 + 10 7 14 6 + 10 10 9 6 + 10 7 10 10 + 7 6 5 9 + 15 12 10 6 + 17 15 15 8 + 16 13 16 9 + 13 10 17 8 + 13 10 17 10 + 19 12 16 10 + 19 15 17 11 + 13 10 7 8 + 15 11 12 8 + 16 9 11 11 + 14 13 14 9] -@test @inferred(partialcor(wechsler[:,1], wechsler[:,2], wechsler[:,3:4])) ≈ 0.7118787 rtol=1e-6 +@test @inferred(partialcor(wechsler[:, 1], wechsler[:, 2], wechsler[:, 3:4])) ≈ 0.7118787 rtol=1e-6 -X = [ 2 1 0 - 4 2 0 +X = [2 1 0 + 4 2 0 15 3 1 20 4 1] -@test @inferred(partialcor(view(X,:,1), view(X,:,2), view(X,:,3))) ≈ 0.919145 rtol=1e-6 +@test @inferred(partialcor(view(X, :, 1), view(X, :, 2), view(X, :, 3))) ≈ 0.919145 rtol=1e-6 diff --git a/test/rankcorr.jl b/test/rankcorr.jl index dc0207ee1..85a846ae0 100644 --- a/test/rankcorr.jl +++ b/test/rankcorr.jl @@ -4,9 +4,9 @@ using Test X = Float64[1 0; 2 1; 3 0; 4 1; 5 10] Y = Float64[5 5 6; 3 4 1; 4 0 4; 2 6 1; 5 7 10] -x1 = X[:,1] -x2 = X[:,2] -y = Y[:,1] +x1 = X[:, 1] +x2 = X[:, 2] +y = Y[:, 1] # corspearman @@ -22,27 +22,28 @@ c22 = corspearman(x2, x2) @test c11 ≈ 1.0 @test c22 ≈ 1.0 @test corspearman(X, X) ≈ [c11 c12; c12 c22] -@test corspearman(X) ≈ [c11 c12; c12 c22] +@test corspearman(X) ≈ [c11 c12; c12 c22] @test corspearman(X, Y) == - [corspearman(X[:,i], Y[:,j]) for i in axes(X, 2), j in axes(Y, 2)] + [corspearman(X[:, i], Y[:, j]) for i in axes(X, 2), j in axes(Y, 2)] # corkendall # Check error, handling of NaN, Inf etc -@test_throws ErrorException("Vectors must have same length") corkendall([1,2,3,4], [1,2,3]) -@test isnan(corkendall([1,2], [3,NaN])) -@test isnan(corkendall([1,1,1], [1,2,3])) -@test corkendall([-Inf,-0.0,Inf],[1,2,3]) == 1.0 +@test_throws ErrorException("Vectors must have same length") corkendall([1, 2, 3, 4], + [1, 2, 3]) +@test isnan(corkendall([1, 2], [3, NaN])) +@test isnan(corkendall([1, 1, 1], [1, 2, 3])) +@test corkendall([-Inf, -0.0, Inf], [1, 2, 3]) == 1.0 # Test, with exact equality, some known results. 
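# For reference, the exact values tested below can be reproduced with a
# naive O(n^2) implementation of Kendall's tau-b. This is an editorial
# sketch (`naive_kendall` is not part of StatsBase), but `corkendall`
# should agree with it on small inputs:
function naive_kendall(x, y)
    n = length(x)
    C = D = tx = ty = 0                  # concordant, discordant, tie counts
    for i in 1:(n - 1), j in (i + 1):n
        sx = sign(x[j] - x[i])
        sy = sign(y[j] - y[i])
        sx == 0 && (tx += 1)             # pair tied in x
        sy == 0 && (ty += 1)             # pair tied in y
        sx * sy > 0 && (C += 1)          # strictly concordant pair
        sx * sy < 0 && (D += 1)          # strictly discordant pair
    end
    n0 = n * (n - 1) ÷ 2                 # total number of pairs
    return (C - D) / sqrt((n0 - tx) * (n0 - ty))
end
# For x1 = [1, 2, 3, 4, 5] and y = [5, 3, 4, 2, 5] this gives
# (4 - 5) / sqrt(10 * 9) = -1/sqrt(90), matching the first test below.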
# AbstractVector{<:Real}, AbstractVector{<:Real} @test corkendall(x1, y) == -1/sqrt(90) @test corkendall(x2, y) == -1/sqrt(72) # AbstractMatrix{<:Real}, AbstractVector{<:Real} -@test corkendall(X, y) == [-1/sqrt(90), -1/sqrt(72)] +@test corkendall(X, y) == [-1/sqrt(90), -1/sqrt(72)] # AbstractVector{<:Real}, AbstractMatrix{<:Real} -@test corkendall(y, X) == [-1/sqrt(90) -1/sqrt(72)] +@test corkendall(y, X) == [-1/sqrt(90) -1/sqrt(72)] # n = 78_000 tests for overflow errors on 32 bit # Testing for overflow errors on 64bit would require n be too large for practicality @@ -51,12 +52,12 @@ n = 78_000 # Test with many repeats @test corkendall(repeat(x1, n), repeat(y, n)) ≈ -1/sqrt(90) @test corkendall(repeat(x2, n), repeat(y, n)) ≈ -1/sqrt(72) -@test corkendall(repeat(X, n), repeat(y, n)) ≈ [-1/sqrt(90), -1/sqrt(72)] -@test corkendall(repeat(y, n), repeat(X, n)) ≈ [-1/sqrt(90) -1/sqrt(72)] -@test corkendall(repeat([0,1,1,0], n), repeat([1,0,1,0], n)) == 0.0 +@test corkendall(repeat(X, n), repeat(y, n)) ≈ [-1/sqrt(90), -1/sqrt(72)] +@test corkendall(repeat(y, n), repeat(X, n)) ≈ [-1/sqrt(90) -1/sqrt(72)] +@test corkendall(repeat([0, 1, 1, 0], n), repeat([1, 0, 1, 0], n)) == 0.0 # Test with no repeats, note testing for exact equality -@test corkendall(collect(1:n), collect(1:n)) == 1.0 +@test corkendall(collect(1:n), collect(1:n)) == 1.0 @test corkendall(collect(1:n), reverse(collect(1:n))) == -1.0 # All elements identical should yield NaN @@ -69,7 +70,7 @@ c22 = corkendall(x2, x2) # AbstractMatrix{<:Real}, AbstractMatrix{<:Real} @test corkendall(X, X) ≈ [c11 c12; c12 c22] # AbstractMatrix{<:Real} -@test corkendall(X) ≈ [c11 c12; c12 c22] +@test corkendall(X) ≈ [c11 c12; c12 c22] @test c11 == 1.0 @test c22 == 1.0 @@ -79,74 +80,72 @@ c22 = corkendall(x2, x2) n = 100 @test corkendall(repeat(X, n), repeat(X, n)) ≈ [c11 c12; c12 c22] -@test corkendall(repeat(X, n)) ≈ [c11 c12; c12 c22] +@test corkendall(repeat(X, n)) ≈ [c11 c12; c12 c22] # All eight three-element permutations -z = [1 1 1; - 1 1 2; - 1 2 2; - 1 2 2; - 1 2 1; - 2 1 2; - 1 1 2; - 2 2 2] - -@test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1; 0; 1/3] +z = [1 1 1; + 1 1 2; + 1 2 2; + 1 2 2; + 1 2 1; + 2 1 2; + 1 1 2; + 2 2 2] + +@test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] +@test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] +@test corkendall(z[:, 1], z) == [1 0 1/3] +@test corkendall(z, z[:, 1]) == [1; 0; 1/3] z = float(z) -@test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(z[:,1], z) == [1 0 1/3] -@test corkendall(z, z[:,1]) == [1; 0; 1/3] +@test corkendall(z) == [1 0 1/3; 0 1 0; 1/3 0 1] +@test corkendall(z, z) == [1 0 1/3; 0 1 0; 1/3 0 1] +@test corkendall(z[:, 1], z) == [1 0 1/3] +@test corkendall(z, z[:, 1]) == [1; 0; 1/3] w = repeat(z, n) -@test corkendall(w) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(w, w) == [1 0 1/3; 0 1 0; 1/3 0 1] -@test corkendall(w[:,1], w) == [1 0 1/3] -@test corkendall(w, w[:,1]) == [1; 0; 1/3] - -StatsBase.midpoint(1,10) == 5 -StatsBase.midpoint(1,widen(10)) == 5 +@test corkendall(w) == [1 0 1/3; 0 1 0; 1/3 0 1] +@test corkendall(w, w) == [1 0 1/3; 0 1 0; 1/3 0 1] +@test corkendall(w[:, 1], w) == [1 0 1/3] +@test corkendall(w, w[:, 1]) == [1; 0; 1/3] +StatsBase.midpoint(1, 10) == 5 +StatsBase.midpoint(1, widen(10)) == 5 # NaN handling Xnan = copy(X) -Xnan[1,1] = NaN +Xnan[1, 1] = NaN Ynan = copy(Y) 
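# A note on the NaN propagation exercised below: a single NaN in either
# argument makes the whole coefficient NaN, since ranks and pairwise
# comparisons involving NaN are undefined. Restating the first assertions
# of the loop that follows:
#
#     corspearman([1.0, NaN, 2.0], [2.0, 1.0, 3.4])   # NaN
#     corkendall([1.0, NaN, 2.0], [2.0, 1.0, 3.4])    # NaN
#
# The one exception, also tested below, is the diagonal of the
# single-matrix methods, where each column's correlation with itself is
# fixed at 1.0 regardless of NaN entries.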
-Ynan[2,1] = NaN +Ynan[2, 1] = NaN for f in (corspearman, corkendall) - @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4])) - @test all(isnan, f([1.0, NaN], [1 2; 3 4])) - @test all(isnan, f([1 2; 3 4], [1.0, NaN])) - @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) - @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) - @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) - - @test isequal(f(Xnan, Ynan), - [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) - @test isequal(f(Xnan), - [i == j ? 1.0 : f(Xnan[:,i], Xnan[:,j]) - for i in axes(Xnan, 2), j in axes(Xnan, 2)]) - for k in 1:2 - @test isequal(f(Xnan[:,k], Ynan), - [f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)]) - # TODO: fix corkendall (PR#659) - if f === corspearman - @test isequal(f(Xnan, Ynan[:,k]), - [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1]) - else - @test isequal(f(Xnan, Ynan[:,k]), - [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2)]) - end - end + @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4])) + @test all(isnan, f([1.0, NaN], [1 2; 3 4])) + @test all(isnan, f([1 2; 3 4], [1.0, NaN])) + @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1]) + @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4])) + @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4])) + + @test isequal(f(Xnan, Ynan), + [f(Xnan[:, i], Ynan[:, j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)]) + @test isequal(f(Xnan), + [i == j ? 1.0 : f(Xnan[:, i], Xnan[:, j]) + for i in axes(Xnan, 2), j in axes(Xnan, 2)]) + for k in 1:2 + @test isequal(f(Xnan[:, k], Ynan), + [f(Xnan[:, k], Ynan[:, j]) for i in 1:1, j in axes(Ynan, 2)]) + # TODO: fix corkendall (PR#659) + if f === corspearman + @test isequal(f(Xnan, Ynan[:, k]), + [f(Xnan[:, i], Ynan[:, k]) for i in axes(Xnan, 2), j in 1:1]) + else + @test isequal(f(Xnan, Ynan[:, k]), + [f(Xnan[:, i], Ynan[:, k]) for i in axes(Xnan, 2)]) + end + end end - # Wrong dimensions @test_throws DimensionMismatch corspearman([1], [1, 2]) diff --git a/test/ranking.jl b/test/ranking.jl index fea0a48c1..01a0b0934 100644 --- a/test/ranking.jl +++ b/test/ranking.jl @@ -11,32 +11,31 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered @test isequal(ordinalrank(xm), [4, 1, 2, 5, 6, 3, 8, 7, missing]) @test isequal(ordinalrank([missing, missing]), [missing, missing]) @test ordinalrank(s) == ordinalrank(x) -@test ordinalrank(x, rev = true) == ordinalrank(-x) -@test ordinalrank(x, lt = (x, y) -> isless(y, x)) == ordinalrank(-x) +@test ordinalrank(x, rev=true) == ordinalrank(-x) +@test ordinalrank(x, lt=(x, y) -> isless(y, x)) == ordinalrank(-x) @test competerank(a) == [1, 2, 2, 4, 5, 5, 5, 8] @test competerank(x) == [4, 1, 2, 5, 5, 2, 8, 5] @test isequal(competerank(xm), [4, 1, 2, 5, 5, 2, 8, 5, missing]) @test isequal(competerank([missing, missing]), [missing, missing]) @test competerank(s) == competerank(x) -@test competerank(x, rev = true) == competerank(-x) -@test competerank(x, lt = (x, y) -> isless(y, x)) == competerank(-x) +@test competerank(x, rev=true) == competerank(-x) +@test competerank(x, lt=(x, y) -> isless(y, x)) == competerank(-x) @test denserank(a) == [1, 2, 2, 3, 4, 4, 4, 5] @test denserank(x) == [3, 1, 2, 4, 4, 2, 5, 4] @test isequal(denserank(xm), [3, 1, 2, 4, 4, 2, 5, 4, missing]) @test isequal(denserank([missing, missing]), [missing, missing]) @test denserank(s) == denserank(x) -@test denserank(x, rev = true) == denserank(-x) -@test denserank(x, lt = (x, y) -> isless(y, x)) == denserank(-x) +@test denserank(x, rev=true) == denserank(-x) +@test denserank(x, lt=(x, 
y) -> isless(y, x)) == denserank(-x) @test tiedrank(a) == [1.0, 2.5, 2.5, 4.0, 6.0, 6.0, 6.0, 8.0] @test tiedrank(x) == [4.0, 1.0, 2.5, 6.0, 6.0, 2.5, 8.0, 6.0] @test isequal(tiedrank(xm), [4.0, 1.0, 2.5, 6.0, 6.0, 2.5, 8.0, 6.0, missing]) @test isequal(tiedrank([missing, missing]), [missing, missing]) @test tiedrank(s) == tiedrank(x) -@test tiedrank(x, rev = true) == tiedrank(-x) -@test tiedrank(x, lt = (x, y) -> isless(y, x)) == tiedrank(-x) +@test tiedrank(x, rev=true) == tiedrank(-x) +@test tiedrank(x, lt=(x, y) -> isless(y, x)) == tiedrank(-x) - -@test_throws DimensionMismatch StatsBase._check_randparams([1,2], [1,2], [1]) +@test_throws DimensionMismatch StatsBase._check_randparams([1, 2], [1, 2], [1]) diff --git a/test/reliability.jl b/test/reliability.jl index 916e097c4..75dd916a7 100644 --- a/test/reliability.jl +++ b/test/reliability.jl @@ -11,7 +11,7 @@ using LinearAlgebra, Random, Test @test cronbach_X isa CronbachAlpha{Float64} @test cronbach_X.alpha ≈ 0.8135593220338981 @test cronbach_X.dropped ≈ - [0.75, 0.7605633802816901, 0.7714285714285715, 0.782608695652174] + [0.75, 0.7605633802816901, 0.7714285714285715, 0.782608695652174] # testing Rational cov_rational = cov_X .// 1 @@ -19,7 +19,7 @@ using LinearAlgebra, Random, Test @test cronbach_rational isa CronbachAlpha{Rational{Int}} @test cronbach_rational.alpha == 48 // 59 @test cronbach_rational.dropped == - [3 // 4, 54 // 71, 27 // 35, 18 // 23] + [3 // 4, 54 // 71, 27 // 35, 18 // 23] # testing BigFloat cov_bigfloat = BigFloat.(cov_X) @@ -27,7 +27,7 @@ using LinearAlgebra, Random, Test @test cronbach_bigfloat isa CronbachAlpha{BigFloat} @test cronbach_bigfloat.alpha ≈ 0.8135593220338981 @test cronbach_bigfloat.dropped ≈ - [0.75, 0.7605633802816901, 0.7714285714285715, 0.782608695652174] + [0.75, 0.7605633802816901, 0.7714285714285715, 0.782608695652174] # testing corner cases @test_throws MethodError cronbachalpha([1.0, 2.0]) @@ -56,7 +56,6 @@ using LinearAlgebra, Random, Test missing 1] @test_throws MethodError cronbachalpha(cov_missing) - # testing Base.show cronbach_X = cronbachalpha(cov_X) io = IOBuffer() @@ -76,5 +75,4 @@ using LinearAlgebra, Random, Test show(io, cronbach_k2) str = String(take!(io)) @test str == "Cronbach's alpha for all items: 0.7273\n" - end # @testset "Cronbach's Alpha" diff --git a/test/robust.jl b/test/robust.jl index 59721291a..acffeb944 100644 --- a/test/robust.jl +++ b/test/robust.jl @@ -3,56 +3,55 @@ using Test, Random ### Trimming outliers -@test collect(trim([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1] -@test collect(trim([8,2,3,4,5,6,7,1], prop=0.2)) == [2,3,4,5,6,7] -@test collect(trim([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,5,6] -@test collect(trim([8,7,6,5,4,3,2,1], count=1)) == [7,6,5,4,3,2] -@test collect(trim([1,2,3,4,5,6,7,8,9], count=3)) == [4,5,6] - +@test collect(trim([8, 2, 3, 4, 5, 6, 7, 1], prop=0.1)) == [8, 2, 3, 4, 5, 6, 7, 1] +@test collect(trim([8, 2, 3, 4, 5, 6, 7, 1], prop=0.2)) == [2, 3, 4, 5, 6, 7] +@test collect(trim([1, 2, 3, 4, 5, 6, 7, 8, 9], prop=0.4)) == [4, 5, 6] +@test collect(trim([8, 7, 6, 5, 4, 3, 2, 1], count=1)) == [7, 6, 5, 4, 3, 2] +@test collect(trim([1, 2, 3, 4, 5, 6, 7, 8, 9], count=3)) == [4, 5, 6] @test_throws ArgumentError trim([]) -@test_throws ArgumentError trim([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError trim([1, 2, 3, 4, 5], prop=0.5) -@test collect(trim!([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1] -@test collect(trim!([8,2,3,4,5,6,7,1], prop=0.2)) == [2,3,4,5,6,7] -@test collect(trim!([1,2,3,4,5,6,7,8,9], prop=0.4)) == 
[4,5,6] -@test collect(trim!([8,7,6,5,4,3,2,1], count=1)) == [7,6,5,4,3,2] -@test collect(trim!([1,2,3,4,5,6,7,8,9], count=3)) == [4,5,6] +@test collect(trim!([8, 2, 3, 4, 5, 6, 7, 1], prop=0.1)) == [8, 2, 3, 4, 5, 6, 7, 1] +@test collect(trim!([8, 2, 3, 4, 5, 6, 7, 1], prop=0.2)) == [2, 3, 4, 5, 6, 7] +@test collect(trim!([1, 2, 3, 4, 5, 6, 7, 8, 9], prop=0.4)) == [4, 5, 6] +@test collect(trim!([8, 7, 6, 5, 4, 3, 2, 1], count=1)) == [7, 6, 5, 4, 3, 2] +@test collect(trim!([1, 2, 3, 4, 5, 6, 7, 8, 9], count=3)) == [4, 5, 6] @test_throws ArgumentError trim!([]) -@test_throws ArgumentError trim!([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError trim!([1, 2, 3, 4, 5], prop=0.5) -@test collect(winsor([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1] -@test collect(winsor([8,2,3,4,5,6,7,1], prop=0.2)) == [7,2,3,4,5,6,7,2] -@test collect(winsor([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,4,4,4,5,6,6,6,6] -@test collect(winsor([1,2,3,4,5,6,7,8], count=1)) == [2,2,3,4,5,6,7,7] -@test collect(winsor([8,7,6,5,4,3,2,1], count=1)) == [7,7,6,5,4,3,2,2] -@test collect(winsor([1,2,3,4,5,6,7,8,9], count=3)) == [4,4,4,4,5,6,6,6,6] +@test collect(winsor([8, 2, 3, 4, 5, 6, 7, 1], prop=0.1)) == [8, 2, 3, 4, 5, 6, 7, 1] +@test collect(winsor([8, 2, 3, 4, 5, 6, 7, 1], prop=0.2)) == [7, 2, 3, 4, 5, 6, 7, 2] +@test collect(winsor([1, 2, 3, 4, 5, 6, 7, 8, 9], prop=0.4)) == [4, 4, 4, 4, 5, 6, 6, 6, 6] +@test collect(winsor([1, 2, 3, 4, 5, 6, 7, 8], count=1)) == [2, 2, 3, 4, 5, 6, 7, 7] +@test collect(winsor([8, 7, 6, 5, 4, 3, 2, 1], count=1)) == [7, 7, 6, 5, 4, 3, 2, 2] +@test collect(winsor([1, 2, 3, 4, 5, 6, 7, 8, 9], count=3)) == [4, 4, 4, 4, 5, 6, 6, 6, 6] @test_throws ArgumentError winsor([]) -@test_throws ArgumentError winsor([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError winsor([1, 2, 3, 4, 5], prop=0.5) -@test collect(winsor!([8,2,3,4,5,6,7,1], prop=0.1)) == [8,2,3,4,5,6,7,1] -@test collect(winsor!([8,2,3,4,5,6,7,1], prop=0.2)) == [7,2,3,4,5,6,7,2] -@test collect(winsor!([1,2,3,4,5,6,7,8,9], prop=0.4)) == [4,4,4,4,5,6,6,6,6] -@test collect(winsor!([8,7,6,5,4,3,2,1], count=1)) == [7,7,6,5,4,3,2,2] -@test collect(winsor!([1,2,3,4,5,6,7,8,9], count=3)) == [4,4,4,4,5,6,6,6,6] +@test collect(winsor!([8, 2, 3, 4, 5, 6, 7, 1], prop=0.1)) == [8, 2, 3, 4, 5, 6, 7, 1] +@test collect(winsor!([8, 2, 3, 4, 5, 6, 7, 1], prop=0.2)) == [7, 2, 3, 4, 5, 6, 7, 2] +@test collect(winsor!([1, 2, 3, 4, 5, 6, 7, 8, 9], prop=0.4)) == [4, 4, 4, 4, 5, 6, 6, 6, 6] +@test collect(winsor!([8, 7, 6, 5, 4, 3, 2, 1], count=1)) == [7, 7, 6, 5, 4, 3, 2, 2] +@test collect(winsor!([1, 2, 3, 4, 5, 6, 7, 8, 9], count=3)) == [4, 4, 4, 4, 5, 6, 6, 6, 6] @test_throws ArgumentError winsor!([]) -@test_throws ArgumentError winsor!([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError winsor!([1, 2, 3, 4, 5], prop=0.5) ### Variance -@test trimvar([1,1,1,1,1]) ≈ 0.0 -@test trimvar([2,3,4,5,6,7,8,9], prop=0.25) ≈ 1.0 +@test trimvar([1, 1, 1, 1, 1]) ≈ 0.0 +@test trimvar([2, 3, 4, 5, 6, 7, 8, 9], prop=0.25) ≈ 1.0 @test_throws ArgumentError trimvar([]) -@test_throws ArgumentError trimvar([1,2,3,4,5], prop=0.5) +@test_throws ArgumentError trimvar([1, 2, 3, 4, 5], prop=0.5) ### Other -@test mean(trim([-Inf,1,2,3,4], count=1)) == 2 -@test mean(winsor([-Inf,1,2,3,4], count=1)) == 2 +@test mean(trim([-Inf, 1, 2, 3, 4], count=1)) == 2 +@test mean(winsor([-Inf, 1, 2, 3, 4], count=1)) == 2 Random.seed!(1234) for n in 2100:2120, c in 0:1000 diff --git a/test/runtests.jl b/test/runtests.jl index da8212d9e..6073d0e3d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl 
@@ -25,8 +25,8 @@ tests = ["ambiguous", "partialcor", "transformations", # Test with JET after all other tests since it has side effects - "jet"] - #"statquiz"] + "jet"] +#"statquiz"] println("Running tests:") diff --git a/test/sampling.jl b/test/sampling.jl index cd2ff0960..5a0c53935 100644 --- a/test/sampling.jl +++ b/test/sampling.jl @@ -31,7 +31,8 @@ end #### sample with replacement -function check_sample_wrep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=false, rev::Bool=false) +function check_sample_wrep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=false, + rev::Bool=false) vmin, vmax = vrgn (amin, amax) = extrema(a) @test vmin <= amin <= amax <= vmax @@ -44,11 +45,11 @@ function check_sample_wrep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=fal end else @test !issorted(a; rev=rev) - ncols = size(a,2) + ncols = size(a, 2) if ncols == 1 @test isapprox(proportions(a, vmin:vmax), p0, atol=ptol) else - for j = 1:ncols + for j in 1:ncols aj = view(a, :, j) @test isapprox(proportions(aj, vmin:vmax), p0, atol=ptol) end @@ -72,7 +73,8 @@ test_rng_use(direct_sample!, 1:10, zeros(Int, 6)) a = sample(3:12, n) check_sample_wrep(a, (3, 12), 5.0e-3; ordered=false) -for rev in (true, false), T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int}) +for rev in (true, false), + T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int}) r = rev ? reverse(3:12) : (3:12) r = T===Int ? r : T.(r) aa = Int.(sample(r, n; ordered=true)) @@ -82,19 +84,19 @@ for rev in (true, false), T in (Int, Int16, Float64, Float16, BigInt, ComplexF64 check_sample_wrep(aa, (3, 12), 0; ordered=true, rev=rev) end -@test StatsBase._storeindices(1, 1, BigFloat) == StatsBase._storeindices(1, 1, BigFloat) == false +@test StatsBase._storeindices(1, 1, BigFloat) == StatsBase._storeindices(1, 1, BigFloat) == + false test_rng_use(sample, 1:10, 10) @testset "sampling pairs" begin - rng = StableRNG(1) - @test samplepair(rng, 2) === (2, 1) + @test samplepair(rng, 2) === (2, 1) @test samplepair(rng, 10) === (5, 6) @test samplepair(rng, [3, 4, 2, 6, 8]) === (3, 8) - @test samplepair(rng, [1, 2]) === (1, 2) + @test samplepair(rng, [1, 2]) === (1, 2) onetwo = samplepair(rng, UInt128(2)) @test extrema(onetwo) == (1, 2) @@ -105,7 +107,8 @@ test_rng_use(samplepair, 1000) #### sample without replacement -function check_sample_norep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=false, rev::Bool=false) +function check_sample_norep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=false, + rev::Bool=false) # each column of a for one run vmin, vmax = vrgn @@ -113,8 +116,8 @@ function check_sample_norep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=fa @test vmin <= amin <= amax <= vmax n = vmax - vmin + 1 - for j = 1:size(a,2) - aj = view(a,:,j) + for j in 1:size(a, 2) + aj = view(a, :, j) @assert allunique(aj) if ordered @assert issorted(aj, rev=rev) @@ -127,8 +130,8 @@ function check_sample_norep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=fa @test isapprox(proportions(a, vmin:vmax), p0, atol=ptol) else b = transpose(a) - for j = 1:size(b,2) - bj = view(b,:,j) + for j in 1:size(b, 2) + bj = view(b, :, j) @test isapprox(proportions(bj, vmin:vmax), p0, atol=ptol) end end @@ -139,48 +142,48 @@ import StatsBase: knuths_sample!, fisher_yates_sample!, self_avoid_sample! import StatsBase: seqsample_a!, seqsample_c!, seqsample_d! 
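# The blocks below all follow the same pattern: fill each column of `a`
# with draws from 3:12 using one of the internal samplers imported above,
# then validate the result. Conceptually (an editorial sketch of the
# repeated block, using the same calls as the code that follows):
#
#     a = zeros(Int, 5, n)
#     for j in 1:size(a, 2)
#         fisher_yates_sample!(3:12, view(a, :, j))   # 5 distinct values
#     end
#     check_sample_norep(a, (3, 12), 5.0e-3; ordered=false)
#
# check_sample_norep asserts that every column is duplicate-free and that
# the empirical proportions of the values 3:12 are within `ptol` of the
# uniform reference.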
a = zeros(Int, 5, n) -for j = 1:size(a,2) - knuths_sample!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + knuths_sample!(3:12, view(a, :, j)) end check_sample_norep(a, (3, 12), 5.0e-3; ordered=false) test_rng_use(knuths_sample!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - fisher_yates_sample!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + fisher_yates_sample!(3:12, view(a, :, j)) end check_sample_norep(a, (3, 12), 5.0e-3; ordered=false) test_rng_use(fisher_yates_sample!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - self_avoid_sample!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + self_avoid_sample!(3:12, view(a, :, j)) end check_sample_norep(a, (3, 12), 5.0e-3; ordered=false) test_rng_use(self_avoid_sample!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - seqsample_a!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + seqsample_a!(3:12, view(a, :, j)) end check_sample_norep(a, (3, 12), 5.0e-3; ordered=true) test_rng_use(seqsample_a!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - seqsample_c!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + seqsample_c!(3:12, view(a, :, j)) end check_sample_norep(a, (3, 12), 5.0e-3; ordered=true) test_rng_use(seqsample_c!, 1:10, zeros(Int, 6)) a = zeros(Int, 5, n) -for j = 1:size(a,2) - seqsample_d!(3:12, view(a,:,j)) +for j in 1:size(a, 2) + seqsample_d!(3:12, view(a, :, j)) end check_sample_norep(a, (3, 12), 5.0e-3; ordered=true) @@ -205,18 +208,18 @@ check_sample_norep(a, (3, 12), 0; ordered=false) # test of weighted sampling without replacement a = [1:10;] wv = Weights([zeros(6); 1:4]) -x = vcat([sample(a, wv, 1, replace=false) for j in 1:100000]...) +x = vcat([sample(a, wv, 1; replace=false) for j in 1:100000]...) @test minimum(x) == 7 @test maximum(x) == 10 @test maximum(abs, proportions(x) .- (1:4)/10) < 0.01 -x = vcat([sample(a, wv, 2, replace=false) for j in 1:50000]...) +x = vcat([sample(a, wv, 2; replace=false) for j in 1:50000]...) exact2 = [0.117261905, 0.220634921, 0.304166667, 0.357936508] @test minimum(x) == 7 @test maximum(x) == 10 @test maximum(abs, proportions(x) .- exact2) < 0.01 -x = vcat([sample(a, wv, 4, replace=false) for j in 1:10000]...) +x = vcat([sample(a, wv, 4; replace=false) for j in 1:10000]...) @test minimum(x) == 7 @test maximum(x) == 10 @test maximum(abs, proportions(x) .- 0.25) == 0 @@ -229,14 +232,13 @@ wv = Weights([zeros(5); 1:4; -1]) #### weighted sampling with dimension # weights respected; this works because of the 0-weight -@test sample([1, 2], Weights([0, 1]), (2,2)) == [2 2 ; 2 2] -wm = sample(collect(1:4), Weights(1:4), (2,2), replace=false) +@test sample([1, 2], Weights([0, 1]), (2, 2)) == [2 2; 2 2] +wm = sample(collect(1:4), Weights(1:4), (2, 2); replace=false) @test size(wm) == (2, 2) # correct shape @test length(Set(wm)) == 4 # no duplicates in elements - #### check that sample and sample! do the same thing -function test_same(;kws...) +function test_same(; kws...) wv = Weights(rand(20)) Random.seed!(1) x1 = sample(1:20, wv, 10; kws...) @@ -247,16 +249,16 @@ function test_same(;kws...) 
end test_same() -test_same(replace=true) -test_same(replace=false) -test_same(replace=true, ordered=true) -test_same(replace=false, ordered=true) -test_same(replace=true, ordered=false) -test_same(replace=false, ordered=false) +test_same(; replace=true) +test_same(; replace=false) +test_same(; replace=true, ordered=true) +test_same(; replace=false, ordered=true) +test_same(; replace=true, ordered=false) +test_same(; replace=false, ordered=false) @testset "validation of inputs" begin for f in (sample!, knuths_sample!, fisher_yates_sample!, self_avoid_sample!, - seqsample_a!, seqsample_c!, seqsample_d!) + seqsample_a!, seqsample_c!, seqsample_d!) x = rand(10) y = rand(10) ox = OffsetArray(x, -4:5) diff --git a/test/scalarstats.jl b/test/scalarstats.jl index eec64ad74..cdc98a9f2 100644 --- a/test/scalarstats.jl +++ b/test/scalarstats.jl @@ -7,28 +7,28 @@ using Statistics ## geomean -@test geomean([1, 2, 3]) ≈ cbrt(6.0) -@test geomean(1:3) ≈ cbrt(6.0) -@test geomean([2, 8]) ≈ 4.0 +@test geomean([1, 2, 3]) ≈ cbrt(6.0) +@test geomean(1:3) ≈ cbrt(6.0) +@test geomean([2, 8]) ≈ 4.0 @test geomean([4, 1, 1/32]) ≈ 0.5 @test geomean([1, 0, 2]) == 0.0 ## harmmean @test harmmean([1, 2, 3]) ≈ 3 / (1 + 1/2 + 1/3) -@test harmmean(1:3) ≈ 3 / (1 + 1/2 + 1/3) +@test harmmean(1:3) ≈ 3 / (1 + 1/2 + 1/3) @test harmmean([1, 2, 4]) ≈ 12 / 7 ## genmean -@test genmean([1,1,2,3], 1) ≈ 7/4 -@test genmean([1,4,2], -1) ≈ 12/7 -@test genmean([1,1,2,3], 0) ≈ (6.0)^(1/4) -@test genmean([1.2,-0.5,0], 2) ≈ sqrt(169/300) -@test genmean([16/9,0.25,1.0], 1.5) ≈ (755/648)^(2/3) +@test genmean([1, 1, 2, 3], 1) ≈ 7/4 +@test genmean([1, 4, 2], -1) ≈ 12/7 +@test genmean([1, 1, 2, 3], 0) ≈ (6.0)^(1/4) +@test genmean([1.2, -0.5, 0], 2) ≈ sqrt(169/300) +@test genmean([16/9, 0.25, 1.0], 1.5) ≈ (755/648)^(2/3) # Test numerical stability for `p` close to 0 (genmean should be close to geometric mean). -@test isapprox(genmean([1,1,2,3], -1e-8), (6.0)^(1/4), atol=1e-8) +@test isapprox(genmean([1, 1, 2, 3], -1e-8), (6.0)^(1/4), atol=1e-8) # Test numerical stability for large `p` (genmean should be close to max). -@test isapprox(genmean([0.98,1.02], 1e4), 1.02, atol=1e-4) +@test isapprox(genmean([0.98, 1.02], 1e4), 1.02, atol=1e-4) ## mode & modes @@ -51,8 +51,8 @@ wv = weights([0.1:0.1:0.7; 0.1]) @test mode(d2) == 'c' @test mode(d1, wv) == 5 @test mode(d2, wv) == 'e' -@test sort(modes(d1[1:end-1], weights(ones(7)))) == [3, 5] -@test sort(modes(d1, weights([.9, .1, .1, .1, .9, .1, .1, .1]))) == [1, 4] +@test sort(modes(d1[1:(end - 1)], weights(ones(7)))) == [3, 5] +@test sort(modes(d1, weights([0.9, 0.1, 0.1, 0.1, 0.9, 0.1, 0.1, 0.1]))) == [1, 4] @test_throws ArgumentError mode(Int[]) @test_throws ArgumentError modes(Int[]) @@ -68,93 +68,93 @@ wv = weights([0.1:0.1:0.7; 0.1]) @test zscore([-3:3;], 1.5, 0.5) == [-9.0:2.0:3.0;] a = [3 4 5 6; 7 8 1 2; 6 9 3 0] -z1 = [4. 6. 8. 10.; 5. 6. -1. 0.; 1.5 3.0 0.0 -1.5] -z2 = [8. 2. 3. 1.; 24. 10. -1. -1.; 20. 12. 1. -2.] 
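# The expected matrices z1 and z2 just below are plain broadcasts, since
# zscore(a, mu, sigma) computes (a .- mu) ./ sigma with the location and
# scale expanded along the rows or the columns. As an editorial spot
# check, the second row of z1 comes from mu = 2, sigma = 1.0:
#
#     ([7, 8, 1, 2] .- 2) ./ 1.0 == [5.0, 6.0, -1.0, 0.0]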
+z1 = [4.0 6.0 8.0 10.0; 5.0 6.0 -1.0 0.0; 1.5 3.0 0.0 -1.5] +z2 = [8.0 2.0 3.0 1.0; 24.0 10.0 -1.0 -1.0; 20.0 12.0 1.0 -2.0] -@test zscore(a, [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 +@test zscore(a, [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 @test zscore(a, [1 3 2 4], [0.25 0.5 1.0 2.0]) ≈ z2 @test zscore!(collect(-3.0:3.0), 1.5, 0.5) == [-9.0:2.0:3.0;] -@test zscore!(float(a), [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 +@test zscore!(float(a), [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 @test zscore!(float(a), [1 3 2 4], [0.25 0.5 1.0 2.0]) ≈ z2 @test zscore!(zeros(7), [-3:3;], 1.5, 0.5) == [-9.0:2.0:3.0;] -@test zscore!(zeros(size(a)), a, [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 +@test zscore!(zeros(size(a)), a, [1, 2, 3], [0.5, 1.0, 2.0]) ≈ z1 @test zscore!(zeros(size(a)), a, [1 3 2 4], [0.25 0.5 1.0 2.0]) ≈ z2 -@test zscore(a) ≈ zscore(a, mean(a), std(a)) +@test zscore(a) ≈ zscore(a, mean(a), std(a)) @test zscore(a, 1) ≈ zscore(a, mean(a, dims=1), std(a, dims=1)) @test zscore(a, 2) ≈ zscore(a, mean(a, dims=2), std(a, dims=2)) - ###### quantile & friends @test nquantile(1:5, 2) ≈ [1, 3, 5] @test nquantile(1:5, 4) ≈ [1:5;] @test nquantile(skipmissing([missing, 2, 5, missing]), 2) ≈ [2.0, 3.5, 5.0] -@test percentile([1:5;], 25) ≈ 2.0 +@test percentile([1:5;], 25) ≈ 2.0 @test percentile([1:5;], [25, 50, 75]) ≈ [2.0, 3.0, 4.0] @test percentile(skipmissing([missing, 2, 5, missing]), 25) ≈ 2.75 @test percentile(skipmissing([missing, 2, 5, missing]), [25, 50, 75]) ≈ [2.75, 3.5, 4.25] @testset "quantilerank and percentilerank" begin - @testset "value as number and array" begin - @testset ":inc and :exc" begin - v1 = [1, 1, 1, 2, 3, 4, 8, 11, 12, 13] - v2 = [1, 2, 3, 6, 6, 6, 7, 8, 9] - v3 = [1, 2, 4, 3, 4] - v4 = [1, 2, 1, 3, 4] - @test quantilerank(v1, 2, method=:inc) == 1/3 - @test quantilerank(v1, 4, method=:inc) == 5/9 - @test quantilerank(v1, 8, method=:inc) == 2/3 - @test quantilerank(v1, 5, method=:inc) == 7/12 - @test quantilerank(v2, 7, method=:exc) == 0.7 - @test quantilerank(v2, 5.43, method=:exc) == 0.381 - @test quantilerank(v3, 4, method=:exc) == 6/9 - @test quantilerank(v3, 4, method=:inc) == 3/4 - @test quantilerank(v4, 1, method=:exc) == 1/6 - @test quantilerank(v4, -100, method=:inc) == 0.0 - @test quantilerank(v4, 100, method=:inc) == 1.0 - @test quantilerank(v4, -100, method=:exc) == 0.0 - @test quantilerank(v4, 100, method=:exc) == 1.0 - @test percentilerank(v1, 2) == 100 * quantilerank(v1, 2) - @test percentilerank(v2, 7, method=:exc) == 100 * quantilerank(v2, 7, method=:exc) - end - @testset ":compete" begin - v = [0, 0, 1, 1, 2, 2, 2, 2, 4, 4] - @test quantilerank(v, 1, method=:compete) == 2/9 - @test quantilerank(v, 2, method=:compete) == 4/9 - @test quantilerank(v, 4, method=:compete) == 8/9 - @test quantilerank(v, -100, method=:compete) == 0.0 - @test quantilerank(v, 100, method=:compete) == 1.0 - end - @testset ":strict, :weak and :tied" begin - v = [7, 8, 2, 1, 3, 4, 5, 4, 6, 9] - for (method, res1, res2) in [(:tied, .4, [.4, .85]), - (:strict, .3, [.3, .8]), - (:weak, .5, [.5, .9])] - @test quantilerank(v, 4, method=method) == res1 - end - end - end - @testset "errors" begin - v1 = [1, 2, 3, 5, 6, missing, 8] - v2 = [missing, missing] - v3 = [1, 2, 3, 5, 6, NaN, 8] - v4 = [1, 2, 3, 3, 4] - for method in (:tied, :strict, :weak) - @test_throws ArgumentError quantilerank(v1, 4, method=method) - @test_throws ArgumentError quantilerank(v2, 4, method=method) - @test_throws ArgumentError quantilerank(v3, 4, method=method) - end - @test_throws ArgumentError quantilerank(v4, 3, method=:wrongargument) - @test_throws 
ArgumentError quantilerank(v4, NaN) - @test_throws ArgumentError quantilerank(v4, missing) - @test_throws ArgumentError quantilerank([], 3) - @test_throws ArgumentError quantilerank([1], 3) - end - end + @testset "value as number and array" begin + @testset ":inc and :exc" begin + v1 = [1, 1, 1, 2, 3, 4, 8, 11, 12, 13] + v2 = [1, 2, 3, 6, 6, 6, 7, 8, 9] + v3 = [1, 2, 4, 3, 4] + v4 = [1, 2, 1, 3, 4] + @test quantilerank(v1, 2, method=:inc) == 1/3 + @test quantilerank(v1, 4, method=:inc) == 5/9 + @test quantilerank(v1, 8, method=:inc) == 2/3 + @test quantilerank(v1, 5, method=:inc) == 7/12 + @test quantilerank(v2, 7, method=:exc) == 0.7 + @test quantilerank(v2, 5.43, method=:exc) == 0.381 + @test quantilerank(v3, 4, method=:exc) == 6/9 + @test quantilerank(v3, 4, method=:inc) == 3/4 + @test quantilerank(v4, 1, method=:exc) == 1/6 + @test quantilerank(v4, -100, method=:inc) == 0.0 + @test quantilerank(v4, 100, method=:inc) == 1.0 + @test quantilerank(v4, -100, method=:exc) == 0.0 + @test quantilerank(v4, 100, method=:exc) == 1.0 + @test percentilerank(v1, 2) == 100 * quantilerank(v1, 2) + @test percentilerank(v2, 7, method=:exc) == + 100 * quantilerank(v2, 7, method=:exc) + end + @testset ":compete" begin + v = [0, 0, 1, 1, 2, 2, 2, 2, 4, 4] + @test quantilerank(v, 1, method=:compete) == 2/9 + @test quantilerank(v, 2, method=:compete) == 4/9 + @test quantilerank(v, 4, method=:compete) == 8/9 + @test quantilerank(v, -100, method=:compete) == 0.0 + @test quantilerank(v, 100, method=:compete) == 1.0 + end + @testset ":strict, :weak and :tied" begin + v = [7, 8, 2, 1, 3, 4, 5, 4, 6, 9] + for (method, res1, res2) in [(:tied, 0.4, [0.4, 0.85]), + (:strict, 0.3, [0.3, 0.8]), + (:weak, 0.5, [0.5, 0.9])] + @test quantilerank(v, 4, method=method) == res1 + end + end + end + @testset "errors" begin + v1 = [1, 2, 3, 5, 6, missing, 8] + v2 = [missing, missing] + v3 = [1, 2, 3, 5, 6, NaN, 8] + v4 = [1, 2, 3, 3, 4] + for method in (:tied, :strict, :weak) + @test_throws ArgumentError quantilerank(v1, 4, method=method) + @test_throws ArgumentError quantilerank(v2, 4, method=method) + @test_throws ArgumentError quantilerank(v3, 4, method=method) + end + @test_throws ArgumentError quantilerank(v4, 3, method=:wrongargument) + @test_throws ArgumentError quantilerank(v4, NaN) + @test_throws ArgumentError quantilerank(v4, missing) + @test_throws ArgumentError quantilerank([], 3) + @test_throws ArgumentError quantilerank([1], 3) + end +end ##### Dispersion @@ -167,8 +167,8 @@ z2 = [8. 2. 3. 1.; 24. 10. -1. -1.; 20. 12. 1. -2.] @test isnan(variation(1)) @test variation(1; corrected=false) == 0 # Possibly deprecated -@test variation([1:5;],4) ≈ 0.4841229182759271 -@test variation([1:5;],4; corrected=false) ≈ 0.4330127018922193 +@test variation([1:5;], 4) ≈ 0.4841229182759271 +@test variation([1:5;], 4; corrected=false) ≈ 0.4330127018922193 @test @inferred(sem([1:5;])) ≈ 0.707106781186548 @test @inferred(sem(skipmissing([missing; 1:5; missing]))) ≈ 0.707106781186548 @@ -176,18 +176,18 @@ z2 = [8. 2. 3. 1.; 24. 10. -1. -1.; 20. 12. 1. -2.] 
@test @inferred(sem([1:5;], UnitWeights{Int}(5))) ≈ 0.707106781186548 @test @inferred(sem([1:5;], UnitWeights{Int}(5); mean=mean(1:5))) ≈ 0.707106781186548 @test_throws DimensionMismatch sem(1:5, UnitWeights{Int}(4)) -@test @inferred(sem([1:5;], ProbabilityWeights([1:5;]))) ≈ 0.6166 rtol=.001 +@test @inferred(sem([1:5;], ProbabilityWeights([1:5;]))) ≈ 0.6166 rtol=0.001 μ = mean(1:5, ProbabilityWeights([1:5;])) -@test @inferred(sem([1:5;], ProbabilityWeights([1:5;]); mean=μ)) ≈ 0.6166 rtol=.001 -@test @inferred(sem([10; 1:5;], ProbabilityWeights([0; 1:5;]); mean=μ)) ≈ 0.6166 rtol=.001 +@test @inferred(sem([1:5;], ProbabilityWeights([1:5;]); mean=μ)) ≈ 0.6166 rtol=0.001 +@test @inferred(sem([10; 1:5;], ProbabilityWeights([0; 1:5;]); mean=μ)) ≈ 0.6166 rtol=0.001 x = sort!(vcat([5:-1:i for i in 1:5]...)) μ = mean(x) @test @inferred(sem([1:5;], FrequencyWeights([1:5;]))) ≈ sem(x) @test @inferred(sem([1:5;], FrequencyWeights([1:5;]); mean=μ)) ≈ sem(x) -@inferred sem([1:5f0;]; mean=μ) ≈ sem(x) -@inferred sem([1:5f0;], ProbabilityWeights([1:5;]); mean=μ) -@inferred sem([1:5f0;], FrequencyWeights([1:5;]); mean=μ) +@inferred sem([1:5.0f0;]; mean=μ) ≈ sem(x) +@inferred sem([1:5.0f0;], ProbabilityWeights([1:5;]); mean=μ) +@inferred sem([1:5.0f0;], FrequencyWeights([1:5;]); mean=μ) # Broken: Bug to do with Statistics.jl's implementation of `var` # @inferred sem([1:5f0;], UnitWeights{Int}(5); mean=μ) @@ -195,16 +195,17 @@ x = sort!(vcat([5:-1:i for i in 1:5]...)) @test @inferred(isnan(sem(Int[], FrequencyWeights(Int[])))) @test @inferred(isnan(sem(Int[], ProbabilityWeights(Int[])))) -@test @inferred(isnan(sem(Int[]; mean=0f0))) -@test @inferred(isnan(sem(Int[], FrequencyWeights(Int[]); mean=0f0))) -@test @inferred(isnan(sem(Int[], ProbabilityWeights(Int[]); mean=0f0))) +@test @inferred(isnan(sem(Int[]; mean=0.0f0))) +@test @inferred(isnan(sem(Int[], FrequencyWeights(Int[]); mean=0.0f0))) +@test @inferred(isnan(sem(Int[], ProbabilityWeights(Int[]); mean=0.0f0))) @test @inferred(isnan(sem(skipmissing(Union{Int,Missing}[missing, missing])))) @test_throws Exception sem(Any[]) @test_throws Exception sem(skipmissing([missing])) @test mad(1:5; center=3, normalize=true) ≈ 1.4826022185056018 -@test mad(skipmissing([missing; 1:5; missing]); center=3, normalize=true) ≈ 1.4826022185056018 +@test mad(skipmissing([missing; 1:5; missing]); center=3, normalize=true) ≈ + 1.4826022185056018 @test StatsBase.mad!([1:5;]; center=3, normalize=true) ≈ 1.4826022185056018 @test mad(1:5, normalize=true) ≈ 1.4826022185056018 @test mad(1:5, normalize=false) ≈ 1.0 @@ -217,10 +218,10 @@ x = sort!(vcat([5:-1:i for i in 1:5]...)) @test mad((x for x in (1, 2.1)), normalize=false) ≈ 0.55 @test mad(Any[1, 2.1], normalize=false) ≈ 0.55 @test mad(Union{Int,Missing}[1, 2], normalize=false) ≈ 0.5 -@test_throws ArgumentError mad(Int[], normalize = true) +@test_throws ArgumentError mad(Int[], normalize=true) @test mad(Iterators.repeated(4, 10)) == 0 -@test mad(Integer[1,2,3,4]) === mad(1:4) -mad_allocs(itr) = @allocations(mad(itr; normalize = false)) +@test mad(Integer[1, 2, 3, 4]) === mad(1:4) +mad_allocs(itr) = @allocations(mad(itr; normalize=false)) let itr = (i for i in 1:10000) mad_allocs(itr) @test mad_allocs(itr) < 50 @@ -231,7 +232,7 @@ end @test iqr(1:5) ≈ 2.0 -nutrient = readdlm(joinpath(@__DIR__, "data", "nutrient.txt"))[:,2:end] +nutrient = readdlm(joinpath(@__DIR__, "data", "nutrient.txt"))[:, 2:end] @test @inferred(genvar(nutrient)) ≈ 2.8310418e19 rtol=1e-6 @test @inferred(totalvar(nutrient)) ≈ 2.83266877e6 rtol=1e-6 @@ -247,18 
+248,17 @@ x = rand(Float32, 10) it = (xᵢ for xᵢ in x) @test genvar(it) == totalvar(it) == var(it) - ##### entropy -@test @inferred(entropy([0.5, 0.5])) ≈ 0.6931471805599453 -@test @inferred(entropy([1//2, 1//2])) ≈ 0.6931471805599453 +@test @inferred(entropy([0.5, 0.5])) ≈ 0.6931471805599453 +@test @inferred(entropy([1//2, 1//2])) ≈ 0.6931471805599453 @test @inferred(entropy([0.5f0, 0.5f0])) isa Float32 @test @inferred(entropy([0.2, 0.3, 0.5])) ≈ 1.0296530140645737 @test iszero(@inferred(entropy([0, 1]))) @test iszero(@inferred(entropy([0.0, 1.0]))) -@test @inferred(entropy([0.5, 0.5], 2)) ≈ 1.0 -@test @inferred(entropy([1//2, 1//2], 2)) ≈ 1.0 +@test @inferred(entropy([0.5, 0.5], 2)) ≈ 1.0 +@test @inferred(entropy([1//2, 1//2], 2)) ≈ 1.0 @test @inferred(entropy([0.2, 0.3, 0.5], 2)) ≈ 1.4854752972273344 # issue #924 @@ -275,7 +275,7 @@ dist = rand(nindiv) dist /= sum(dist) # Check Shannon entropy against Renyi entropy of order 1 -@test entropy(dist) ≈ renyientropy(dist, 1) +@test entropy(dist) ≈ renyientropy(dist, 1) @test renyientropy(dist, 1) ≈ renyientropy(dist, 1.0) # Check Renyi entropy of order 0 is the natural log of the count of non-zeros @@ -301,35 +301,37 @@ udist = ones(nindiv) / nindiv # And test generalised probability distributions (sum(p) != 1) scale = rand() -@test renyientropy(udist * scale, 0) ≈ renyientropy(udist, 0) - log(scale) -@test renyientropy(udist * scale, 1) ≈ renyientropy(udist, 1) - log(scale) -@test renyientropy(udist * scale, Inf) ≈ renyientropy(udist, Inf) - log(scale) +@test renyientropy(udist * scale, 0) ≈ renyientropy(udist, 0) - log(scale) +@test renyientropy(udist * scale, 1) ≈ renyientropy(udist, 1) - log(scale) +@test renyientropy(udist * scale, Inf) ≈ renyientropy(udist, Inf) - log(scale) @test renyientropy(udist * scale, order) ≈ renyientropy(udist, order) - log(scale) ##### Cross entropy -@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 1.1176681825904018 +@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 1.1176681825904018 @test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3, 0.4, 0.3])) ≈ 1.1176681825904018 @test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0])) isa Float32 -@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 1.6124543443825532 +@test @inferred(crossentropy([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 1.6124543443825532 @test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3, 0.4, 0.3], 2)) ≈ 1.6124543443825532 -@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0], 2f0)) isa Float32 +@test @inferred(crossentropy([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0], 2.0f0)) isa Float32 # deprecated, should throw an `ArgumentError` at some point -logpattern = (:warn, "support for empty collections will be removed since they do not represent proper probability distributions") +logpattern = (:warn, + "support for empty collections will be removed since they do not represent proper probability distributions") @test iszero(@test_logs logpattern @inferred(crossentropy(Float64[], Float64[]))) @test iszero(@test_logs logpattern @inferred(crossentropy(Int[], Int[]))) ##### KL divergence -@test @inferred(kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 0.08801516852582819 +@test @inferred(kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3])) ≈ 0.08801516852582819 @test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3, 0.4, 0.3])) ≈ 0.08801516852582819 @test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0])) isa Float32 -@test @inferred(kldivergence([0.2, 
0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 0.12697904715521868 +@test @inferred(kldivergence([0.2, 0.3, 0.5], [0.3, 0.4, 0.3], 2)) ≈ 0.12697904715521868 @test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3, 0.4, 0.3], 2)) ≈ 0.12697904715521868 -@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0], 2f0)) isa Float32 -@test iszero(@inferred(kldivergence([0, 1], [0f0, 1f0]))) +@test @inferred(kldivergence([1//5, 3//10, 1//2], [0.3f0, 0.4f0, 0.3f0], 2.0f0)) isa Float32 +@test iszero(@inferred(kldivergence([0, 1], [0.0f0, 1.0f0]))) # deprecated, should throw an `ArgumentError` at some point -logpattern = (:warn, "support for empty collections will be removed since they do not represent proper probability distributions") +logpattern = (:warn, + "support for empty collections will be removed since they do not represent proper probability distributions") @test iszero(@test_logs logpattern @inferred(kldivergence(Float64[], Float64[]))) @test iszero(@test_logs logpattern @inferred(kldivergence(Int[], Int[]))) @@ -339,39 +341,39 @@ s = summarystats(1:5) @test isa(s, StatsBase.SummaryStats) @test s.min == 1.0 @test s.max == 5.0 -@test s.nobs == 5 -@test s.nmiss == 0 -@test s.mean ≈ 3.0 +@test s.nobs == 5 +@test s.nmiss == 0 +@test s.mean ≈ 3.0 @test s.median ≈ 3.0 -@test s.q25 ≈ 2.0 -@test s.q75 ≈ 4.0 -@test s.sd ≈ 1.5811388300841898 +@test s.q25 ≈ 2.0 +@test s.q75 ≈ 4.0 +@test s.sd ≈ 1.5811388300841898 # Issue #631 s = summarystats([-2, -1, 0, 1, 2, missing]) @test isa(s, StatsBase.SummaryStats) @test s.min == -2.0 @test s.max == 2.0 -@test s.nobs == 6 -@test s.nmiss == 1 -@test s.mean ≈ 0.0 +@test s.nobs == 6 +@test s.nmiss == 1 +@test s.mean ≈ 0.0 @test s.median ≈ 0.0 -@test s.q25 ≈ -1.0 -@test s.q75 ≈ +1.0 -@test s.sd ≈ 1.5811388300841898 +@test s.q25 ≈ -1.0 +@test s.q75 ≈ +1.0 +@test s.sd ≈ 1.5811388300841898 # Issue #631 s = summarystats(zeros(10)) @test isa(s, StatsBase.SummaryStats) @test s.min == 0.0 @test s.max == 0.0 -@test s.nobs == 10 -@test s.nmiss == 0 -@test s.mean ≈ 0.0 +@test s.nobs == 10 +@test s.nmiss == 0 +@test s.mean ≈ 0.0 @test s.median ≈ 0.0 -@test s.q25 ≈ 0.0 -@test s.q75 ≈ 0.0 -@test s.sd ≈ 0.0 +@test s.q25 ≈ 0.0 +@test s.q75 ≈ 0.0 +@test s.sd ≈ 0.0 # Issue #631 s = summarystats(Union{Float64,Missing}[missing, missing]) diff --git a/test/signalcorr.jl b/test/signalcorr.jl index bfbe90fed..973685496 100644 --- a/test/signalcorr.jl +++ b/test/signalcorr.jl @@ -9,16 +9,16 @@ using Test # random data for testing -x = [-2.133252557240862 -.7445937365828654; - .1775816414485478 -.5834801838041446; - -.6264517920318317 -.68444205333293; - -.8809042583216906 .9071671734302398; - .09251017186697393 -1.0404476733379926; - -.9271887119115569 -.620728578941385; - 3.355819743178915 -.8325051361909978; - -.2834039258495755 -.22394811874731657; - .5354280026977677 .7481337671592626; - .39182285417742585 .3085762550821047] +x = [-2.133252557240862 -0.7445937365828654; + 0.1775816414485478 -0.5834801838041446; + -0.6264517920318317 -0.68444205333293; + -0.8809042583216906 0.9071671734302398; + 0.09251017186697393 -1.0404476733379926; + -0.9271887119115569 -0.620728578941385; + 3.355819743178915 -0.8325051361909978; + -0.2834039258495755 -0.22394811874731657; + 0.5354280026977677 0.7481337671592626; + 0.39182285417742585 0.3085762550821047] x1 = view(x, :, 1) x2 = view(x, :, 2) @@ -28,53 +28,52 @@ realx2 = convert(AbstractVector{Real}, x2) # autocov & autocorr -@test autocov([1:5;]) ≈ [2.0, 0.8, -0.2, -0.8, -0.8] +@test autocov([1:5;]) ≈ [2.0, 0.8, -0.2, -0.8, -0.8] @test 
autocor([1, 2, 3, 4, 5]) ≈ [1.0, 0.4, -0.1, -0.4, -0.4] -racovx1 = [1.839214242630635709475, +racovx1 = [1.839214242630635709475, -0.406784553146903871124, - 0.421772254824993531042, - 0.035874943792884653182, + 0.421772254824993531042, + 0.035874943792884653182, -0.255679775928512320604, - 0.231154400105831353551, + 0.231154400105831353551, -0.787016960267425180753, - 0.039909287349160660341, + 0.039909287349160660341, -0.110149697877911914579, -0.088687020167434751916] @test autocov(x1) ≈ racovx1 @test autocov(realx1) ≈ racovx1 -@test autocov(x) ≈ [autocov(x1) autocov(x2)] -@test autocov(realx) ≈ [autocov(realx1) autocov(realx2)] +@test autocov(x) ≈ [autocov(x1) autocov(x2)] +@test autocov(realx) ≈ [autocov(realx1) autocov(realx2)] racorx1 = [0.999999999999999888978, - -0.221173011668873431557, + -0.221173011668873431557, 0.229321981664153962122, 0.019505581764945757045, - -0.139015765538446717242, + -0.139015765538446717242, 0.125681062460244019618, - -0.427909344123907742219, + -0.427909344123907742219, 0.021699096507690283225, - -0.059889541590524189574, - -0.048220059475281865091] + -0.059889541590524189574, + -0.048220059475281865091] @test autocor(x1) ≈ racorx1 @test autocor(realx1) ≈ racorx1 -@test autocor(x) ≈ [autocor(x1) autocor(x2)] -@test autocor(realx) ≈ [autocor(realx1) autocor(realx2)] - +@test autocor(x) ≈ [autocor(x1) autocor(x2)] +@test autocor(realx) ≈ [autocor(realx1) autocor(realx2)] # crosscov & crosscor rcov0 = [0.320000000000000006661, - -0.319999999999999951150, + -0.319999999999999951150, 0.080000000000000029421, - -0.479999999999999982236, + -0.479999999999999982236, 0.000000000000000000000, 0.479999999999999982236, - -0.080000000000000029421, + -0.080000000000000029421, 0.319999999999999951150, - -0.320000000000000006661] + -0.320000000000000006661] @test crosscov([1, 2, 3, 4, 5], [1, -1, 1, -1, 1]) ≈ rcov0 @test crosscov([1:5;], [1:5;]) ≈ [-0.8, -0.8, -0.2, 0.8, 2.0, 0.8, -0.2, -0.8, -0.8] @@ -85,27 +84,27 @@ c21 = crosscov(x2, x1) c22 = crosscov(x2, x2) @test crosscov(realx1, realx2) ≈ c12 -@test crosscov(x, x1) ≈ [c11 c21] +@test crosscov(x, x1) ≈ [c11 c21] @test crosscov(realx, realx1) ≈ [c11 c21] -@test crosscov(x1, x) ≈ [c11 c12] -@test crosscov(realx1, realx) ≈ [c11 c12] -@test crosscov(x, x) ≈ cat([c11 c21], [c12 c22], dims=3) -@test crosscov(realx, realx) ≈ cat([c11 c21], [c12 c22], dims=3) +@test crosscov(x1, x) ≈ [c11 c12] +@test crosscov(realx1, realx) ≈ [c11 c12] +@test crosscov(x, x) ≈ cat([c11 c21], [c12 c22], dims=3) +@test crosscov(realx, realx) ≈ cat([c11 c21], [c12 c22], dims=3) # issue #805: avoid converting one input to the other's eltype @test crosscov([34566.5345, 3466.4566], Float16[1, 10]) ≈ - crosscov(Float16[1, 10], [34566.5345, 3466.4566]) ≈ - crosscov([34566.5345, 3466.4566], Float16[1, 10]) + crosscov(Float16[1, 10], [34566.5345, 3466.4566]) ≈ + crosscov([34566.5345, 3466.4566], Float16[1, 10]) rcor0 = [0.230940107675850, - -0.230940107675850, + -0.230940107675850, 0.057735026918963, - -0.346410161513775, + -0.346410161513775, 0.000000000000000, 0.346410161513775, - -0.057735026918963, + -0.057735026918963, 0.230940107675850, - -0.230940107675850] + -0.230940107675850] @test crosscor([1, 2, 3, 4, 5], [1, -1, 1, -1, 1]) ≈ rcor0 @test crosscor([1:5;], [1:5;]) ≈ [-0.4, -0.4, -0.1, 0.4, 1.0, 0.4, -0.1, -0.4, -0.4] @@ -116,31 +115,30 @@ c21 = crosscor(x2, x1) c22 = crosscor(x2, x2) @test crosscor(realx1, realx2) ≈ c12 -@test crosscor(x, x1) ≈ [c11 c21] +@test crosscor(x, x1) ≈ [c11 c21] @test crosscor(realx, realx1) ≈ [c11 c21] -@test 
crosscor(x1, x) ≈ [c11 c12] -@test crosscor(realx1, realx) ≈ [c11 c12] -@test crosscor(x, x) ≈ cat([c11 c21], [c12 c22], dims=3) -@test crosscor(realx, realx) ≈ cat([c11 c21], [c12 c22], dims=3) +@test crosscor(x1, x) ≈ [c11 c12] +@test crosscor(realx1, realx) ≈ [c11 c12] +@test crosscor(x, x) ≈ cat([c11 c21], [c12 c22], dims=3) +@test crosscor(realx, realx) ≈ cat([c11 c21], [c12 c22], dims=3) # issue #805: avoid converting one input to the other's eltype @test crosscor([34566.5345, 3466.4566], Float16[1, 10]) ≈ - crosscor(Float16[1, 10], [34566.5345, 3466.4566]) ≈ - crosscor([34566.5345, 3466.4566], Float16[1, 10]) - + crosscor(Float16[1, 10], [34566.5345, 3466.4566]) ≈ + crosscor([34566.5345, 3466.4566], Float16[1, 10]) ## pacf -rpacfr = [-0.218158122381419, - 0.195015316828711, - 0.144315804606139, - -0.199791229449779] +rpacfr = [-0.218158122381419, + 0.195015316828711, + 0.144315804606139, + -0.199791229449779] -@test pacf(x[:,1], 1:4) ≈ rpacfr +@test pacf(x[:, 1], 1:4) ≈ rpacfr rpacfy = [-0.221173011668873, - 0.189683314308021, - 0.111857020733719, + 0.189683314308021, + 0.111857020733719, -0.175020669835420] -@test pacf(x[:,1], 1:4, method=:yulewalker) ≈ rpacfy +@test pacf(x[:, 1], 1:4, method=:yulewalker) ≈ rpacfy diff --git a/test/statmodels.jl b/test/statmodels.jl index 70d864bec..9f75e01b2 100644 --- a/test/statmodels.jl +++ b/test/statmodels.jl @@ -48,18 +48,15 @@ x3 1.56734e-13 Bad 2 0.00 <1e-15 @test length(ct) === 3 @test eltype(ct) == - NamedTuple{(:Name, :Estimate, :Comments, :df, :t, :p), - Tuple{String,Float64,String,Int,Float64,Float64}} -@test collect(ct) == [ - (Name = "x1", Estimate = 1.45666, Comments = "Good", df = 1, t = -12.56, p = 0.12) - (Name = "x2", Estimate = -23.14, Comments = "Great", df = 56, t = 0.1326, p = 0.3467) - (Name = "x3", Estimate = 1.56734e-13, Comments = "Bad", df = 2, t = 2.68e-16, p = 1.345e-16) -] - - -m = [0.11258244478647295 0.05664544616214151 0.38181274408522614 0.8197779704008801 - 0.36831406658084287 0.12078054506961555 0.8151038332483567 0.6699313951612162 - 0.3444540231363058 0.17957407667101322 0.2422083248151139 0.4530583319523316] + NamedTuple{(:Name, :Estimate, :Comments, :df, :t, :p), + Tuple{String,Float64,String,Int,Float64,Float64}} +@test collect(ct) == [(Name="x1", Estimate=1.45666, Comments="Good", df=1, t=-12.56, p=0.12) + (Name="x2", Estimate=-23.14, Comments="Great", df=56, t=0.1326, p=0.3467) + (Name="x3", Estimate=1.56734e-13, Comments="Bad", df=2, t=2.68e-16, p=1.345e-16)] + +m = [0.11258244478647295 0.05664544616214151 0.38181274408522614 0.8197779704008801 + 0.36831406658084287 0.12078054506961555 0.8151038332483567 0.6699313951612162 + 0.3444540231363058 0.17957407667101322 0.2422083248151139 0.4530583319523316] ct = CoefTable(m, ["Estimate", "Stderror", "df", "p"], [], 4) @test sprint(show, "text/plain", ct) == """ ────────────────────────────────────────── @@ -71,16 +68,14 @@ ct = CoefTable(m, ["Estimate", "Stderror", "df", "p"], [], 4) ──────────────────────────────────────────""" @test length(ct) === 3 @test eltype(ct) == - NamedTuple{(:Estimate, :Stderror, :df, :p), - Tuple{Float64,Float64,Float64,Float64}} -@test collect(ct) == [ - (Estimate = 0.11258244478647295, Stderror = 0.05664544616214151, - df = 0.38181274408522614, p = 0.8197779704008801) - (Estimate = 0.36831406658084287, Stderror = 0.12078054506961555, - df = 0.8151038332483567, p = 0.6699313951612162) - (Estimate = 0.3444540231363058, Stderror = 0.17957407667101322, - df = 0.2422083248151139, p = 0.4530583319523316) -] + NamedTuple{(:Estimate, 
:Stderror, :df, :p), + Tuple{Float64,Float64,Float64,Float64}} +@test collect(ct) == [(Estimate=0.11258244478647295, Stderror=0.05664544616214151, + df=0.38181274408522614, p=0.8197779704008801) + (Estimate=0.36831406658084287, Stderror=0.12078054506961555, + df=0.8151038332483567, p=0.6699313951612162) + (Estimate=0.3444540231363058, Stderror=0.17957407667101322, + df=0.2422083248151139, p=0.4530583319523316)] @test sprint(show, PValue(1.0)) == "1.0000" @test sprint(show, PValue(1e-1)) == "0.1000" @@ -95,7 +90,7 @@ ct = CoefTable(m, ["Estimate", "Stderror", "df", "p"], [], 4) @test sprint(show, TestStat(π)) == "3.14" @testset "Union{PValue, TestStat} is Real" begin - vals = [0.0, Rational(1,3), NaN] + vals = [0.0, Rational(1, 3), NaN] for T in [PValue, TestStat], f in (==, <, ≤, >, ≥, isless, isequal), lhs in vals, rhs in vals @@ -108,24 +103,25 @@ ct = CoefTable(m, ["Estimate", "Stderror", "df", "p"], [], 4) # the (approximate) equality operators get a bit more attention for T in [PValue, TestStat] - @test T(Rational(1,3)) ≈ T(1/3) - @test Rational(1,3) ≈ T(1/3) atol=0.01 - @test T(Rational(1,3)) isa Real + @test T(Rational(1, 3)) ≈ T(1/3) + @test Rational(1, 3) ≈ T(1/3) atol=0.01 + @test T(Rational(1, 3)) isa Real @test T(T(0.05)) === T(0.05) @test hash(T(0.05)) == hash(0.05) @test hash(T(0.05), UInt(42)) == hash(0.05, UInt(42)) end end -@test sprint(showerror, ConvergenceException(10)) == "failure to converge after 10 iterations." +@test sprint(showerror, ConvergenceException(10)) == + "failure to converge after 10 iterations." @test sprint(showerror, ConvergenceException(10, 0.2, 0.1)) == - "failure to converge after 10 iterations. Last change (0.2) was greater than tolerance (0.1)." + "failure to converge after 10 iterations. Last change (0.2) was greater than tolerance (0.1)." @test sprint(showerror, ConvergenceException(10, 0.2, 0.1, "Try changing maxIter.")) == - "failure to converge after 10 iterations. Last change (0.2) was greater than tolerance (0.1). Try changing maxIter." + "failure to converge after 10 iterations. Last change (0.2) was greater than tolerance (0.1). Try changing maxIter." -err = @test_throws ArgumentError ConvergenceException(10,.1,.2) +err = @test_throws ArgumentError ConvergenceException(10, 0.1, 0.2) @test err.value.msg == "Change must be greater than tol." struct MyStatisticalModel <: StatisticalModel @@ -151,11 +147,11 @@ StatsAPI.nobs(::MyStatisticalModel) = 100 @test r2(m, :Nagelkerke) ≈ 0.24255074155803877 @test r2(m, :devianceratio) ≈ 0.375 - @test_throws Union{ErrorException, ArgumentError} r2(m, :err) + @test_throws Union{ErrorException,ArgumentError} r2(m, :err) @test_throws MethodError r2(m) @test adjr2(m, :McFadden) ≈ 1.5 @test adjr2(m, :devianceratio) ≈ 0.3486842105263158 - @test_throws Union{ErrorException, ArgumentError} adjr2(m, :err) + @test_throws Union{ErrorException,ArgumentError} adjr2(m, :err) @test r2 === r² @test adjr2 === adjr² @@ -186,4 +182,4 @@ end # Defined but not reexported @test StatsBase.params isa Function @test StatsBase.params! 
isa Function -end \ No newline at end of file +end diff --git a/test/statquiz.jl b/test/statquiz.jl index 6e91453a9..d73cdc749 100644 --- a/test/statquiz.jl +++ b/test/statquiz.jl @@ -9,20 +9,23 @@ using Printf testeps = sqrt(eps()) -nasty = DataFrame( label = ["One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine"], - x = collect(1.:9), - zero = fill(0.0,9), - miss = fill(NA, 9), - big = 99999990.0 + collect(1:9), - little = (99999990.0 + collect(1:9))/10^8, - huge = collect(1.:9)*1e12, - tiny = collect(1.:9)*1e-12, - round = collect(0.5:8.5)) +nasty = DataFrame(; + label=["One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", + "Nine"], + x=collect(1.0:9), + zero=fill(0.0, 9), + miss=fill(NA, 9), + big=99999990.0 + collect(1:9), + little=(99999990.0 + collect(1:9))/10^8, + huge=collect(1.0:9)*1e12, + tiny=collect(1.0:9)*1e-12, + round=collect(0.5:8.5)) println(nasty) println("\nII Real Numbers:\nII A") print("Test rounding: ") -@test [@sprintf("%1.0f", x) for x in nasty[:round]] == ["1","2","3","4","5","6","7","8","9"] +@test [@sprintf("%1.0f", x) for x in nasty[:round]] == + ["1", "2", "3", "4", "5", "6", "7", "8", "9"] println("OK") print("Test math: ") @test round(Int, 2.6*7 - 0.2) == 18 @@ -33,16 +36,16 @@ println("OK") print("Test means: ") for vars in names(nasty)[2:end] if vars == :miss - @test isna(mean(nasty[vars])) + @test isna(mean(nasty[vars])) else - @test mean(nasty[vars]) ≈ nasty[vars][5] + @test mean(nasty[vars]) ≈ nasty[vars][5] end end println("OK") print("Test standard deviation: ") -for vars in names(nasty)[[2;5:9]] -# @test (@sprintf("%.9e", std(nasty[vars])))[1:10] == "2.73861278" +for vars in names(nasty)[[2; 5:9]] + # @test (@sprintf("%.9e", std(nasty[vars])))[1:10] == "2.73861278" @test repr(std(nasty[vars]))[1:10] == "2.73861278" end println("OK") @@ -50,19 +53,19 @@ println("OK") println("\nII D") print("Test correlation: ") -cn = names(nasty)[[2;5:9]] +cn = names(nasty)[[2; 5:9]] for i in 1:5 - for j = i+1:6 - @test cor(nasty[cn[i]], nasty[cn[j]]) ≈ 1 + for j in (i + 1):6 + @test cor(nasty[cn[i]], nasty[cn[j]]) ≈ 1 end end println("OK") print("Test spearman correlation: ") -cn = names(nasty)[[2;5:9]] +cn = names(nasty)[[2; 5:9]] for i in 1:5 - for j = i+1:6 - @test corspearman(nasty[cn[i]], nasty[cn[j]]) ≈ 1 + for j in (i + 1):6 + @test corspearman(nasty[cn[i]], nasty[cn[j]]) ≈ 1 end end println("OK") @@ -84,17 +87,17 @@ println("OK") println("\nIV Regression:\nIV A") nasty[:x1] = nasty[:x] -nasty[:x2] = nasty[:x].^2 -nasty[:x3] = nasty[:x].^3 -nasty[:x4] = nasty[:x].^4 -nasty[:x5] = nasty[:x].^5 -nasty[:x6] = nasty[:x].^6 -nasty[:x7] = nasty[:x].^7 -nasty[:x8] = nasty[:x].^8 -nasty[:x9] = nasty[:x].^9 +nasty[:x2] = nasty[:x] .^ 2 +nasty[:x3] = nasty[:x] .^ 3 +nasty[:x4] = nasty[:x] .^ 4 +nasty[:x5] = nasty[:x] .^ 5 +nasty[:x6] = nasty[:x] .^ 6 +nasty[:x7] = nasty[:x] .^ 7 +nasty[:x8] = nasty[:x] .^ 8 +nasty[:x9] = nasty[:x] .^ 9 ## Is it intended that the least squares problem be overdetermined in the lm fit? ## n = 9 and p = 10 because of the implicit intercept. 
 lm(x1~x2+x3+x4+x5+x6+x7+x8+x9, nasty)
-@test coef(lm(x~x, nasty)) ≈ [0,1]
+@test coef(lm(x~x, nasty)) ≈ [0, 1]
 println("OK")
diff --git a/test/transformations.jl b/test/transformations.jl
index 7d8e2b0a9..ee3f4ed9c 100644
--- a/test/transformations.jl
+++ b/test/transformations.jl
@@ -178,5 +178,4 @@ using Test
     @test isequal(X, Y)
     @test reconstruct!(t, Y) === Y
     @test Y ≈ X_
-
 end
diff --git a/test/weights.jl b/test/weights.jl
index c99f31c92..a60e5554c 100644
--- a/test/weights.jl
+++ b/test/weights.jl
@@ -1,650 +1,644 @@
 using StatsBase
 using LinearAlgebra, Random, SparseArrays, Test
-
 # minimal custom weights type for tests below
-struct MyWeights <: AbstractWeights{Float64, Float64, Vector{Float64}}
+struct MyWeights <: AbstractWeights{Float64,Float64,Vector{Float64}}
     values::Vector{Float64}
     sum::Float64
 end
 MyWeights(values) = MyWeights(values, sum(values))
-
 @testset "StatsBase.Weights" begin
-weight_funcs = (weights, aweights, fweights, pweights)
-
-## Construction
-
-@testset "$f" for f in weight_funcs
-    @test isa(f([1, 2, 3]), AbstractWeights{Int})
-    @test isa(f([1., 2., 3.]), AbstractWeights{Float64})
-    @test isa(f([1 2 3; 4 5 6]), AbstractWeights{Int})
-
-    @test isempty(f(Float64[]))
-    @test size(f([1, 2, 3])) == (3,)
-    @test axes(f([1, 2, 3])) == (Base.OneTo(3),)
-    @test IndexStyle(f([1, 2, 3])) == IndexLinear()
-
-    w = [1., 2., 3.]
-    wv = f(w)
-    @test eltype(wv) === Float64
-    @test length(wv) === 3
-    @test wv == w
-    @test sum(wv) === 6.0
-    @test !isempty(wv)
-    @test Base.mightalias(w, wv)
-    @test !Base.mightalias([1], wv)
-
-    b = trues(3)
-    bv = f(b)
-    @test eltype(bv) === Bool
-    @test length(bv) === 3
-    @test convert(Vector, bv) == b
-    @test sum(bv) === 3
-    @test !isempty(bv)
-
-    ba = BitArray([true, false, true])
-    sa = sparsevec([1., 0., 2.])
-
-    @test sum(ba, wv) === 4.0
-    @test sum(sa, wv) === 7.0
-
-    @test_throws ArgumentError f([0.1, Inf])
-    @test_throws ArgumentError f([0.1, NaN])
+    weight_funcs = (weights, aweights, fweights, pweights)
+
+    ## Construction
+
+    @testset "$f" for f in weight_funcs
+        @test isa(f([1, 2, 3]), AbstractWeights{Int})
+        @test isa(f([1.0, 2.0, 3.0]), AbstractWeights{Float64})
+        @test isa(f([1 2 3; 4 5 6]), AbstractWeights{Int})
+
+        @test isempty(f(Float64[]))
+        @test size(f([1, 2, 3])) == (3,)
+        @test axes(f([1, 2, 3])) == (Base.OneTo(3),)
+        @test IndexStyle(f([1, 2, 3])) == IndexLinear()
+
+        w = [1.0, 2.0, 3.0]
+        wv = f(w)
+        @test eltype(wv) === Float64
+        @test length(wv) === 3
+        @test wv == w
+        @test sum(wv) === 6.0
+        @test !isempty(wv)
+        @test Base.mightalias(w, wv)
+        @test !Base.mightalias([1], wv)
+
+        b = trues(3)
+        bv = f(b)
+        @test eltype(bv) === Bool
+        @test length(bv) === 3
+        @test convert(Vector, bv) == b
+        @test sum(bv) === 3
+        @test !isempty(bv)
+
+        ba = BitArray([true, false, true])
+        sa = sparsevec([1.0, 0.0, 2.0])
+
+        @test sum(ba, wv) === 4.0
+        @test sum(sa, wv) === 7.0
+
+        @test_throws ArgumentError f([0.1, Inf])
+        @test_throws ArgumentError f([0.1, NaN])
+    end
 
+    @testset "$f, setindex!" for f in weight_funcs
+        w = [1.0, 2.0, 3.0]
+        wv = f(w)
+
+        # Check getindex & sum
+        @test wv[1] === 1.0
+        @test sum(wv) === 6.0
+        @test wv == w
+
+        # Test setindex! success
+        @test (wv[1] = 4) === 4 # setindex! returns original val
+        @test wv[1] === 4.0 # value correctly converted and set
+        @test sum(wv) === 9.0 # sum updated
+        @test wv == [4.0, 2.0, 3.0] # Test state of all values
+
+        # Test multivalue setindex!
+ wv[1:2] = [3.0, 5.0] + @test wv[1] === 3.0 + @test wv[2] === 5.0 + @test sum(wv) === 11.0 + @test wv == [3.0, 5.0, 3.0] # Test state of all values + + @test_throws ArgumentError wv[1] = Inf + @test_throws ArgumentError wv[1] = NaN + + # Test failed setindex! due to conversion error + w = [1, 2, 3] + wv = f(w) + + @test_throws InexactError wv[1] = 1.5 # Returns original value + @test wv[1] === 1 # value not updated + @test sum(wv) === 6 # sum not corrupted + @test wv == [1, 2, 3] # Test state of all values + end -@testset "$f, setindex!" for f in weight_funcs - w = [1., 2., 3.] - wv = f(w) - - # Check getindex & sum - @test wv[1] === 1. - @test sum(wv) === 6. - @test wv == w - - # Test setindex! success - @test (wv[1] = 4) === 4 # setindex! returns original val - @test wv[1] === 4. # value correctly converted and set - @test sum(wv) === 9. # sum updated - @test wv == [4., 2., 3.] # Test state of all values - - # Test mulivalue setindex! - wv[1:2] = [3., 5.] - @test wv[1] === 3. - @test wv[2] === 5. - @test sum(wv) === 11. - @test wv == [3., 5., 3.] # Test state of all values - - @test_throws ArgumentError wv[1] = Inf - @test_throws ArgumentError wv[1] = NaN - - # Test failed setindex! due to conversion error - w = [1, 2, 3] - wv = f(w) - - @test_throws InexactError wv[1] = 1.5 # Returns original value - @test wv[1] === 1 # value not updated - @test sum(wv) === 6 # sum not corrupted - @test wv == [1, 2, 3] # Test state of all values -end + @testset "$f, isequal and ==" for f in weight_funcs + x = f([1, 2, 3]) -@testset "$f, isequal and ==" for f in weight_funcs - x = f([1, 2, 3]) + y = f([1, 2, 3]) # same values, type and parameters + @test isequal(x, y) + @test x == y - y = f([1, 2, 3]) # same values, type and parameters - @test isequal(x, y) - @test x == y + y = f([1.0, 2.0, 3.0]) # same values and type, different parameters + @test isequal(x, y) + @test x == y - y = f([1.0, 2.0, 3.0]) # same values and type, different parameters - @test isequal(x, y) - @test x == y + if f != fweights # same values and parameters, different types + y = fweights([1, 2, 3]) + @test !isequal(x, y) + @test x != y + end - if f != fweights # same values and parameters, different types - y = fweights([1, 2, 3]) + x = f([1.0, 2.0, 0.0]) # isequal and == treat ±0.0 differently + y = f([1.0, 2.0, -0.0]) @test !isequal(x, y) - @test x != y + @test x == y end - x = f([1.0, 2.0, 0.0]) # isequal and == treat ±0.0 differently - y = f([1.0, 2.0, -0.0]) - @test !isequal(x, y) - @test x == y -end - -@testset "Unit weights" begin - wv = uweights(Float64, 3) - @test wv[1] === 1. - @test wv[1:3] == fill(1.0, 3) - @test wv[:] == fill(1.0, 3) - @test !isempty(wv) - @test length(wv) === 3 - @test size(wv) === (3,) - @test axes(wv) === (Base.OneTo(3),) - @test sum(wv) === 3. 
-    @test wv == fill(1.0, 3)
-    @test StatsBase.varcorrection(wv) == 1/3
-    @test !isequal(wv, fweights(fill(1.0, 3)))
-    @test isequal(wv, uweights(3))
-    @test wv != fweights(fill(1.0, 3))
-    @test wv == uweights(3)
-    @test wv[[true, false, false]] == uweights(Float64, 1)
-    @test convert(Vector, wv) == ones(3)
-    @test !Base.mightalias(wv, uweights(Float64, 3))
-    @test Base.dataids(wv) == ()
-end
+    @testset "Unit weights" begin
+        wv = uweights(Float64, 3)
+        @test wv[1] === 1.0
+        @test wv[1:3] == fill(1.0, 3)
+        @test wv[:] == fill(1.0, 3)
+        @test !isempty(wv)
+        @test length(wv) === 3
+        @test size(wv) === (3,)
+        @test axes(wv) === (Base.OneTo(3),)
+        @test sum(wv) === 3.0
+        @test wv == fill(1.0, 3)
+        @test StatsBase.varcorrection(wv) == 1/3
+        @test !isequal(wv, fweights(fill(1.0, 3)))
+        @test isequal(wv, uweights(3))
+        @test wv != fweights(fill(1.0, 3))
+        @test wv == uweights(3)
+        @test wv[[true, false, false]] == uweights(Float64, 1)
+        @test convert(Vector, wv) == ones(3)
+        @test !Base.mightalias(wv, uweights(Float64, 3))
+        @test Base.dataids(wv) == ()
+    end
 
-## wsum
+    ## wsum
 
-@testset "wsum" begin
-    x = [6., 8., 9.]
-    w = [2., 3., 4.]
-    p = [1. 2. ; 3. 4.]
-    q = [1., 2., 3., 4.]
+    @testset "wsum" begin
+        x = [6.0, 8.0, 9.0]
+        w = [2.0, 3.0, 4.0]
+        p = [1.0 2.0; 3.0 4.0]
+        q = [1.0, 2.0, 3.0, 4.0]
 
-    @test wsum(Float64[], Float64[]) === 0.0
-    @test wsum(x, w) === 72.0
-    @test wsum(p, q) === 29.0
+        @test wsum(Float64[], Float64[]) === 0.0
+        @test wsum(x, w) === 72.0
+        @test wsum(p, q) === 29.0
 
-    ## wsum along dimension
+        ## wsum along dimension
 
-    @test wsum(x, w, 1) == [72.0]
+        @test wsum(x, w, 1) == [72.0]
 
-    x = rand(6, 8)
-    w1 = rand(6)
-    w2 = rand(8)
+        x = rand(6, 8)
+        w1 = rand(6)
+        w2 = rand(8)
 
-    @test size(wsum(x, w1, 1)) == (1, 8)
-    @test size(wsum(x, w2, 2)) == (6, 1)
+        @test size(wsum(x, w1, 1)) == (1, 8)
+        @test size(wsum(x, w2, 2)) == (6, 1)
 
-    @test wsum(x, w1, 1) ≈ sum(x .* w1, dims=1)
-    @test wsum(x, w2, 2) ≈ sum(x .* w2', dims=2)
+        @test wsum(x, w1, 1) ≈ sum(x .* w1, dims=1)
+        @test wsum(x, w2, 2) ≈ sum(x .* w2', dims=2)
 
-    x = rand(6, 5, 4)
-    w1 = rand(6)
-    w2 = rand(5)
-    w3 = rand(4)
+        x = rand(6, 5, 4)
+        w1 = rand(6)
+        w2 = rand(5)
+        w3 = rand(4)
 
-    @test size(wsum(x, w1, 1)) == (1, 5, 4)
-    @test size(wsum(x, w2, 2)) == (6, 1, 4)
-    @test size(wsum(x, w3, 3)) == (6, 5, 1)
+        @test size(wsum(x, w1, 1)) == (1, 5, 4)
+        @test size(wsum(x, w2, 2)) == (6, 1, 4)
+        @test size(wsum(x, w3, 3)) == (6, 5, 1)
 
-    @test wsum(x, w1, 1) ≈ sum(x .* w1, dims=1)
-    @test wsum(x, w2, 2) ≈ sum(x .* w2', dims=2)
-    @test wsum(x, w3, 3) ≈ sum(x .* reshape(w3, 1, 1, 4), dims=3)
+        @test wsum(x, w1, 1) ≈ sum(x .* w1, dims=1)
+        @test wsum(x, w2, 2) ≈ sum(x .* w2', dims=2)
+        @test wsum(x, w3, 3) ≈ sum(x .* reshape(w3, 1, 1, 4), dims=3)
 
-    v = view(x, 2:4, :, :)
+        v = view(x, 2:4, :, :)
 
-    @test wsum(v, w1[1:3], 1) ≈ sum(v .* w1[1:3], dims=1)
-    @test wsum(v, w2, 2) ≈ sum(v .* w2', dims=2)
-    @test wsum(v, w3, 3) ≈ sum(v .* reshape(w3, 1, 1, 4), dims=3)
+        @test wsum(v, w1[1:3], 1) ≈ sum(v .* w1[1:3], dims=1)
+        @test wsum(v, w2, 2) ≈ sum(v .* w2', dims=2)
+        @test wsum(v, w3, 3) ≈ sum(v .* reshape(w3, 1, 1, 4), dims=3)
 
-    ## wsum for Arrays with non-BlasReal elements
+        ## wsum for Arrays with non-BlasReal elements
 
-    x = rand(1:100, 6, 8)
-    w1 = rand(6)
-    w2 = rand(8)
+        x = rand(1:100, 6, 8)
+        w1 = rand(6)
+        w2 = rand(8)
 
-    @test wsum(x, w1, 1) ≈ sum(x .* w1, dims=1)
-    @test wsum(x, w2, 2) ≈ sum(x .* w2', dims=2)
+        @test wsum(x, w1, 1) ≈ sum(x .* w1, dims=1)
+        @test wsum(x, w2, 2) ≈ sum(x .* w2', dims=2)
 
-    ## wsum!
+        ## wsum!
- x = rand(6) - w = rand(6) + x = rand(6) + w = rand(6) - r = ones(1) - @test wsum!(r, x, w, 1; init=true) === r - @test r ≈ [dot(x, w)] + r = ones(1) + @test wsum!(r, x, w, 1; init=true) === r + @test r ≈ [dot(x, w)] - r = ones(1) - @test wsum!(r, x, w, 1; init=false) === r - @test r ≈ [dot(x, w) + 1.0] + r = ones(1) + @test wsum!(r, x, w, 1; init=false) === r + @test r ≈ [dot(x, w) + 1.0] - x = rand(6, 8) - w1 = rand(6) - w2 = rand(8) + x = rand(6, 8) + w1 = rand(6) + w2 = rand(8) - r = ones(1, 8) - @test wsum!(r, x, w1, 1; init=true) === r - @test r ≈ sum(x .* w1, dims=1) + r = ones(1, 8) + @test wsum!(r, x, w1, 1; init=true) === r + @test r ≈ sum(x .* w1, dims=1) - r = ones(1, 8) - @test wsum!(r, x, w1, 1; init=false) === r - @test r ≈ sum(x .* w1, dims=1) .+ 1.0 + r = ones(1, 8) + @test wsum!(r, x, w1, 1; init=false) === r + @test r ≈ sum(x .* w1, dims=1) .+ 1.0 - r = ones(6) - @test wsum!(r, x, w2, 2; init=true) === r - @test r ≈ sum(x .* w2', dims=2) + r = ones(6) + @test wsum!(r, x, w2, 2; init=true) === r + @test r ≈ sum(x .* w2', dims=2) - r = ones(6) - @test wsum!(r, x, w2, 2; init=false) === r - @test r ≈ sum(x .* w2', dims=2) .+ 1.0 + r = ones(6) + @test wsum!(r, x, w2, 2; init=false) === r + @test r ≈ sum(x .* w2', dims=2) .+ 1.0 - x = rand(8, 6, 5) - w1 = rand(8) - w2 = rand(6) - w3 = rand(5) + x = rand(8, 6, 5) + w1 = rand(8) + w2 = rand(6) + w3 = rand(5) - r = ones(1, 6, 5) - @test wsum!(r, x, w1, 1; init=true) === r - @test r ≈ sum(x .* w1, dims=1) + r = ones(1, 6, 5) + @test wsum!(r, x, w1, 1; init=true) === r + @test r ≈ sum(x .* w1, dims=1) - r = ones(1, 6, 5) - @test wsum!(r, x, w1, 1; init=false) === r - @test r ≈ sum(x .* w1, dims=1) .+ 1.0 + r = ones(1, 6, 5) + @test wsum!(r, x, w1, 1; init=false) === r + @test r ≈ sum(x .* w1, dims=1) .+ 1.0 - r = ones(8, 1, 5) - @test wsum!(r, x, w2, 2; init=true) === r - @test r ≈ sum(x .* w2', dims=2) + r = ones(8, 1, 5) + @test wsum!(r, x, w2, 2; init=true) === r + @test r ≈ sum(x .* w2', dims=2) - r = ones(8, 1, 5) - @test wsum!(r, x, w2, 2; init=false) === r - @test r ≈ sum(x .* w2', dims=2) .+ 1.0 + r = ones(8, 1, 5) + @test wsum!(r, x, w2, 2; init=false) === r + @test r ≈ sum(x .* w2', dims=2) .+ 1.0 - r = ones(8, 6) - @test wsum!(r, x, w3, 3; init=true) === r - @test r ≈ sum(x .* reshape(w3, (1, 1, 5)), dims=3) + r = ones(8, 6) + @test wsum!(r, x, w3, 3; init=true) === r + @test r ≈ sum(x .* reshape(w3, (1, 1, 5)), dims=3) - r = ones(8, 6) - @test wsum!(r, x, w3, 3; init=false) === r - @test r ≈ sum(x .* reshape(w3, (1, 1, 5)), dims=3) .+ 1.0 -end + r = ones(8, 6) + @test wsum!(r, x, w3, 3; init=false) === r + @test r ≈ sum(x .* reshape(w3, (1, 1, 5)), dims=3) .+ 1.0 + end -## sum, mean and quantile + ## sum, mean and quantile -a = reshape(1.0:27.0, 3, 3, 3) + a = reshape(1.0:27.0, 3, 3, 3) -@testset "Sum $f" for f in weight_funcs - @test sum([1.0, 2.0, 3.0], f([1.0, 0.5, 0.5])) ≈ 3.5 - @test sum(1:3, f([1.0, 1.0, 0.5])) ≈ 4.5 - @test sum([1 + 2im, 2 + 3im], f([1.0, 0.5])) ≈ 2 + 3.5im - @test sum([[1, 2], [3, 4]], f([2, 3])) == [11, 16] + @testset "Sum $f" for f in weight_funcs + @test sum([1.0, 2.0, 3.0], f([1.0, 0.5, 0.5])) ≈ 3.5 + @test sum(1:3, f([1.0, 1.0, 0.5])) ≈ 4.5 + @test sum([1 + 2im, 2 + 3im], f([1.0, 0.5])) ≈ 2 + 3.5im + @test sum([[1, 2], [3, 4]], f([2, 3])) == [11, 16] - for wt in ([1.0, 1.0, 1.0], [1.0, 0.2, 0.0], [0.2, 0.0, 1.0]) - @test sum(a, f(wt), dims=1) ≈ sum(a.*reshape(wt, length(wt), 1, 1), dims=1) - @test sum(a, f(wt), dims=2) ≈ sum(a.*reshape(wt, 1, length(wt), 1), dims=2) - @test sum(a, f(wt), 
dims=3) ≈ sum(a.*reshape(wt, 1, 1, length(wt)), dims=3) + for wt in ([1.0, 1.0, 1.0], [1.0, 0.2, 0.0], [0.2, 0.0, 1.0]) + @test sum(a, f(wt), dims=1) ≈ sum(a .* reshape(wt, length(wt), 1, 1), dims=1) + @test sum(a, f(wt), dims=2) ≈ sum(a .* reshape(wt, 1, length(wt), 1), dims=2) + @test sum(a, f(wt), dims=3) ≈ sum(a .* reshape(wt, 1, 1, length(wt)), dims=3) + end end -end -@testset "Mean $f" for f in weight_funcs - @test mean([1:3;], f([1.0, 1.0, 0.5])) ≈ 1.8 - @test mean(1:3, f([1.0, 1.0, 0.5])) ≈ 1.8 - @test mean([1 + 2im, 4 + 5im], f([1.0, 0.5])) ≈ 2 + 3im - - for wt in ([1.0, 1.0, 1.0], [1.0, 0.2, 0.0], [0.2, 0.0, 1.0]) - @test mean(a, f(wt), dims=1) ≈ sum(a.*reshape(wt, length(wt), 1, 1), dims=1)/sum(wt) - @test mean(a, f(wt), dims=2) ≈ sum(a.*reshape(wt, 1, length(wt), 1), dims=2)/sum(wt) - @test mean(a, f(wt), dims=3) ≈ sum(a.*reshape(wt, 1, 1, length(wt)), dims=3)/sum(wt) - @test_throws ErrorException mean(a, f(wt), dims=4) + @testset "Mean $f" for f in weight_funcs + @test mean([1:3;], f([1.0, 1.0, 0.5])) ≈ 1.8 + @test mean(1:3, f([1.0, 1.0, 0.5])) ≈ 1.8 + @test mean([1 + 2im, 4 + 5im], f([1.0, 0.5])) ≈ 2 + 3im + + for wt in ([1.0, 1.0, 1.0], [1.0, 0.2, 0.0], [0.2, 0.0, 1.0]) + @test mean(a, f(wt), dims=1) ≈ + sum(a .* reshape(wt, length(wt), 1, 1), dims=1)/sum(wt) + @test mean(a, f(wt), dims=2) ≈ + sum(a .* reshape(wt, 1, length(wt), 1), dims=2)/sum(wt) + @test mean(a, f(wt), dims=3) ≈ + sum(a .* reshape(wt, 1, 1, length(wt)), dims=3)/sum(wt) + @test_throws ErrorException mean(a, f(wt), dims=4) + end end -end -@testset "Quantile fweights" begin - data = ( - [7, 1, 2, 4, 10], - [7, 1, 2, 4, 10], - [7, 1, 2, 4, 10, 15], - [1, 2, 4, 7, 10, 15], - [0, 10, 20, 30], - [1, 2, 3, 4, 5], - [1, 2, 3, 4, 5], - [30, 40, 50, 60, 35], - [2, 0.6, 1.3, 0.3, 0.3, 1.7, 0.7, 1.7], - [1, 2, 2], - [3.7, 3.3, 3.5, 2.8], - [100, 125, 123, 60, 45, 56, 66], - [2, 2, 2, 2, 2, 2], - [2.3], - [-2, -3, 1, 2, -10], - [1, 2, 3, 4, 5], - [5, 4, 3, 2, 1], - [-2, 2, -1, 3, 6], - [-10, 1, 1, -10, -10], - ) - wt = ( - [3, 1, 1, 1, 3], - [1, 1, 1, 1, 1], - [3, 1, 1, 1, 3, 3], - [1, 1, 1, 3, 3, 3], - [30, 191, 9, 0], - [10, 1, 1, 1, 9], - [10, 1, 1, 1, 900], - [1, 3, 5, 4, 2], - [2, 2, 5, 0, 2, 2, 1, 6], - [1, 1, 8], - [5, 5, 4, 1], - [30, 56, 144, 24, 55, 43, 67], - [1, 2, 3, 4, 5, 6], - [12], - [7, 1, 1, 1, 6], - [1, 0, 0, 0, 2], - [1, 2, 3, 4, 5], - [1, 2, 3, 2, 1], - [0, 1, 1, 1, 1], - ) - p = [0.0, 0.25, 0.5, 0.75, 1.0] - function _rep(x::AbstractVector, lengths::AbstractVector{Int}) - res = similar(x, sum(lengths)) - i = 1 - for idx in 1:length(x) - tmp = x[idx] - for kdx in 1:lengths[idx] - res[i] = tmp - i += 1 + @testset "Quantile fweights" begin + data = ([7, 1, 2, 4, 10], + [7, 1, 2, 4, 10], + [7, 1, 2, 4, 10, 15], + [1, 2, 4, 7, 10, 15], + [0, 10, 20, 30], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [30, 40, 50, 60, 35], + [2, 0.6, 1.3, 0.3, 0.3, 1.7, 0.7, 1.7], + [1, 2, 2], + [3.7, 3.3, 3.5, 2.8], + [100, 125, 123, 60, 45, 56, 66], + [2, 2, 2, 2, 2, 2], + [2.3], + [-2, -3, 1, 2, -10], + [1, 2, 3, 4, 5], + [5, 4, 3, 2, 1], + [-2, 2, -1, 3, 6], + [-10, 1, 1, -10, -10]) + wt = ([3, 1, 1, 1, 3], + [1, 1, 1, 1, 1], + [3, 1, 1, 1, 3, 3], + [1, 1, 1, 3, 3, 3], + [30, 191, 9, 0], + [10, 1, 1, 1, 9], + [10, 1, 1, 1, 900], + [1, 3, 5, 4, 2], + [2, 2, 5, 0, 2, 2, 1, 6], + [1, 1, 8], + [5, 5, 4, 1], + [30, 56, 144, 24, 55, 43, 67], + [1, 2, 3, 4, 5, 6], + [12], + [7, 1, 1, 1, 6], + [1, 0, 0, 0, 2], + [1, 2, 3, 4, 5], + [1, 2, 3, 2, 1], + [0, 1, 1, 1, 1]) + p = [0.0, 0.25, 0.5, 0.75, 1.0] + function 
_rep(x::AbstractVector, lengths::AbstractVector{Int}) + res = similar(x, sum(lengths)) + i = 1 + for idx in 1:length(x) + tmp = x[idx] + for kdx in 1:lengths[idx] + res[i] = tmp + i += 1 + end end + return res + end + # quantile with fweights is the same as repeated vectors + for i in 1:length(data) + @test quantile(data[i], fweights(wt[i]), p) ≈ quantile(_rep(data[i], wt[i]), p) + end + # quantile with fweights = 1 is the same as quantile + for i in 1:length(data) + @test quantile(data[i], fweights(fill!(similar(wt[i]), 1)), p) ≈ + quantile(data[i], p) end - return res - end - # quantile with fweights is the same as repeated vectors - for i = 1:length(data) - @test quantile(data[i], fweights(wt[i]), p) ≈ quantile(_rep(data[i], wt[i]), p) - end - # quantile with fweights = 1 is the same as quantile - for i = 1:length(data) - @test quantile(data[i], fweights(fill!(similar(wt[i]), 1)), p) ≈ quantile(data[i], p) - end - # Issue #313 - @test quantile([1, 2, 3, 4, 5], fweights([0,1,2,1,0]), p) ≈ quantile([2, 3, 3, 4], p) - @test quantile([1, 2], fweights([1, 1]), 0.25) ≈ 1.25 - @test quantile([1, 2], fweights([2, 2]), 0.25) ≈ 1.0 + # Issue #313 + @test quantile([1, 2, 3, 4, 5], fweights([0, 1, 2, 1, 0]), p) ≈ + quantile([2, 3, 3, 4], p) + @test quantile([1, 2], fweights([1, 1]), 0.25) ≈ 1.25 + @test quantile([1, 2], fweights([2, 2]), 0.25) ≈ 1.0 - # test non integer frequency weights - quantile([1, 2], fweights([1.0, 2.0]), 0.25) == quantile([1, 2], fweights([1, 2]), 0.25) - @test_throws ArgumentError quantile([1, 2], fweights([1.5, 2.0]), 0.25) + # test non integer frequency weights + quantile([1, 2], fweights([1.0, 2.0]), 0.25) == + quantile([1, 2], fweights([1, 2]), 0.25) + @test_throws ArgumentError quantile([1, 2], fweights([1.5, 2.0]), 0.25) - @test_throws ArgumentError quantile([1, 2], fweights([1, 2]), nextfloat(1.0)) - @test_throws ArgumentError quantile([1, 2], fweights([1, 2]), prevfloat(0.0)) -end + @test_throws ArgumentError quantile([1, 2], fweights([1, 2]), nextfloat(1.0)) + @test_throws ArgumentError quantile([1, 2], fweights([1, 2]), prevfloat(0.0)) + end -@testset "Quantile aweights, pweights and weights" for f in (aweights, pweights, weights) - data = ( - [7, 1, 2, 4, 10], - [7, 1, 2, 4, 10], - [7, 1, 2, 4, 10, 15], - [1, 2, 4, 7, 10, 15], - [0, 10, 20, 30], - [1, 2, 3, 4, 5], - [1, 2, 3, 4, 5], - [30, 40, 50, 60, 35], - [2, 0.6, 1.3, 0.3, 0.3, 1.7, 0.7, 1.7], - [1, 2, 2], - [3.7, 3.3, 3.5, 2.8], - [100, 125, 123, 60, 45, 56, 66], - [2, 2, 2, 2, 2, 2], - [2.3], - [-2, -3, 1, 2, -10], - [1, 2, 3, 4, 5], - [5, 4, 3, 2, 1], - [-2, 2, -1, 3, 6], - [-10, 1, 1, -10, -10], - ) - wt = ( - [1, 1/3, 1/3, 1/3, 1], - [1, 1, 1, 1, 1], - [1, 1/3, 1/3, 1/3, 1, 1], - [1/3, 1/3, 1/3, 1, 1, 1], - [30, 191, 9, 0], - [10, 1, 1, 1, 9], - [10, 1, 1, 1, 900], - [1, 3, 5, 4, 2], - [2, 2, 5, 1, 2, 2, 1, 6], - [0.1, 0.1, 0.8], - [5, 5, 4, 1], - [30, 56, 144, 24, 55, 43, 67], - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], - [12], - [7, 1, 1, 1, 6], - [1, 0, 0, 0, 2], - [1, 2, 3, 4, 5], - [0.1, 0.2, 0.3, 0.2, 0.1], - [1, 1, 1, 1, 1], - ) - quantile_answers = ( - [1.0, 4.0, 6.0, 8.0, 10.0], - [1.0, 2.0, 4.0, 7.0, 10.0], - [1.0, 4.75, 7.5, 10.4166667, 15.0], - [1.0, 4.75, 7.5, 10.4166667, 15.0], - [0.0, 2.6178010, 5.2356021, 7.8534031, 20.0], - [1.0, 4.0, 4.3333333, 4.6666667, 5.0], - [1.0, 4.2475, 4.4983333, 4.7491667, 5.0], - [30.0, 37.5, 44.0, 51.25, 60.0], - [0.3, 0.7, 1.3, 1.7, 2.0], - [1.0, 2.0, 2.0, 2.0, 2.0], - [2.8, 3.15, 3.4, 3.56, 3.7], - [45.0, 62.149253, 102.875, 117.4097222, 125.0], - [2.0, 2.0, 2.0, 
2.0, 2.0], - [2.3, 2.3, 2.3, 2.3, 2.3], - [-10.0, -2.7857143, -2.4285714, -2.0714286, 2.0], - [1.0, 2.0, 3.0, 4.0, 5.0], - [1.0, 1.625, 2.3333333, 3.25, 5.0], - [-2.0, -1.3333333, 0.5, 2.5, 6.0], - [-10.0, -10.0, -10.0, 1.0, 1.0] - ) - p = [0.0, 0.25, 0.5, 0.75, 1.0] - - Random.seed!(10) - for i = 1:length(data) - @test quantile(data[i], f(wt[i]), p) ≈ quantile_answers[i] atol = 1e-5 - for j = 1:10 - # order of p does not matter - reorder = sortperm(rand(length(p))) - @test quantile(data[i], f(wt[i]), p[reorder]) ≈ quantile_answers[i][reorder] atol = 1e-5 - end - for j = 1:10 - # order of w does not matter - reorder = sortperm(rand(length(data[i]))) - @test quantile(data[i][reorder], f(wt[i][reorder]), p) ≈ quantile_answers[i] atol = 1e-5 + @testset "Quantile aweights, pweights and weights" for f in + (aweights, pweights, weights) + data = ([7, 1, 2, 4, 10], + [7, 1, 2, 4, 10], + [7, 1, 2, 4, 10, 15], + [1, 2, 4, 7, 10, 15], + [0, 10, 20, 30], + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [30, 40, 50, 60, 35], + [2, 0.6, 1.3, 0.3, 0.3, 1.7, 0.7, 1.7], + [1, 2, 2], + [3.7, 3.3, 3.5, 2.8], + [100, 125, 123, 60, 45, 56, 66], + [2, 2, 2, 2, 2, 2], + [2.3], + [-2, -3, 1, 2, -10], + [1, 2, 3, 4, 5], + [5, 4, 3, 2, 1], + [-2, 2, -1, 3, 6], + [-10, 1, 1, -10, -10]) + wt = ([1, 1/3, 1/3, 1/3, 1], + [1, 1, 1, 1, 1], + [1, 1/3, 1/3, 1/3, 1, 1], + [1/3, 1/3, 1/3, 1, 1, 1], + [30, 191, 9, 0], + [10, 1, 1, 1, 9], + [10, 1, 1, 1, 900], + [1, 3, 5, 4, 2], + [2, 2, 5, 1, 2, 2, 1, 6], + [0.1, 0.1, 0.8], + [5, 5, 4, 1], + [30, 56, 144, 24, 55, 43, 67], + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], + [12], + [7, 1, 1, 1, 6], + [1, 0, 0, 0, 2], + [1, 2, 3, 4, 5], + [0.1, 0.2, 0.3, 0.2, 0.1], + [1, 1, 1, 1, 1]) + quantile_answers = ([1.0, 4.0, 6.0, 8.0, 10.0], + [1.0, 2.0, 4.0, 7.0, 10.0], + [1.0, 4.75, 7.5, 10.4166667, 15.0], + [1.0, 4.75, 7.5, 10.4166667, 15.0], + [0.0, 2.6178010, 5.2356021, 7.8534031, 20.0], + [1.0, 4.0, 4.3333333, 4.6666667, 5.0], + [1.0, 4.2475, 4.4983333, 4.7491667, 5.0], + [30.0, 37.5, 44.0, 51.25, 60.0], + [0.3, 0.7, 1.3, 1.7, 2.0], + [1.0, 2.0, 2.0, 2.0, 2.0], + [2.8, 3.15, 3.4, 3.56, 3.7], + [45.0, 62.149253, 102.875, 117.4097222, 125.0], + [2.0, 2.0, 2.0, 2.0, 2.0], + [2.3, 2.3, 2.3, 2.3, 2.3], + [-10.0, -2.7857143, -2.4285714, -2.0714286, 2.0], + [1.0, 2.0, 3.0, 4.0, 5.0], + [1.0, 1.625, 2.3333333, 3.25, 5.0], + [-2.0, -1.3333333, 0.5, 2.5, 6.0], + [-10.0, -10.0, -10.0, 1.0, 1.0]) + p = [0.0, 0.25, 0.5, 0.75, 1.0] + + Random.seed!(10) + for i in 1:length(data) + @test quantile(data[i], f(wt[i]), p) ≈ quantile_answers[i] atol = 1e-5 + for j in 1:10 + # order of p does not matter + reorder = sortperm(rand(length(p))) + @test quantile(data[i], f(wt[i]), p[reorder]) ≈ quantile_answers[i][reorder] atol = 1e-5 + end + for j in 1:10 + # order of w does not matter + reorder = sortperm(rand(length(data[i]))) + @test quantile(data[i][reorder], f(wt[i][reorder]), p) ≈ quantile_answers[i] atol = 1e-5 + end end - end - # All equal weights corresponds to base quantile - for v in (1, 2, 345) - for i = 1:length(data) - w = f(fill(v, length(data[i]))) - @test quantile(data[i], w, p) ≈ quantile(data[i], p) atol = 1e-5 - for j = 1:10 - prandom = rand(4) - @test quantile(data[i], w, prandom) ≈ quantile(data[i], prandom) atol = 1e-5 + # All equal weights corresponds to base quantile + for v in (1, 2, 345) + for i in 1:length(data) + w = f(fill(v, length(data[i]))) + @test quantile(data[i], w, p) ≈ quantile(data[i], p) atol = 1e-5 + for j in 1:10 + prandom = rand(4) + @test quantile(data[i], w, prandom) ≈ 
quantile(data[i], prandom) atol = 1e-5 + end end end + # test zeros are removed + for i in 1:length(data) + @test quantile(vcat(1.0, data[i]), f(vcat(0.0, wt[i])), p) ≈ quantile_answers[i] atol = 1e-5 + end + # Syntax + v = [7, 1, 2, 4, 10] + w = [1, 1/3, 1/3, 1/3, 1] + answer = 6.0 + @test quantile(data[1], f(w), 0.5) ≈ answer atol = 1e-5 + + # Test non-Real eltype + @test_throws ArgumentError quantile([missing, 1], f([1, 2]), 0.5) + @test quantile(Union{Float64,Missing}[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) == + quantile(Any[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) == + quantile([1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) + @test quantile([Date(2005, 01, 01), Date(2005, 01, 01)], f([1, 1]), 0.5) == + Date(2005, 01, 01) + + @test_throws ArgumentError quantile([missing, 1], f([1, 2]), [0.5, 0.75]) + @test quantile(Union{Float64,Missing}[1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) == + quantile(Any[1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) == + quantile([1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) + @test quantile(fill(Date(2005, 01, 01), 3), f([1, 1, 1]), [0.5, 0.75]) == + fill(Date(2005, 01, 01), 2) end - # test zeros are removed - for i = 1:length(data) - @test quantile(vcat(1.0, data[i]), f(vcat(0.0, wt[i])), p) ≈ quantile_answers[i] atol = 1e-5 - end - # Syntax - v = [7, 1, 2, 4, 10] - w = [1, 1/3, 1/3, 1/3, 1] - answer = 6.0 - @test quantile(data[1], f(w), 0.5) ≈ answer atol = 1e-5 - - # Test non-Real eltype - @test_throws ArgumentError quantile([missing, 1], f([1, 2]), 0.5) - @test quantile(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) == - quantile(Any[1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) == - quantile([1, 2, 3, 4], f([1, 2, 2, 1]), 0.5) - @test quantile([Date(2005, 01, 01), Date(2005, 01, 01)], f([1, 1]), 0.5) == - Date(2005, 01, 01) - - @test_throws ArgumentError quantile([missing, 1], f([1, 2]), [0.5, 0.75]) - @test quantile(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) == - quantile(Any[1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) == - quantile([1, 2, 3, 4], f([1, 2, 2, 1]), [0.5, 0.75]) - @test quantile(fill(Date(2005, 01, 01), 3), f([1, 1, 1]), [0.5, 0.75]) == - fill(Date(2005, 01, 01), 2) -end -@testset "Median $f" for f in weight_funcs - data = [4, 3, 2, 1] - wt = [0, 0, 0, 0] - @test_throws ArgumentError median(data, f(wt)) - @test_throws ArgumentError median(Float64[], f(Float64[])) - wt = [1, 2, 3, 4, 5] - @test_throws ArgumentError median(data, f(wt)) - @test_throws MethodError median([4 3 2 1 0], f(wt)) - @test_throws MethodError median([[1 2] ; [4 5] ; [7 8] ; [10 11] ; [13 14]], f(wt)) - data = [1, 3, 2, NaN, 2] - @test isnan(median(data, f(wt))) - wt = [1, 2, NaN, 4, 5] - @test_throws ArgumentError median(data, f(wt)) - data = [1, 3, 2, 1, 2] - @test_throws ArgumentError median(data, f(wt)) - wt = [-1, -1, -1, -1, -1] - @test_throws ArgumentError median(data, f(wt)) - wt = [-1, -1, -1, 0, 0] - @test_throws ArgumentError median(data, f(wt)) - - data = [4, 3, 2, 1] - wt = [1, 2, 3, 4] - @test median(data, f(wt)) ≈ quantile(data, f(wt), 0.5) atol = 1e-5 - - # Test non-Real eltype - @test_throws ArgumentError median([missing, 1], f([1, 2])) - @test median(Union{Float64, Missing}[1, 2, 3, 4], f([1, 2, 2, 1])) == - median(Any[1, 2, 3, 4], f([1, 2, 2, 1])) == - median([1, 2, 3, 4], f([1, 2, 2, 1])) - @test median([Date(2005, 01, 01), Date(2005, 01, 01)], f([1, 1])) == - Date(2005, 01, 01) -end + @testset "Median $f" for f in weight_funcs + data = [4, 3, 2, 1] + wt = [0, 0, 0, 0] + @test_throws ArgumentError median(data, f(wt)) + @test_throws ArgumentError 
median(Float64[], f(Float64[])) + wt = [1, 2, 3, 4, 5] + @test_throws ArgumentError median(data, f(wt)) + @test_throws MethodError median([4 3 2 1 0], f(wt)) + @test_throws MethodError median([[1 2]; [4 5]; [7 8]; [10 11]; [13 14]], f(wt)) + data = [1, 3, 2, NaN, 2] + @test isnan(median(data, f(wt))) + wt = [1, 2, NaN, 4, 5] + @test_throws ArgumentError median(data, f(wt)) + data = [1, 3, 2, 1, 2] + @test_throws ArgumentError median(data, f(wt)) + wt = [-1, -1, -1, -1, -1] + @test_throws ArgumentError median(data, f(wt)) + wt = [-1, -1, -1, 0, 0] + @test_throws ArgumentError median(data, f(wt)) + + data = [4, 3, 2, 1] + wt = [1, 2, 3, 4] + @test median(data, f(wt)) ≈ quantile(data, f(wt), 0.5) atol = 1e-5 + + # Test non-Real eltype + @test_throws ArgumentError median([missing, 1], f([1, 2])) + @test median(Union{Float64,Missing}[1, 2, 3, 4], f([1, 2, 2, 1])) == + median(Any[1, 2, 3, 4], f([1, 2, 2, 1])) == + median([1, 2, 3, 4], f([1, 2, 2, 1])) + @test median([Date(2005, 01, 01), Date(2005, 01, 01)], f([1, 1])) == + Date(2005, 01, 01) + end -@testset "Mismatched eltypes" begin - @test round(mean(Union{Int,Missing}[1,2], weights([1,2])), digits=3) ≈ 1.667 -end + @testset "Mismatched eltypes" begin + @test round(mean(Union{Int,Missing}[1, 2], weights([1, 2])), digits=3) ≈ 1.667 + end -@testset "Sum, mean, quantiles and variance for unit weights" begin - wt = uweights(Float64, 3) + @testset "Sum, mean, quantiles and variance for unit weights" begin + wt = uweights(Float64, 3) - @test sum([1.0, 2.0, 3.0], wt) ≈ wsum([1.0, 2.0, 3.0], wt) ≈ 6.0 - @test mean([1.0, 2.0, 3.0], wt) ≈ 2.0 + @test sum([1.0, 2.0, 3.0], wt) ≈ wsum([1.0, 2.0, 3.0], wt) ≈ 6.0 + @test mean([1.0, 2.0, 3.0], wt) ≈ 2.0 - @test sum(a, wt, dims=1) ≈ sum(a, dims=1) - @test sum(a, wt, dims=2) ≈ sum(a, dims=2) - @test sum(a, wt, dims=3) ≈ sum(a, dims=3) + @test sum(a, wt, dims=1) ≈ sum(a, dims=1) + @test sum(a, wt, dims=2) ≈ sum(a, dims=2) + @test sum(a, wt, dims=3) ≈ sum(a, dims=3) - @test wsum(a, wt, 1) ≈ sum(a, dims=1) - @test wsum(a, wt, 2) ≈ sum(a, dims=2) - @test wsum(a, wt, 3) ≈ sum(a, dims=3) + @test wsum(a, wt, 1) ≈ sum(a, dims=1) + @test wsum(a, wt, 2) ≈ sum(a, dims=2) + @test wsum(a, wt, 3) ≈ sum(a, dims=3) - @test mean(a, wt, dims=1) ≈ mean(a, dims=1) - @test mean(a, wt, dims=2) ≈ mean(a, dims=2) - @test mean(a, wt, dims=3) ≈ mean(a, dims=3) + @test mean(a, wt, dims=1) ≈ mean(a, dims=1) + @test mean(a, wt, dims=2) ≈ mean(a, dims=2) + @test mean(a, wt, dims=3) ≈ mean(a, dims=3) - @test_throws DimensionMismatch sum(a, wt) - @test_throws DimensionMismatch sum(a, wt, dims=4) - @test_throws DimensionMismatch wsum(a, wt, 4) - @test_throws DimensionMismatch mean(a, wt, dims=4) + @test_throws DimensionMismatch sum(a, wt) + @test_throws DimensionMismatch sum(a, wt, dims=4) + @test_throws DimensionMismatch wsum(a, wt, 4) + @test_throws DimensionMismatch mean(a, wt, dims=4) - @test quantile([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5), [0.5]) ≈ [6.0] - @test quantile([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5), 0.5) ≈ 6.0 - @test median([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5)) ≈ 6.0 + @test quantile([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5), [0.5]) ≈ [6.0] + @test quantile([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5), 0.5) ≈ 6.0 + @test median([1.0, 4.0, 6.0, 8.0, 10.0], uweights(5)) ≈ 6.0 - @test var(a, uweights(Float64, 27), corrected=false) ≈ var(a, corrected=false) - @test var(a, uweights(Float64, 27), corrected=true) ≈ var(a, corrected= true) -end + @test var(a, uweights(Float64, 27), corrected=false) ≈ var(a, corrected=false) + @test 
var(a, uweights(Float64, 27), corrected=true) ≈ var(a, corrected=true) + end -@testset "Exponential Weights" begin - λ = 0.2 - @testset "Usage" begin - v = [(1 - λ) ^ (4 - i) for i = 1:4] - w = Weights(v) + @testset "Exponential Weights" begin + λ = 0.2 + @testset "Usage" begin + v = [(1 - λ) ^ (4 - i) for i in 1:4] + w = Weights(v) - @test round.(w, digits=4) == [0.512, 0.64, 0.8, 1.0] + @test round.(w, digits=4) == [0.512, 0.64, 0.8, 1.0] - @testset "basic" begin - @test eweights(1:4, λ; scale=true) ≈ w - end + @testset "basic" begin + @test eweights(1:4, λ; scale=true) ≈ w + end - @testset "1:n" begin - @test eweights(4, λ; scale=true) ≈ w - end + @testset "1:n" begin + @test eweights(4, λ; scale=true) ≈ w + end - @testset "indexin" begin - v = [(1 - λ) ^ (10 - i) for i = 1:10] + @testset "indexin" begin + v = [(1 - λ) ^ (10 - i) for i in 1:10] - # Test that we should be able to skip indices easily - @test eweights([1, 3, 5, 7], 1:10, λ; scale=true) ≈ Weights(v[[1, 3, 5, 7]]) + # Test that we should be able to skip indices easily + @test eweights([1, 3, 5, 7], 1:10, λ; scale=true) ≈ Weights(v[[1, 3, 5, 7]]) - # This should also work with actual time types - t1 = DateTime(2019, 1, 1, 1) - tx = t1 + Hour(7) - tn = DateTime(2019, 1, 1, 10) + # This should also work with actual time types + t1 = DateTime(2019, 1, 1, 1) + tx = t1 + Hour(7) + tn = DateTime(2019, 1, 1, 10) - @test eweights(t1:Hour(2):tx, t1:Hour(1):tn, λ; scale=true) ≈ Weights(v[[1, 3, 5, 7]]) + @test eweights(t1:Hour(2):tx, t1:Hour(1):tn, λ; scale=true) ≈ + Weights(v[[1, 3, 5, 7]]) + end end - end - @testset "Empty" begin - @test eweights(0, 0.3; scale=true) == Weights(Float64[]) - @test eweights(1:0, 0.3; scale=true) == Weights(Float64[]) - @test eweights(Int[], 1:10, 0.4; scale=true) == Weights(Float64[]) - end + @testset "Empty" begin + @test eweights(0, 0.3; scale=true) == Weights(Float64[]) + @test eweights(1:0, 0.3; scale=true) == Weights(Float64[]) + @test eweights(Int[], 1:10, 0.4; scale=true) == Weights(Float64[]) + end - @testset "Failure Conditions" begin - # λ > 1.0 - @test_throws ArgumentError eweights(1, 1.1; scale=true) + @testset "Failure Conditions" begin + # λ > 1.0 + @test_throws ArgumentError eweights(1, 1.1; scale=true) - # time indices are not all positive non-zero integers - @test_throws ArgumentError eweights([0, 1, 2, 3], 0.3; scale=true) + # time indices are not all positive non-zero integers + @test_throws ArgumentError eweights([0, 1, 2, 3], 0.3; scale=true) - # Passing in an array of bools will work because Bool <: Integer, - # but any `false` values will trigger the same argument error as 0.0 - @test_throws ArgumentError eweights([true, false, true, true], 0.3; scale=true) - end + # Passing in an array of bools will work because Bool <: Integer, + # but any `false` values will trigger the same argument error as 0.0 + @test_throws ArgumentError eweights([true, false, true, true], 0.3; scale=true) + end - @testset "scale=false" begin - v = [λ * (1 - λ)^(1 - i) for i = 1:4] - w = Weights(v) + @testset "scale=false" begin + v = [λ * (1 - λ)^(1 - i) for i in 1:4] + w = Weights(v) - @test round.(w, digits=4) == [0.2, 0.25, 0.3125, 0.3906] + @test round.(w, digits=4) == [0.2, 0.25, 0.3125, 0.3906] - wv = eweights(1:10, λ; scale=false) - @test eweights(1:10, λ; scale=true) ≈ wv / maximum(wv) + wv = eweights(1:10, λ; scale=false) + @test eweights(1:10, λ; scale=true) ≈ wv / maximum(wv) + end end -end -@testset "allequal and allunique" begin - # General weights - for f in (weights, aweights, fweights, 
pweights) - @test allunique(f(Float64[])) - @test allunique(f([0.4])) - @test allunique(f([0.4, 0.3])) - @test !allunique(f([0.4, 0.4])) - @test allunique(f([0.4, 0.3, 0.5])) - @test !allunique(f([0.4, 0.4, 0.5])) - @test allunique(f([0.4, 0.3, 0.5, 0.35])) - @test !allunique(f([0.4, 0.3, 0.5, 0.4])) + @testset "allequal and allunique" begin + # General weights + for f in (weights, aweights, fweights, pweights) + @test allunique(f(Float64[])) + @test allunique(f([0.4])) + @test allunique(f([0.4, 0.3])) + @test !allunique(f([0.4, 0.4])) + @test allunique(f([0.4, 0.3, 0.5])) + @test !allunique(f([0.4, 0.4, 0.5])) + @test allunique(f([0.4, 0.3, 0.5, 0.35])) + @test !allunique(f([0.4, 0.3, 0.5, 0.4])) + + if isdefined(Base, :allequal) + @test allequal(f(Float64[])) + @test allequal(f([0.4])) + @test allequal(f([0.4, 0.4])) + @test !allequal(f([0.4, 0.3])) + @test allequal(f([0.4, 0.4, 0.4, 0.4])) + @test !allunique(f([0.4, 0.4, 0.3, 0.4])) + end + end + + # Uniform weights + @test allunique(uweights(0)) + @test allunique(uweights(1)) + @test !allunique(uweights(2)) + @test !allunique(uweights(5)) if isdefined(Base, :allequal) - @test allequal(f(Float64[])) - @test allequal(f([0.4])) - @test allequal(f([0.4, 0.4])) - @test !allequal(f([0.4, 0.3])) - @test allequal(f([0.4, 0.4, 0.4, 0.4])) - @test !allunique(f([0.4, 0.4, 0.3, 0.4])) + @test allequal(uweights(0)) + @test allequal(uweights(1)) + @test allequal(uweights(2)) + @test allequal(uweights(5)) end end - # Uniform weights - @test allunique(uweights(0)) - @test allunique(uweights(1)) - @test !allunique(uweights(2)) - @test !allunique(uweights(5)) - - if isdefined(Base, :allequal) - @test allequal(uweights(0)) - @test allequal(uweights(1)) - @test allequal(uweights(2)) - @test allequal(uweights(5)) + @testset "custom weight types" begin + @test mean([1, 2, 3], MyWeights([1, 4, 10])) ≈ 2.6 + @test isnan(mean([1, 2, 3], MyWeights([NaN, 4, 10]))) + @test mode([1, 2, 3], MyWeights([1, 4, 10])) == 3 + @test_throws ArgumentError mode([1, 2, 3], MyWeights([NaN, 4, 10])) end -end - -@testset "custom weight types" begin - @test mean([1, 2, 3], MyWeights([1, 4, 10])) ≈ 2.6 - @test mean([1, 2, 3], MyWeights([NaN, 4, 10])) |> isnan - @test mode([1, 2, 3], MyWeights([1, 4, 10])) == 3 - @test_throws ArgumentError mode([1, 2, 3], MyWeights([NaN, 4, 10])) -end - end # @testset StatsBase.Weights diff --git a/test/wsampling.jl b/test/wsampling.jl index efe9a608f..bac240dbd 100644 --- a/test/wsampling.jl +++ b/test/wsampling.jl @@ -20,11 +20,11 @@ function check_wsample_wrep(a::AbstractArray, vrgn, wv::AbstractWeights, ptol::R end else @test !issorted(a; rev=rev) - ncols = size(a,2) + ncols = size(a, 2) if ncols == 1 @test isapprox(proportions(a, vmin:vmax), p0, atol=ptol) else - for j = 1:ncols + for j in 1:ncols aj = view(a, :, j) @test isapprox(proportions(aj, vmin:vmax), p0, atol=ptol) end @@ -37,13 +37,11 @@ import StatsBase: direct_sample!, alias_sample! 
n = 10^6 wv = weights([0.2, 0.8, 0.4, 0.6]) -for wv in ( - weights([0.2, 0.8, 0.4, 0.6]), - weights([2, 8, 4, 6]), - weights(Float32[0.2, 0.8, 0.4, 0.6]), - Weights(Float32[0.2, 0.8, 0.4, 0.6], 2), - Weights([2, 8, 4, 6], 20.0), -) +for wv in (weights([0.2, 0.8, 0.4, 0.6]), + weights([2, 8, 4, 6]), + weights(Float32[0.2, 0.8, 0.4, 0.6]), + Weights(Float32[0.2, 0.8, 0.4, 0.6], 2), + Weights([2, 8, 4, 6], 20.0)) a = direct_sample!(4:7, wv, zeros(Int, n, 3)) check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered=false) test_rng_use(direct_sample!, 4:7, wv, zeros(Int, 100)) @@ -58,7 +56,8 @@ end @test_throws ArgumentError alias_sample!(rand(10), weights(fill(0, 10)), rand(10)) @test_throws ArgumentError alias_sample!(rand(100), weights(randn(100)), rand(10)) -for rev in (true, false), T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int}) +for rev in (true, false), + T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int}) r = rev ? reverse(4:7) : (4:7) r = T===Int ? r : T.(r) aa = Int.(sample(r, wv, n; ordered=true)) @@ -78,8 +77,8 @@ function check_wsample_norep(a::AbstractArray, vrgn, wv::AbstractWeights, ptol:: @test vmin <= amin <= amax <= vmax n = vmax - vmin + 1 - for j = 1:size(a,2) - aj = view(a,:,j) + for j in 1:size(a, 2) + aj = view(a, :, j) @assert allunique(aj) if ordered @assert issorted(aj; rev=rev) @@ -89,7 +88,7 @@ function check_wsample_norep(a::AbstractArray, vrgn, wv::AbstractWeights, ptol:: if ptol > 0 p0 = wv ./ sum(wv) rev && reverse!(p0) - @test isapprox(proportions(a[1,:], vmin:vmax), p0, atol=ptol) + @test isapprox(proportions(a[1, :], vmin:vmax), p0, atol=ptol) end end @@ -100,29 +99,29 @@ n = 10^5 wv = weights([0.2, 0.8, 0.4, 0.6]) a = zeros(Int, 3, n) -for j = 1:n - naive_wsample_norep!(4:7, wv, view(a,:,j)) +for j in 1:n + naive_wsample_norep!(4:7, wv, view(a, :, j)) end check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered=false) test_rng_use(naive_wsample_norep!, 4:7, wv, zeros(Int, 2)) a = zeros(Int, 3, n) -for j = 1:n - efraimidis_a_wsample_norep!(4:7, wv, view(a,:,j)) +for j in 1:n + efraimidis_a_wsample_norep!(4:7, wv, view(a, :, j)) end check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered=false) test_rng_use(efraimidis_a_wsample_norep!, 4:7, wv, zeros(Int, 2)) a = zeros(Int, 3, n) -for j = 1:n - efraimidis_ares_wsample_norep!(4:7, wv, view(a,:,j)) +for j in 1:n + efraimidis_ares_wsample_norep!(4:7, wv, view(a, :, j)) end check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered=false) test_rng_use(efraimidis_ares_wsample_norep!, 4:7, wv, zeros(Int, 2)) a = zeros(Int, 3, n) -for j = 1:n - efraimidis_aexpj_wsample_norep!(4:7, wv, view(a,:,j)) +for j in 1:n + efraimidis_aexpj_wsample_norep!(4:7, wv, view(a, :, j)) end check_wsample_norep(a, (4, 7), wv, 5.0e-3; ordered=false) test_rng_use(efraimidis_aexpj_wsample_norep!, 4:7, wv, zeros(Int, 2)) @@ -130,7 +129,8 @@ test_rng_use(efraimidis_aexpj_wsample_norep!, 4:7, wv, zeros(Int, 2)) a = sample(4:7, wv, 3; replace=false, ordered=false) check_wsample_norep(a, (4, 7), wv, -1; ordered=false) -for rev in (true, false), T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int}) +for rev in (true, false), + T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int}) r = rev ? reverse(4:7) : (4:7) r = T===Int ? 
r : T.(r) aa = Int.(sample(r, wv, 3; replace=false, ordered=true)) @@ -148,7 +148,7 @@ end @test_throws ArgumentError sample(weights(ox)) for f in (sample!, wsample!, naive_wsample_norep!, efraimidis_a_wsample_norep!, - efraimidis_ares_wsample_norep!, efraimidis_aexpj_wsample_norep!) + efraimidis_ares_wsample_norep!, efraimidis_aexpj_wsample_norep!) # Test that offset arrays throw an error @test_throws ArgumentError f(ox, weights(y), z) @test_throws ArgumentError f(x, weights(oy), z) @@ -165,4 +165,4 @@ end # but it currently fails as Base.mightalias is not smart enough @test_broken f(y, weights(view(x, 5:6)), view(x, 2:4)) end -end \ No newline at end of file +end
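
The hunks above are pure YAS reformatting, so no behavior changes, but the invariants the reshaped tests pin down are easy to lose in diff form. For reference, they boil down to the standalone sketch below; it assumes a recent StatsBase (one with the `scale` keyword on `eweights`, as used in the hunks above) and only mirrors test expressions already present in this patch rather than adding anything new.

using StatsBase, Test

# 1. The weighted median agrees with the weighted 0.5-quantile.
data = [4, 3, 2, 1]
wv = weights([1, 2, 3, 4])
@test median(data, wv) ≈ quantile(data, wv, 0.5) atol=1e-5

# 2. eweights(...; scale=true) is the scale=false result rescaled so the
#    newest observation gets weight 1 (raw weights are λ * (1 - λ)^(1 - i)).
λ = 0.2
raw = eweights(1:10, λ; scale=false)
@test eweights(1:10, λ; scale=true) ≈ raw / maximum(raw)

# 3. Weighted sampling without replacement yields distinct values,
#    sorted when ordered=true.
s = sample(4:7, weights([0.2, 0.8, 0.4, 0.6]), 3; replace=false, ordered=true)
@test allunique(s) && issorted(s)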