@@ -6,16 +6,90 @@ using StatsBase
6
6
using Statistics
7
7
8
8
@testset " StatsBase support" begin
9
- r = rand (1 ,100 )
10
- V = VectorOfSimilarVectors {Float64} (r)
11
- w = FrequencyWeights (rand (100 ))
12
- w0 = FrequencyWeights (vec (ones (100 ,1 )))
13
- @test isapprox (@inferred (sum (V, w))[1 ], sum (r, w))
14
- @test isapprox (@inferred (mean (V,w))[1 ], mean (r, w))
15
- @test isapprox (@inferred (var (V,w, corrected= true ))[1 ], var (r, w, corrected= true ))
16
- @test isapprox (@inferred (std (V,w, corrected= true ))[1 ], sqrt (var (V,w, corrected= true )[1 ]))
17
- @test isapprox (@inferred (std (V, corrected= true ))[1 ], sqrt (var (V, w0, corrected= true )[1 ]))
18
- @test isapprox (@inferred (std (V, corrected= true ))[1 ], std (V, w0, corrected= true )[1 ])
19
- @test isapprox (@inferred (cov (V, w))[1 ], var (r, w, corrected= true ))
20
- @test isapprox (@inferred (cor (V,w))[1 ], 1.0 )
9
# Shared fixtures: ten random length-3 vectors and ten random 2×3×3 arrays,
# each kept both as a plain nested array (used for the reference results) and
# wrapped in an ArrayOfSimilarArrays (the type under test).
VV = map(_ -> rand(3), 1:10)
VV_aosa = ArrayOfSimilarArrays(VV)

VA = map(_ -> rand(2, 3, 3), 1:10)
VA_aosa = ArrayOfSimilarArrays(VA)

# One random weight per sample.
w = FrequencyWeights(rand(10))
16
+
17
# Return `true` when `A` and `B` have exactly the same size and are
# approximately equal (`≈`). The size check runs first: `≈` on arrays subtracts
# its arguments, which throws a DimensionMismatch for incompatible shapes, so
# short-circuiting on the size turns a shape mismatch into a plain `false`.
# The explicit size check is still required because `≈` tolerates shapes that
# differ only by trailing singleton dimensions.
array_cmp(A, B) = (size(A) == size(B)) && (A ≈ B)
18
+
19
+
20
+ # sum and mean for Vector{Vector} with weights currently fail with
21
+ # the implementations in StatsBase. This should be considered a
22
+ # bug in StatsBase, since Base and Statistics support sum and mean
23
+ # for Vector{Vector} without weights. Also, adding products of vectors
24
+ # and weights is perfectly natural, mathematically.
25
+
26
# Reference weighted sum: multiply every element array of `A` by the matching
# entry of `w` and add up the scaled arrays.
function _sum(A::AbstractVector{<:AbstractArray}, w::AbstractWeights)
    scaled = broadcast(*, A, w)
    return sum(scaled)
end
28
+
29
# Reference weighted mean: the weighted sum of the element arrays of `A`
# divided element-wise by the total weight `sum(w)`.
function _mean(A::AbstractVector{<:AbstractArray}, w::AbstractWeights)
    total = _sum(A, w)
    return total ./ sum(w)
end
31
+
32
@testset " sum and mean" begin
    # Weighted sum of the wrapped arrays must match the reference computed
    # from the plain nested arrays, and its return type must be inferrable.
    @test array_cmp((@inferred sum(VV_aosa, w)), _sum(VV, w))
    @test array_cmp((@inferred sum(VA_aosa, w)), _sum(VA, w))

    # Same checks for the weighted mean.
    @test array_cmp((@inferred mean(VV_aosa, w)), _mean(VV, w))
    @test array_cmp((@inferred mean(VA_aosa, w)), _mean(VA, w))
end
40
+
41
+
42
+ # Weighted var and std are currently not supported for Vector{Vector} by
43
+ # StatsBase. This should be considered a bug in StatsBase, since
44
+ # unweighted var and std for Vector{Vector} are supported by Statistics.
45
+
46
# Reference weighted variance, computed element-wise over the element arrays
# of `A`. The squared deviations from the weighted mean are weighted by `w`,
# summed, and divided by `sum(w) - 1` when `corrected` is true, or by `sum(w)`
# otherwise.
function _var(A::AbstractVector{<:AbstractArray}, w::FrequencyWeights; corrected = true)
    μ = _mean(A, w)
    denom = sum(w) + (corrected ? -1 : 0)
    sqdev = [(x .- μ) .^ 2 for x in A]
    return sum(sqdev .* w) ./ denom
end
52
+
53
# Reference weighted standard deviation: element-wise square root of `_var`,
# forwarding the `corrected` keyword unchanged.
function _std(A::AbstractVector{<:AbstractArray}, w::AbstractWeights; corrected = true)
    variance = _var(A, w, corrected = corrected)
    return broadcast(sqrt, variance)
end
55
+
56
@testset " var and std" begin
    # Reference values are computed from the plain nested arrays (VV, VA), as
    # in the sum/mean and cor testsets, so the expected result does not depend
    # on the ArrayOfSimilarArrays type that is being tested.
    @test array_cmp(@inferred(var(VV_aosa, w)), _var(VV, w))
    @test array_cmp(@inferred(var(VV_aosa, w, corrected = false)), _var(VV, w, corrected = false))
    @test array_cmp(@inferred(var(VA_aosa, w)), _var(VA, w))
    @test array_cmp(@inferred(var(VA_aosa, w, corrected = false)), _var(VA, w, corrected = false))

    @test array_cmp(@inferred(std(VV_aosa, w)), _std(VV, w))
    @test array_cmp(@inferred(std(VV_aosa, w, corrected = false)), _std(VV, w, corrected = false))
    @test array_cmp(@inferred(std(VA_aosa, w)), _std(VA, w))
    @test array_cmp(@inferred(std(VA_aosa, w, corrected = false)), _std(VA, w, corrected = false))
end
67
+
68
+
69
+ # For weighted cov of Vector{Vector}, StatsBase currently returns a vector
70
+ # instead of a matrix, with `cov(VV, fill(1, 10)) != cov(VV)`.
71
+ # This should be considered a bug in StatsBase.
72
+
73
# Reference weighted covariance matrix of the vectors in `A`. For every sample
# `i` the outer product of its deviation from the weighted mean is scaled by
# `w[i]`; the resulting matrices are summed and divided by `sum(w) - 1` when
# `corrected` is true, or by `sum(w)` otherwise.
function _cov(A::AbstractVector{<:AbstractVector}, w::FrequencyWeights; corrected = true)
    μ = _mean(A, w)
    denom = sum(w) + (corrected ? -1 : 0)
    # Weighted outer product of the deviations of sample `i`.
    scatter(i) = [
        (A[i][j] - μ[j]) * (A[i][k] - μ[k]) * w[i]
        for j in eachindex(A[i]), k in eachindex(A[i])
    ]
    return sum([scatter(i) for i in eachindex(A)]) ./ denom
end
79
+
80
@testset " cov" begin
    # Reference values are computed from the plain nested array VV, as in the
    # sum/mean and cor testsets, so the expected matrix does not depend on the
    # ArrayOfSimilarArrays type that is being tested.
    @test array_cmp(@inferred(cov(VV_aosa, w)), _cov(VV, w))
    @test array_cmp(@inferred(cov(VV_aosa, w, corrected = false)), _cov(VV, w, corrected = false))
end
84
+
85
+
86
+ # Weighted cor is currently not supported for Vector{Vector} by StatsBase.
87
+ # This should be considered a bug in StatsBase, since unweighted cor
88
+ # for Vector{Vector} is supported by Statistics.
89
+
90
# Reference weighted correlation: pass the reference covariance matrix and the
# reference standard deviations to `cov2cor`.
# NOTE(review): `cov2cor` is presumably StatsBase's covariance-to-correlation
# conversion — confirm.
function _cor(A::AbstractVector{<:AbstractVector}, w::AbstractWeights)
    covariance = _cov(A, w)
    deviations = _std(A, w)
    return cov2cor(covariance, deviations)
end
91
+
92
@testset " cor" begin
    # Weighted correlation of the wrapped vectors must match the reference
    # computed from the plain nested array, with an inferrable return type.
    @test array_cmp((@inferred cor(VV_aosa, w)), _cor(VV, w))
end
21
95
end
0 commit comments