Skip to content

Commit 6fe67cf

Browse files
authored
Merge pull request #350 from JuliaStats/an/issue280
Handle discrete distributions in ExactOneSampleKSTest
2 parents 0af0f1c + ca60808 commit 6fe67cf

File tree

2 files changed

+113
-85
lines changed

2 files changed

+113
-85
lines changed

src/kolmogorov_smirnov.jl

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,26 @@ default_tail(test::KSTest) = :both
3838
# compute supremum of differences between target and empirical cdf before and after the jump of the empirical cdf.
3939
function ksstats(x::AbstractVector{T}, d::UnivariateDistribution) where T<:Real
4040
n = length(x)
41-
cdfs = cdf.(Ref(d), sort(x))
42-
δp = maximum((1:n) / n - cdfs)
43-
δn = -minimum((0:n-1) / n - cdfs)
44-
δ = max(δn, δp)
45-
(n, δ, δp, δn)
41+
sx = sort(x)
42+
g = cdf.(Ref(d), sx)
43+
g₋ = if d isa DiscreteDistribution
44+
# http://www.stat.yale.edu/~jay/EmersonMaterials/DiscreteGOF.pdf page 2
45+
cdf.(Ref(d), prevfloat.(float.(sx)))
46+
else
47+
g
48+
end
49+
_ecdf = ecdf(sx)
50+
f = _ecdf(sx)
51+
δ₊ = zero(zero(eltype(f)) - zero(eltype(g)))
52+
δ₋ = zero(zero(eltype(g₋)) - zero(eltype(f)))
53+
f₋i = zero(eltype(f))
54+
for (fi, gi, g₋i) in zip(f, g, g₋)
55+
δ₊ = max(δ₊, fi - gi)
56+
δ₋ = max(δ₋, g₋i - f₋i)
57+
f₋i = fi
58+
end
59+
δ = max(δ₊, δ₋)
60+
(n, δ, δ₊, δ₋)
4661
end
4762

4863
### EXACT KOLMOGOROV SMIRNOV TEST
@@ -64,10 +79,6 @@ sample is not drawn from `d`.
6479
Implements: [`pvalue`](@ref)
6580
"""
6681
function ExactOneSampleKSTest(x::AbstractVector{<:Real}, d::UnivariateDistribution)
67-
if !allunique(x)
68-
@warn("This test is inaccurate with ties")
69-
end
70-
7182
ExactOneSampleKSTest(ksstats(x, d)...)
7283
end
7384

@@ -108,10 +119,6 @@ that the sample is not drawn from `d`.
108119
Implements: [`pvalue`](@ref)
109120
"""
110121
function ApproximateOneSampleKSTest(x::AbstractVector{<:Real}, d::UnivariateDistribution)
111-
if !allunique(x)
112-
@warn("This test is inaccurate with ties")
113-
end
114-
115122
ApproximateOneSampleKSTest(ksstats(x, d)...)
116123
end
117124

@@ -215,4 +222,4 @@ function ksstats(x::AbstractVector{T}, y::AbstractVector{S}) where {T<:Real, S<:
215222
end
216223

217224
(n_x, n_y, max(δp, -δn), δp, -δn)
218-
end
225+
end

test/kolmogorov_smirnov.jl

Lines changed: 92 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -8,84 +8,105 @@ x = [0.3500, 0.1966, 0.2511, 0.6160, 0.4733,
88
0.0759, 0.0540, 0.5308, 0.7792, 0.9340,
99
0.1299, 0.5688, 0.4694, 0.0119, 0.3371
1010
]
11-
t = ApproximateOneSampleKSTest(x, Uniform())
12-
@test t.δ ≈ 0.1440
13-
@test t.δn ≈ 0.0571
14-
@test t.δp ≈ 0.1440
15-
@test pvalue(t) ≈ 0.6777349664784745
16-
@test pvalue(t; tail=:left) ≈ 0.849573771973747
17-
@test pvalue(t; tail=:right) ≈ 0.3545875485608989
18-
@test default_tail(t) == :both
19-
show(IOBuffer(), t)
2011

21-
t = ApproximateTwoSampleKSTest(x, [(0:24)/25...])
22-
@test t.δ ≈ 0.12
23-
@test t.δn ≈ 0.08
24-
@test t.δp ≈ 0.12
25-
@test pvalue(t) ≈ 0.993764859699076
26-
@test pvalue(t; tail=:left) ≈ 0.8521437889662113
27-
@test pvalue(t; tail=:right) ≈ 0.697676326071031
28-
@test default_tail(t) == :both
29-
show(IOBuffer(), t)
12+
@testset "Uniform" begin
13+
t = ApproximateOneSampleKSTest(x, Uniform())
14+
@test t.δ ≈ 0.1440
15+
@test t.δn ≈ 0.0571
16+
@test t.δp ≈ 0.1440
17+
@test pvalue(t) ≈ 0.6777349664784745
18+
@test pvalue(t; tail=:left) ≈ 0.849573771973747
19+
@test pvalue(t; tail=:right) ≈ 0.3545875485608989
20+
@test default_tail(t) == :both
21+
show(IOBuffer(), t)
3022

31-
t = ExactOneSampleKSTest(x, Uniform())
32-
@test t.δ ≈ 0.1440
33-
@test t.δn ≈ 0.0571
34-
@test t.δp ≈ 0.1440
35-
@test pvalue(t) ≈ 0.6263437768244742
36-
@test pvalue(t; tail=:left) ≈ 0.8195705417998183
37-
@test pvalue(t; tail=:right) ≈ 0.32350648882777194
38-
@test default_tail(t) == :both
39-
show(IOBuffer(), t)
23+
t = ApproximateTwoSampleKSTest(x, [(0:24)/25...])
24+
@test t.δ ≈ 0.12
25+
@test t.δn ≈ 0.08
26+
@test t.δp ≈ 0.12
27+
@test pvalue(t) ≈ 0.993764859699076
28+
@test pvalue(t; tail=:left) ≈ 0.8521437889662113
29+
@test pvalue(t; tail=:right) ≈ 0.697676326071031
30+
@test default_tail(t) == :both
31+
show(IOBuffer(), t)
4032

41-
## check fit to normal distribution
42-
t = ApproximateOneSampleKSTest(x, Normal())
43-
@test t.δ ≈ 0.5047473010922947
44-
@test t.δn ≈ 0.5047473010922947
45-
@test t.δp ≈ 0.17515194649718513
46-
@test pvalue(t) ≈ 5.871827067532435e-6
47-
@test pvalue(t; tail=:left) ≈ 2.9359135337662175e-6
48-
@test pvalue(t; tail=:right) ≈ 0.21569061887162347
33+
t = ExactOneSampleKSTest(x, Uniform())
34+
@test t.δ ≈ 0.1440
35+
@test t.δn ≈ 0.0571
36+
@test t.δp ≈ 0.1440
37+
@test pvalue(t) ≈ 0.6263437768244742
38+
@test pvalue(t; tail=:left) ≈ 0.8195705417998183
39+
@test pvalue(t; tail=:right) ≈ 0.32350648882777194
40+
@test default_tail(t) == :both
41+
show(IOBuffer(), t)
42+
end
4943

50-
## check unequal sample size
51-
t = ApproximateTwoSampleKSTest(x, [(0:5)/6...])
52-
@test t.δ ≈ 0.22
53-
@test t.δn ≈ 0.22
54-
@test t.δp ≈ 0.09333333333333346
55-
@test pvalue(t) ≈ 0.973300892518972
56-
@test pvalue(t; tail=:left) ≈ 0.6260111498528065
57-
@test pvalue(t; tail=:right) ≈ 0.9191544797498837
44+
@testset "check fit to normal distribution" begin
45+
t = ApproximateOneSampleKSTest(x, Normal())
46+
@test t.δ ≈ 0.5047473010922947
47+
@test t.δn ≈ 0.5047473010922947
48+
@test t.δp ≈ 0.17515194649718513
49+
@test pvalue(t) ≈ 5.871827067532435e-6
50+
@test pvalue(t; tail=:left) ≈ 2.9359135337662175e-6
51+
@test pvalue(t; tail=:right) ≈ 0.21569061887162347
52+
end
5853

59-
# http://ocw.mit.edu/courses/mathematics/18-443-statistics-for-applications-fall-2006/lecture-notes/lecture14.pdf
60-
x = [0.58, 0.42, 0.52, 0.33, 0.43, 0.23, 0.58, 0.76, 0.53, 0.64]
61-
t = ApproximateOneSampleKSTest(x, Uniform())
62-
@test t.δ ≈ 0.26
63-
@test t.δn ≈ 0.23
64-
@test t.δp ≈ 0.26
65-
@test pvalue(t) ≈ 0.5084937988981307
66-
@test pvalue(t; tail=:left) ≈ 0.3471494153245104
67-
@test pvalue(t; tail=:right) ≈ 0.25872229825964005
54+
@testset "check unequal sample size" begin
55+
t = ApproximateTwoSampleKSTest(x, [(0:5)/6...])
56+
@test t.δ ≈ 0.22
57+
@test t.δn ≈ 0.22
58+
@test t.δp ≈ 0.09333333333333346
59+
@test pvalue(t) ≈ 0.973300892518972
60+
@test pvalue(t; tail=:left) ≈ 0.6260111498528065
61+
@test pvalue(t; tail=:right) ≈ 0.9191544797498837
62+
end
6863

69-
t = ApproximateTwoSampleKSTest(x, [(0:9)/10...])
70-
@test t.δ ≈ 0.3
71-
@test t.δn ≈ 0.3
72-
@test t.δp ≈ 0.2
73-
@test pvalue(t) ≈ 0.7590978384203948
74-
@test pvalue(t; tail=:left) ≈ 0.406569659740599
75-
@test pvalue(t; tail=:right) ≈ 0.6703200460356393
64+
@testset "MIT course examples" begin
65+
# http://ocw.mit.edu/courses/mathematics/18-443-statistics-for-applications-fall-2006/lecture-notes/lecture14.pdf
66+
x = [0.58, 0.42, 0.52, 0.33, 0.43, 0.23, 0.58, 0.76, 0.53, 0.64]
67+
t = ApproximateOneSampleKSTest(x, Uniform())
68+
@test t.δ ≈ 0.26
69+
@test t.δn ≈ 0.23
70+
@test t.δp ≈ 0.26
71+
@test pvalue(t) ≈ 0.5084937988981307
72+
@test pvalue(t; tail=:left) ≈ 0.3471494153245104
73+
@test pvalue(t; tail=:right) ≈ 0.25872229825964005
7674

77-
t = ExactOneSampleKSTest(x, Uniform())
78-
@test t.δ ≈ 0.26
79-
@test t.δn ≈ 0.23
80-
@test t.δp ≈ 0.26
81-
@test pvalue(t) ≈ 0.4351284228580825
82-
@test pvalue(t; tail=:left) ≈ 0.3013310572470338
83-
@test pvalue(t; tail=:right) ≈ 0.2193143479950862
75+
t = ApproximateTwoSampleKSTest(x, [(0:9)/10...])
76+
@test t.δ ≈ 0.3
77+
@test t.δn ≈ 0.3
78+
@test t.δp ≈ 0.2
79+
@test pvalue(t) ≈ 0.7590978384203948
80+
@test pvalue(t; tail=:left) ≈ 0.406569659740599
81+
@test pvalue(t; tail=:right) ≈ 0.6703200460356393
8482

85-
# Check two samples with ties
83+
t = ExactOneSampleKSTest(x, Uniform())
84+
@test t.δ ≈ 0.26
85+
@test t.δn ≈ 0.23
86+
@test t.δp ≈ 0.26
87+
@test pvalue(t) ≈ 0.4351284228580825
88+
@test pvalue(t; tail=:left) ≈ 0.3013310572470338
89+
@test pvalue(t; tail=:right) ≈ 0.2193143479950862
90+
end
8691

87-
t = ApproximateTwoSampleKSTest(ones(10), ones(10))
88-
@test isapprox(t.δ, 0., atol=1e-16)
89-
@test isapprox(t.δp, 0., atol=1e-16)
90-
@test isapprox(t.δn, 0., atol=1e-16)
92+
@testset "Check two samples with ties" begin
93+
t = ApproximateTwoSampleKSTest(ones(10), ones(10))
94+
@test isapprox(t.δ, 0., atol=1e-16)
95+
@test isapprox(t.δp, 0., atol=1e-16)
96+
@test isapprox(t.δn, 0., atol=1e-16)
97+
end
98+
99+
@testset "Issue 280" begin
100+
n = 50
101+
x = 0.5 .^ (1:n)
102+
d = DiscreteNonParametric(1:n, x)
103+
y = vcat(fill(1, 16), fill(2, 8), fill(3, 4), 4, 4, 5)
104+
t = ExactOneSampleKSTest(y, d)
105+
@test t.δ ≈ 0.03125
106+
@test t.δp ≈ 0.03125
107+
@test t.δn ≈ 0.0
108+
@test pvalue(t) ≈ 0.9999999999999348
109+
@test pvalue(t; tail=:left) ≈ 1.0
110+
@test pvalue(t; tail=:right) ≈ 0.9213372964737361
111+
end
91112
end

0 commit comments

Comments
 (0)