Skip to content

Commit d4bd5c4

Browse files
authored
Merge pull request #228 from JuliaAI/density-estimation
Add tests for density estimators
2 parents 463f3b7 + 37770a6 commit d4bd5c4

File tree

2 files changed

+82
-4
lines changed

2 files changed

+82
-4
lines changed

test/density_estimation.jl

Lines changed: 79 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,79 @@
1+
using Test
2+
using MLJTuning
3+
using MLJBase
4+
using StatisticalMeasures
5+
using StableRNGs
6+
import MLJModelInterface
7+
import StatisticalMeasures: CategoricalDistributions, Distributions
8+
9+
10+
# We define a density estimator to fit a `UnivariateFinite` distribution to some
11+
# Categorical data, with a Laplace smoothing option, α.
12+
13+
# Density estimator fitting a `UnivariateFinite` distribution to categorical
# data. `alpha` is the Laplace smoothing constant (default 1.0).
# `Base.@kwdef` supplies both the positional constructor and the keyword
# constructor with default, matching the hand-written pair it replaces.
Base.@kwdef mutable struct UnivariateFiniteFitter <: MLJModelInterface.Probabilistic
    alpha::Float64 = 1.0
end
17+
18+
"""
    MLJModelInterface.fit(model::UnivariateFiniteFitter, verbosity, X, y)

Fit a Laplace-smoothed `UnivariateFinite` distribution to the categorical
target `y` (the input `X` is ignored). Each class probability is

    p(c) = (count(c) + α) / (N + α⋅d)

where `α = model.alpha`, `N = length(y)` and `d` is the number of classes
in the pool of `y`. Returns `(fitresult, cache, report)` in the standard
MLJ form, with `cache === nothing` and the fitted distribution's
parameters exposed in the report.
"""
function MLJModelInterface.fit(model::UnivariateFiniteFitter, verbosity, X, y)
    α = model.alpha
    n_obs = length(y)
    pool = classes(y)
    n_classes = length(pool)

    # Classes absent from `y` get a zero count (hence the `get` default),
    # so smoothing still assigns them positive probability when α > 0.
    counts = Distributions.countmap(y)
    prob_given_class = Dict(
        c => (get(counts, c, 0) + α) / (n_obs + α * n_classes)
        for c in pool
    )

    fitresult = CategoricalDistributions.UnivariateFinite(prob_given_class)

    verbosity > 0 && @info "Fitted a $fitresult"

    return fitresult, nothing, (params=Distributions.params(fitresult),)
end
39+
40+
# Prediction ignores the input `X`: the fitted distribution itself is the
# (constant) probabilistic prediction.
function MLJModelInterface.predict(model::UnivariateFiniteFitter, fitresult, X)
    return fitresult
end
43+
44+
45+
# Trait declarations: the model consumes no input features (`Nothing`) and
# targets vectors of finite (categorical) elements.
MLJModelInterface.input_scitype(::Type{<:UnivariateFiniteFitter}) = Nothing
MLJModelInterface.target_scitype(::Type{<:UnivariateFiniteFitter}) = AbstractVector{<:Finite}
49+
50+
# This test will fail if MLJ test dependency MLJBase is < 1.11
51+
@testset "tuning for density estimators" begin
    # Seven categorical observations; density estimation uses no input
    # features, so `X` is `nothing`.
    y = coerce(collect("abbabbc"), Multiclass)
    X = nothing

    train, test = partition(eachindex(y), 3/7)

    # For the above train-test split, hand calculation determines, when
    # optimizing against log loss, that:
    best_alpha = 2.0
    best_loss = (4log(9) - log(3) - 2log(4) - log(2))/4

    model = UnivariateFiniteFitter(alpha=0)
    r = range(model, :alpha, values=[0.1, 1, 1.5, 2, 2.5, 10])
    tmodel = TunedModel(
        model,
        tuning=Grid(shuffle=false),
        range=r,
        resampling=[(train, test),],
        measure=log_loss,
        compact_history=false,
    )

    mach = machine(tmodel, X, y)
    fit!(mach, verbosity=0)
    best = report(mach).best_history_entry
    @test best.model.alpha == best_alpha
    # BUGFIX: the comparison operator was missing here, which is a syntax
    # error; floats are compared with `≈` (isapprox), not `==`.
    @test best.evaluation.measurement[1] ≈ best_loss
end

true

test/runtests.jl

Lines changed: 3 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -60,7 +60,6 @@ end
6060
@test include("serialization.jl")
6161
end
6262

63-
# @testset "julia bug" begin
64-
# @test include("julia_bug.jl")
65-
# end
66-
63+
# Run the density-estimation tests; the included file returns `true` on
# success. (Typo fix in the testset name: "estimatation" -> "estimation".)
@testset "density estimation" begin
    @test include("density_estimation.jl")
end

0 commit comments

Comments (0)