Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
- uses: julia-actions/julia-runtest@v1
env:
# This environment variable enables the integration tests:
MLJ_TEST_REGISTRY: '1'
MLJ_TEST_REGISTRY: "false"
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v4
with:
Expand Down
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
CategoricalArrays = "0.9, 0.10"
CategoricalDistributions = "0.1"
CategoricalArrays = "1"
CategoricalDistributions = "0.2"
Combinatorics = "1.0"
Dates = "1"
Distances = "0.9,0.10"
Expand Down
3 changes: 1 addition & 2 deletions src/MLJModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ using Combinatorics
import Distributions
import REPL # stdlib, needed for `Term`
import PrettyPrinting
import CategoricalDistributions: UnivariateFinite, UnivariateFiniteArray,
classes
import CategoricalDistributions: UnivariateFinite, UnivariateFiniteArray
import StatisticalTraits # for `info`

# from loading.jl:
Expand Down
17 changes: 9 additions & 8 deletions src/builtins/Constant.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function MLJModelInterface.fit(::ConstantClassifier,
y,
w=nothing)
d = Distributions.fit(UnivariateFinite, y, w)
C = classes(d)
C = levels(d)
fitresult = (C, Distributions.pdf([d, ], C))
cache = nothing
report = NamedTuple()
Expand All @@ -66,10 +66,10 @@ MLJModelInterface.fitted_params(::ConstantClassifier, fitresult) =
(target_distribution=fitresult,)

function MLJModelInterface.predict(::ConstantClassifier, fitresult, Xnew)
_classes, probs1 = fitresult
_levels, probs1 = fitresult
N = nrows(Xnew)
probs = reshape(vcat(fill(probs1, N)...), N, length(_classes))
return UnivariateFinite(_classes, probs)
probs = reshape(vcat(fill(probs1, N)...), N, length(_levels))
return UnivariateFinite(_levels, probs)
end


Expand Down Expand Up @@ -216,10 +216,11 @@ ConstantRegressor
This "dummy" probabilistic predictor always returns the same distribution, irrespective of
the provided input pattern. The distribution `d` returned is the `UnivariateFinite`
distribution based on frequency of classes observed in the training target data. So,
`pdf(d, level)` is the number of times the training target takes on the value `level`.
Use `predict_mode` instead of `predict` to obtain the training target mode instead. For
more on the `UnivariateFinite` type, see the CategoricalDistributions.jl package.
distribution based on frequency of levels (classes) observed in the training target
data. So, `pdf(d, level)` is the proportion of training observations whose target takes
the value `level`. Use `predict_mode` instead of `predict` to obtain the training target
mode. For more on the `UnivariateFinite` type, see the CategoricalDistributions.jl
package.
Almost any reasonable model is expected to outperform `ConstantClassifier`, which is used
almost exclusively for testing and establishing performance baselines.
Expand Down
16 changes: 8 additions & 8 deletions src/builtins/ThresholdPredictors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,16 @@ const ThresholdSupported = Union{keys(_type_given_atom)...}

const ERR_MODEL_UNSPECIFIED = ArgumentError(
"Expecting atomic model as argument. None specified. ")
warn_classes(first_class, second_class) =
# Warning text emitted when a binary target is unordered: reports which level is
# taken as the positive class (`second_class`) and which as the negative class
# (`first_class`). Each fragment ends in a space so the concatenated sentences
# do not run together.
warn_levels(first_class, second_class) =
    "Taking positive class as `$(second_class)` and negative class as "*
    "`$(first_class)`. "*
    "Coerce target to `OrderedFactor{2}` to suppress this warning, "*
    "ensuring that positive class > negative class. "
const ERR_CLASSES_DETECTOR = ArgumentError(
const ERR_LEVELS_DETECTOR = ArgumentError(
"Targets for detector models must be ordered. Consider coercing to "*
"`OrderedFactor`, ensuring that outlier class > inlier class. ")
const ERR_TARGET_NOT_BINARY = ArgumentError(
"Target `y` must have two classes in its pool, even if only one "*
"Target `y` must have two levels in its pool, even if only one "*
"class is manifest. ")
const err_unsupported_model_type(T) = ArgumentError(
"`BinaryThresholdPredictor` does not support atomic models with supertype `$T`. "*
Expand Down Expand Up @@ -208,9 +208,9 @@ function MMI.fit(model::ThresholdUnion, verbosity::Int, args...)
length(L) == 2 || throw(ERR_TARGET_NOT_BINARY)
first_class, second_class = L
if model.model isa Probabilistic
@warn warn_classes(first_class, second_class)
@warn warn_levels(first_class, second_class)
else
throw(ERR_CLASSES_DETECTOR)
throw(ERR_LEVELS_DETECTOR)
end
end
model_fitresult, model_cache, model_report = MMI.fit(
Expand Down Expand Up @@ -259,7 +259,7 @@ function _predict_threshold(yhat::UnivariateFinite, threshold)
dict = yhat.prob_given_ref
length(threshold) == length(dict) || throw(
ArgumentError(
"`length(threshold)` has to equal number of classes in specified "*
"`length(threshold)` has to equal number of levels in specified "*
"`UnivariateFinite` distribution."
)
)
Expand All @@ -277,14 +277,14 @@ function _predict_threshold(yhat::UnivariateFiniteArray{S,V,R,P,N},
dict = yhat.prob_given_ref
length(threshold) == length(dict) || throw(
ArgumentError(
"`length(threshold)` has to equal number of classes in specified "*
"`length(threshold)` has to equal number of levels in specified "*
"`UnivariateFiniteArray`."
)
)
d = yhat.decoder(1)
levs = levels(d)
ord = isordered(d)
# Array to house the predicted classes
# Array to house the predicted levels
ret = CategoricalArray{V, N, R}(undef, size(yhat), levels=levs, ordered=ord)
#ret = Array{CategoricalValue{V, R}, N}(undef, size(yhat))
# `temp` vector allocated once to be used for calculations in each loop
Expand Down
4 changes: 2 additions & 2 deletions test/builtins/Constant.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ end
d = MLJBase.UnivariateFinite([y[1], y[2], y[4]], [0.5, 0.25, 0.25])

yhat = MLJBase.predict_mode(model, fitresult, X)
@test MLJBase.classes(yhat[1]) == MLJBase.classes(y[1])
@test levels(yhat[1]) == levels(y[1])
@test yhat[5] == y[1]
@test length(yhat) == 10

yhat = MLJBase.predict(model, fitresult, X)
yhat1 = yhat[1]

for c in MLJBase.classes(d)
# Check that the predicted distribution agrees with the expected distribution `d`
# on every level. `@test` is required: a bare `≈` expression is evaluated and
# its result discarded, so nothing would be asserted.
for c in levels(d)
    @test Distributions.pdf(yhat1, c) ≈ Distributions.pdf(d, c)
end

Expand Down
14 changes: 7 additions & 7 deletions test/builtins/ThresholdPredictors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ y2_ = categorical(yraw[2:end], ordered=true)
)

# Check warning when `y` is not ordered:
@test_logs((:warn, MLJModels.warn_classes(levels(y_)...)),
@test_logs((:warn, MLJModels.warn_levels(levels(y_)...)),
MMI.fit(model, 1, MMI.reformat(model, X_, y1_)...))
# Check predictions containing two classes
# Check predictions containing two levels
@test_throws ArgumentError BinaryThresholdPredictor(ConstantRegressor())
@test_logs((:warn, r"`threshold` should be"),
BinaryThresholdPredictor(atom, threshold=-1))
Expand Down Expand Up @@ -88,13 +88,13 @@ end
v1 = categorical(['a', 'b', 'a'])
v2 = categorical(['a', 'b', 'a', 'c'])
# Test with UnivariateFinite object
d1 = UnivariateFinite(MMI.classes(v1), [0.4, 0.6])
d1 = UnivariateFinite(levels(v1), [0.4, 0.6])
@test_throws ArgumentError MLJModels._predict_threshold(d1, 0.7)
@test MLJModels._predict_threshold(d1, (0.7, 0.3)) == v1[2]
@test MLJModels._predict_threshold(d1, [0.5, 0.5]) == v1[2]
@test MLJModels._predict_threshold(d1, (0.4, 0.6)) == v1[1]
@test MLJModels._predict_threshold(d1, [0.2, 0.8]) == v1[1]
d2 = UnivariateFinite(MMI.classes(v2), [0.4, 0.3, 0.3])
d2 = UnivariateFinite(levels(v2), [0.4, 0.3, 0.3])
@test_throws ArgumentError MLJModels._predict_threshold(d2, (0.7, 0.3))
@test MLJModels._predict_threshold(d2, (0.2, 0.5, 0.3)) == v2[1]
@test MLJModels._predict_threshold(d2, [0.3, 0.2, 0.5]) == v2[2]
Expand All @@ -117,14 +117,14 @@ end

# Test with UnivariateFiniteArray object
probs1 = [0.2 0.8; 0.7 0.3; 0.1 0.9]
unf_arr1 = UnivariateFinite(MMI.classes(v1), probs1)
unf_arr1 = UnivariateFinite(levels(v1), probs1)
@test_throws ArgumentError MLJModels._predict_threshold(unf_arr1, 0.7)
@test MLJModels._predict_threshold(unf_arr1, (0.7, 0.3)) == [v1[2], v1[1], v1[2]]
@test MLJModels._predict_threshold(unf_arr1, [0.5, 0.5]) == [v1[2], v1[1], v1[2]]
@test MLJModels._predict_threshold(unf_arr1, (0.4, 0.6)) == [v1[2], v1[1], v1[2]]
@test MLJModels._predict_threshold(unf_arr1, [0.2, 0.8]) == [v1[1], v1[1], v1[2]]
probs2 = [0.2 0.3 0.5;0.1 0.6 0.3; 0.4 0.0 0.6]
unf_arr2 = UnivariateFinite(MMI.classes(v2), probs2)
unf_arr2 = UnivariateFinite(levels(v2), probs2)
@test_throws ArgumentError MLJModels._predict_threshold(unf_arr2, (0.7, 0.3))
@test MLJModels._predict_threshold(unf_arr2, (0.2, 0.5, 0.3)) == [v2[4], v2[2], v2[1]]
@test MLJModels._predict_threshold(unf_arr2, [0.3, 0.2, 0.5]) == [v2[2], v2[2], v2[1]]
Expand All @@ -144,7 +144,7 @@ MMI.input_scitype(::Type{<:DummyDetector}) = MMI.Table

@testset "BinaryThresholdPredictor - ProbabilisticUnsupervisedDetector" begin
detector = BinaryThresholdPredictor(DummyDetector(), threshold=0.2)
@test_throws MLJModels.ERR_CLASSES_DETECTOR MMI.fit(
@test_throws MLJModels.ERR_LEVELS_DETECTOR MMI.fit(
detector, 1, MMI.reformat(detector, X_, y1_)...
)

Expand Down
115 changes: 0 additions & 115 deletions test/testutils.jl

This file was deleted.

Loading