Better tests (#2)

tiemvanderdeure · web-flow · commit 63d32f299962 · 2024-01-03T17:25:01.000+01:00
* add more tests

* separate file for MaxnetModel and its methods

* add an example to predict

* specify types of fields in MaxnetModel

* fix the order of loading jl files

* slightly increase tolerance so test passes
diff --git a/src/Maxnet.jl b/src/Maxnet.jl
@@ -9,17 +9,17 @@ using MLJModelInterface: Continuous, Binary, Multiclass, Count
 
 export IdentityLink, CloglogLink, LogitLink, LogLink # re-export relevant links
 export LassoBackend, GLMNetBackend
-export maxnet, predict
+export maxnet, predict, complexity
 export LinearFeature, CategoricalFeature, QuadraticFeature, ProductFeature, ThresholdFeature, HingeFeature
 export MaxnetBinaryClassifier
 
-# Write your package code here.
 
 include("utils.jl")
 include("lasso.jl")
 include("feature_classes.jl")
 include("model_matrix.jl")
 include("regularization.jl")
+include("MaxnetModel.jl")
 include("maxnet_function.jl")
 include("predict.jl")
 include("response_curves.jl")
diff --git a/src/MaxnetModel.jl b/src/MaxnetModel.jl
@@ -0,0 +1,25 @@
+struct MaxnetModel 
+    path::Union{GLMNet.GLMNetPath, Lasso.LassoPath} # fitted path object from either the GLMNet or the Lasso package
+    features::Vector{<:AbstractFeatureClass} # feature classes of the model; printed by `show`
+    columns::Vector{ModelMatrixColumn} # presumably one entry per model-matrix column - TODO confirm
+    coefs::AbstractVector # model coefficients; the non-zero entries are what `complexity` counts
+    alpha::Float64
+    entropy::Float64 # reported as "Entropy" by `show`
+    predictor_data # untyped; NOTE(review): looks like the predictor table used for fitting - confirm
+    categorical_predictors::NTuple{<:Any, Symbol} # presumably names of the categorical predictors - verify against `maxnet`
+    continuous_predictors::NTuple{<:Any, Symbol} # presumably names of the continuous predictors - verify against `maxnet`
+end
+
+function Base.show(io::IO, mime::MIME"text/plain", m::MaxnetModel)
+    # Fold the variable keys of every selected feature into one duplicate-free collection.
+    merge_keys(acc, keys) = unique(vcat(acc, keys))
+    vars_selected = mapreduce(Maxnet._var_keys, merge_keys, selected_features(m))
+    println(io, "Fit Maxnet model")
+    # One summary line per model property, in fixed order.
+    println(io, "Features classes: $(m.features)")
+    println(io, "Entropy: $(m.entropy)")
+    println(io, "Model complexity: $(complexity(m))")
+    println(io, "Variables selected: $vars_selected")
+end
+
+"Get the number of non-zero coefficients in the model; works for any `AbstractVector` of coefficients and ignores explicitly stored zeros"
+complexity(m::MaxnetModel) = count(!iszero, m.coefs)
diff --git a/src/maxnet_function.jl b/src/maxnet_function.jl
@@ -1,26 +1,3 @@
-struct MaxnetModel 
-    path
-    features
-    columns
-    coefs
-    alpha
-    entropy
-    predictor_data
-    categorical_predictors
-    continuous_predictors
-end
-
-function Base.show(io::IO, mime::MIME"text/plain", m::MaxnetModel)
-    vars_selected = mapreduce(Maxnet._var_keys, (x, y) -> unique(vcat(x, y)), selected_features(m))
-
-    println(io, "Fit Maxnet model")
-    
-    println(io, "Features classes: $(m.features)")
-    println(io, "Entropy: $(m.entropy)")
-    println(io, "Model complexity: $(length(m.coefs.nzval))")
-    println(io, "Variables selected: $vars_selected")
-end
-
 """
     maxnet(
         presences, predictors; 
@@ -52,14 +29,11 @@ Lasso.jl is written in pure julia, but can be slower with large model matrices (
 - `model`: A model of type `MaxnetModel`
 
 # Examples
-```jldoctest
+```julia
 using Maxnet
-p_a, env = Maxnet.bradypus()
-
+p_a, env = Maxnet.bradypus();
 bradypus_model = maxnet(p_a, env; features = "lq", backend = GLMNetBackend())
 
-# output
-
 Fit Maxnet model
 Features classes: Maxnet.AbstractFeatureClass[LinearFeature(), CategoricalFeature(), QuadraticFeature()]
 Entropy: 6.114650341746531
diff --git a/src/predict.jl b/src/predict.jl
@@ -17,6 +17,13 @@
 # Returns
 A `Vector` with the resulting predictions.
 
+# Example
+```julia
+using Maxnet
+p_a, env = Maxnet.bradypus();
+bradypus_model = maxnet(p_a, env; features = "lq")
+prediction = Maxnet.predict(bradypus_model, env)
+```
 """
 function predict(m::MaxnetModel, x; link = CloglogLink(), clamp = false)
     predictors = Tables.columntable(x)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,12 +1,19 @@
 using Maxnet, Test, MLJBase, Statistics
 
 p_a, env = Maxnet.bradypus()
-env1 = map(e -> [e[1]], env)
+env1 = map(e -> [e[1]], env) # just the first row
 
 @testset "utils" begin
     @test_throws ErrorException Maxnet.features_from_string("a")
+    # test each feature class is returned correctly
     @test Maxnet.features_from_string("l") == [LinearFeature(), CategoricalFeature()]
     @test Maxnet.features_from_string("q") == [QuadraticFeature()]
+    @test Maxnet.features_from_string("lq") == [LinearFeature(), CategoricalFeature(), QuadraticFeature()]
+    @test Maxnet.features_from_string("lqp") == [LinearFeature(), CategoricalFeature(), QuadraticFeature(), ProductFeature()]
+    @test Maxnet.features_from_string("lqph") == [LinearFeature(), CategoricalFeature(), QuadraticFeature(), ProductFeature(), HingeFeature()]
+    @test Maxnet.features_from_string("lqpt") == [LinearFeature(), CategoricalFeature(), QuadraticFeature(), ProductFeature(), ThresholdFeature()]
+
+    @test Maxnet.default_features(100) == [LinearFeature(), CategoricalFeature(), QuadraticFeature(), HingeFeature(), ProductFeature()]
 end
 
 @testset "Maxnet" begin
@@ -18,31 +25,34 @@ end
     @test all(isapprox.(model_glmnet.coefs, model_lasso.coefs; rtol = 0.1, atol = 0.1))
     @test Statistics.cor(model_glmnet.coefs, model_lasso.coefs) > 0.99
 
-    # select classes automatically
-    Maxnet.maxnet(p_a, env; backend = LassoBackend());
+    # test the result
+    @test model_glmnet.entropy ≈ 6.114650341746531
+    @test complexity(model_glmnet) == 21
 
-    # some class combinations
+    # some class combinations and keywords
     Maxnet.maxnet(p_a, env; features = "lq", backend = LassoBackend());
     Maxnet.maxnet(p_a, env; features = "lqp", regularization_multiplier = 2., backend = LassoBackend());
-    Maxnet.maxnet(p_a, env; features = "lqh", regularization_multiplier = 5., backend = LassoBackend());
-    Maxnet.maxnet(p_a, env; features = "lqph", backend = LassoBackend());
-    Maxnet.maxnet(p_a, env; features = "lqpt", backend = LassoBackend());
+    Maxnet.maxnet(p_a, env; features = "lqh", regularization_multiplier = 5., nknots = 10, backend = LassoBackend());
+    Maxnet.maxnet(p_a, env; features = "lqph", weight_factor = 10., backend = LassoBackend());
 
     # predictions
     prediction = Maxnet.predict(model_lasso, env)
     @test Statistics.mean(prediction[p_a]) > Statistics.mean(prediction[.~p_a])
     @test minimum(prediction) > 0.
     @test maximum(prediction) < 1.
+    @test mean(prediction) ≈ 0.243406167194403 atol=1e-4
 
+    # check that clamping works
     # clamp shouldn't change anything in this case
-    @test all(prediction .== Maxnet.predict(model_lasso, env; clamp = true))
+    @test prediction == Maxnet.predict(model_lasso, env; clamp = true)
     
     # predict with a crazy extrapolation
     env1_extrapolated = merge(env1, (;cld6190_ann = [100_000]))
-    # without clamp the prediction is crazy
-    @test abs(Maxnet.predict(model_lasso, env1_extrapolated; link = IdentityLink())[1]) > 100_000.
-    # without clamp the prediction is reasonable
-    @test abs(Maxnet.predict(model_lasso, env1_extrapolated; link = IdentityLink(), clamp = true)[1]) < 5.
+    env1_max_cld = merge(env1, (;cld6190_ann = [maximum(env.cld6190_ann)]))
+
+    # with clamp, the out-of-range cld6190_ann is capped at its highest observed value, so both predictions must match
+    @test Maxnet.predict(model_lasso, env1_extrapolated; link = IdentityLink(), clamp = true) == 
+        Maxnet.predict(model_lasso, env1_max_cld; link = IdentityLink()) 
 end
 
 @testset "MLJ" begin
@@ -59,10 +69,17 @@ end
     mach2 = machine(mn(features = "lqph", backend = GLMNetBackend()), env_typed, categorical(p_a))
     fit!(mach2)
     
+    # make the equivalent model without mlj
+    model = Maxnet.maxnet((p_a), env_typed; features = "lqph", backend = GLMNetBackend());
+
+
     # predict via MLJBase
     mljprediction = MLJBase.predict(mach2, env_typed)
     mlj_true_probability = pdf.(mljprediction, true)
 
+    # test that this predicts the same as the equivalent model without mlj
+    @test all(Maxnet.predict(model, env_typed) .≈ mlj_true_probability)
+
     @test Statistics.mean(mlj_true_probability[p_a]) > Statistics.mean(mlj_true_probability[.~p_a])
     @test minimum(mlj_true_probability) > 0.
     @test maximum(mlj_true_probability) < 1.