diff --git a/test/test_compute_loss.jl b/test/test_compute_loss.jl index eb53b44b..fb87339f 100644 --- a/test/test_compute_loss.jl +++ b/test/test_compute_loss.jl @@ -1,6 +1,10 @@ using EasyHybrid: _compute_loss, PerTarget, _apply_loss, loss_fn +using EasyHybrid: _get_target_nan, _get_target_y, _loss_name, compute_loss, LoggingLoss +using EasyHybrid: constructHybridModel, to_keyedArray using Statistics using DimensionalData +using Random +using DataFrames @testset "_compute_loss" begin # Test data setup @@ -109,3 +113,249 @@ using DimensionalData @test !isnan(loss) end end + +@testset "_get_target_nan" begin + # Test with function + y_nan_func(target) = target == :var1 ? [true, false, true] : [true, true, false] + @test _get_target_nan(y_nan_func, :var1) == [true, false, true] + @test _get_target_nan(y_nan_func, :var2) == [true, true, false] + + # Test with AbstractDimArray + y_nan_dim = DimArray([true false; true true; false true], (Ti(1:3), Dim{:col}([:var1, :var2]))) + @test _get_target_nan(y_nan_dim, :var1) == [true, true, false] + @test _get_target_nan(y_nan_dim, :var2) == [false, true, true] + + # Test with Vector of targets + y_nan_dim_multi = DimArray([true false; true true; false true], (Ti(1:3), Dim{:col}([:var1, :var2]))) + result = _get_target_nan(y_nan_dim_multi, [:var1, :var2]) + @test size(result) == (3, 2) + @test result[:, 1] == [true, true, false] + @test result[:, 2] == [false, true, true] +end + +@testset "_get_target_y" begin + # Test with function + y_func(target) = target == :var1 ? 
[1.0, 2.0, 3.0] : [2.0, 3.0, 4.0] + @test _get_target_y(y_func, :var1) == [1.0, 2.0, 3.0] + @test _get_target_y(y_func, :var2) == [2.0, 3.0, 4.0] + + # Test with AbstractDimArray + y_dim = DimArray([1.0 2.0; 2.0 3.0; 3.0 4.0], (Ti(1:3), Dim{:col}([:var1, :var2]))) + @test _get_target_y(y_dim, :var1) == [1.0, 2.0, 3.0] + @test _get_target_y(y_dim, :var2) == [2.0, 3.0, 4.0] + + # Test with Vector of targets + y_dim_multi = DimArray([1.0 2.0; 2.0 3.0; 3.0 4.0], (Ti(1:3), Dim{:col}([:var1, :var2]))) + result = _get_target_y(y_dim_multi, [:var1, :var2]) + @test size(result) == (3, 2) + @test result[:, 1] == [1.0, 2.0, 3.0] + @test result[:, 2] == [2.0, 3.0, 4.0] + + # Test with Tuple (y_obs, y_sigma) where y_sigma is a Number + y_obs_func(target) = target == :var1 ? [1.0, 2.0, 3.0] : [2.0, 3.0, 4.0] + y_sigma_num = 0.5 + y_tuple_num = (y_obs_func, y_sigma_num) + result = _get_target_y(y_tuple_num, :var1) + @test result isa Tuple + @test result[1] == [1.0, 2.0, 3.0] + @test result[2] == 0.5 + + # Test with Tuple (y_obs, y_sigma) where y_sigma is a function + y_sigma_func(target) = target == :var1 ? 
0.3 : 0.7 + y_tuple_func = (y_obs_func, y_sigma_func) + result = _get_target_y(y_tuple_func, :var1) + @test result isa Tuple + @test result[1] == [1.0, 2.0, 3.0] + @test result[2] == 0.3 + result2 = _get_target_y(y_tuple_func, :var2) + @test result2[1] == [2.0, 3.0, 4.0] + @test result2[2] == 0.7 +end + +@testset "_loss_name" begin + # Test with Symbol + @test _loss_name(:mse) == :mse + @test _loss_name(:mae) == :mae + @test _loss_name(:rmse) == :rmse + + # Test with Function + custom_loss(ŷ, y) = mean(abs2, ŷ .- y) + loss_name_func = _loss_name(custom_loss) + @test loss_name_func isa Symbol + # The name should be cleaned (remove # if present) + @test !occursin("#", string(loss_name_func)) + + # Test with Tuple (function with args) + weighted_loss(ŷ, y, w) = w * mean(abs2, ŷ .- y) + loss_name_tuple = _loss_name((weighted_loss, (0.5,))) + @test loss_name_tuple isa Symbol + @test loss_name_tuple == _loss_name(weighted_loss) + + # Test with Tuple (function with kwargs) + scaled_loss(ŷ, y; scale = 1.0) = scale * mean(abs2, ŷ .- y) + loss_name_tuple_kw = _loss_name((scaled_loss, (scale = 2.0,))) + @test loss_name_tuple_kw isa Symbol + @test loss_name_tuple_kw == _loss_name(scaled_loss) + + # Test with Tuple (function with both args and kwargs) + complex_loss(ŷ, y, w; scale = 1.0) = scale * w * mean(abs2, ŷ .- y) + loss_name_tuple_both = _loss_name((complex_loss, (0.5,), (scale = 2.0,))) + @test loss_name_tuple_both isa Symbol + @test loss_name_tuple_both == _loss_name(complex_loss) +end + +@testset "compute_loss with extra_loss" begin + # Simple mechanistic model for testing + function test_mechanistic_model(; x1, a, b) + return (; var1 = a .* x1 .+ b, var2 = 2.0f0 .* a .* x1 .+ b) + end + + # Test parameters + test_parameters = ( + a = (1.0f0, 0.0f0, 5.0f0), + b = (2.0f0, 0.0f0, 10.0f0), + ) + + # Create hybrid model + predictors = [:x2, :x3] + forcing = [:x1] + targets = [:var1, :var2] + neural_param_names = [:a] + global_param_names = [:b] + + HM = 
constructHybridModel( + predictors, + forcing, + targets, + test_mechanistic_model, + test_parameters, + neural_param_names, + global_param_names; + hidden_layers = [8, 8], + activation = tanh + ) + + # Setup model parameters and state + rng = Random.Xoshiro(314159) + ps, st = LuxCore.setup(rng, HM) + + # Create test data as KeyedArray (all columns together) + n_samples = 3 + df_test = DataFrame( + x1 = Float32.([10.0, 11.0, 12.0]), + x2 = Float32.([1.0, 2.0, 3.0]), + x3 = Float32.([4.0, 5.0, 6.0]), + var1 = Float32.([1.1, 1.9, 3.2]), + var2 = Float32.([1.8, 3.1, 3.9]) + ) + x = to_keyedArray(df_test) + + # Create target data functions + y_t(target) = target == :var1 ? df_test.var1 : df_test.var2 + y_nan(target) = trues(n_samples) + + @testset "Training mode with extra_loss" begin + # Define extra loss function + extra_loss_func(ŷ) = [sum(abs, ŷ.var1), sum(abs, ŷ.var2)] + + logging = LoggingLoss( + loss_types = [:mse], + training_loss = :mse, + extra_loss = extra_loss_func, + train_mode = true + ) + + loss_value, st_out, stats = compute_loss(HM, ps, st, (x, (y_t, y_nan)); logging = logging) + + # Should be a single number (aggregated main loss + extra loss) + @test loss_value isa Number + @test stats == NamedTuple() + + # Get actual predictions from the model + ŷ_actual, _ = HM(x, ps, st) + + # Verify the loss includes extra loss + main_loss = _compute_loss( + ŷ_actual, y_t, y_nan, targets, :mse, sum + ) + extra_loss_vals = extra_loss_func(ŷ_actual) + expected_loss = sum([main_loss, extra_loss_vals...]) + @test loss_value ≈ expected_loss + end + + @testset "Training mode without extra_loss" begin + logging = LoggingLoss( + loss_types = [:mse], + training_loss = :mse, + extra_loss = nothing, + train_mode = true + ) + + loss_value, st_out, stats = compute_loss(HM, ps, st, (x, (y_t, y_nan)); logging = logging) + + @test loss_value isa Number + @test stats == NamedTuple() + + # Get actual predictions from the model + ŷ_actual, _ = HM(x, ps, st) + + # Should match
the main loss only + main_loss = _compute_loss( + ŷ_actual, y_t, y_nan, targets, :mse, sum + ) + @test loss_value ≈ main_loss + end + + @testset "Evaluation mode with extra_loss" begin + # Define extra loss function that returns a NamedTuple + extra_loss_func(ŷ) = (var1_extra = sum(abs, ŷ.var1), var2_extra = sum(abs, ŷ.var2)) + + logging = LoggingLoss( + loss_types = [:mse, :mae], + training_loss = :mse, + extra_loss = extra_loss_func, + train_mode = false + ) + + loss_value, st_out, stats = compute_loss(HM, ps, st, (x, (y_t, y_nan)); logging = logging) + + # Should be a NamedTuple with loss_types and extra_loss + @test loss_value isa NamedTuple + @test haskey(loss_value, :mse) + @test haskey(loss_value, :mae) + @test haskey(loss_value, :extra_loss) + + # Check extra_loss structure + @test loss_value.extra_loss isa NamedTuple + @test haskey(loss_value.extra_loss, :var1_extra) + @test haskey(loss_value.extra_loss, :var2_extra) + @test haskey(loss_value.extra_loss, :sum) # aggregated extra loss + + # Check stats contains predictions + @test stats isa NamedTuple + @test haskey(stats, :var1) + @test haskey(stats, :var2) + end + + @testset "Evaluation mode without extra_loss" begin + logging = LoggingLoss( + loss_types = [:mse, :mae], + training_loss = :mse, + extra_loss = nothing, + train_mode = false + ) + + loss_value, st_out, stats = compute_loss(HM, ps, st, (x, (y_t, y_nan)); logging = logging) + + # Should be a NamedTuple with only loss_types + @test loss_value isa NamedTuple + @test haskey(loss_value, :mse) + @test haskey(loss_value, :mae) + @test !haskey(loss_value, :extra_loss) + + # Check stats contains predictions + @test stats isa NamedTuple + @test haskey(stats, :var1) + @test haskey(stats, :var2) + end +end diff --git a/test/test_loss_fn.jl b/test/test_loss_fn.jl index 4bbf3763..03e57453 100644 --- a/test/test_loss_fn.jl +++ b/test/test_loss_fn.jl @@ -1,4 +1,5 @@ using Statistics +using EasyHybrid: bestdirection, isbetter, check_training_loss, Minimize, 
Maximize @testset "loss_fn methods" begin # Test data setup @@ -31,6 +32,39 @@ using Statistics # NSE test nse = 1 - sum((ŷ .- y) .^ 2) / sum((y .- mean(y)) .^ 2) @test loss_fn(ŷ, y, y_nan, Val(:nse)) ≈ nse + + # PearsonLoss test (1 - Pearson correlation) + r = cor(ŷ, y) + @test loss_fn(ŷ, y, y_nan, Val(:pearsonLoss)) ≈ 1.0 - r + + # NSELoss test + nse_loss = sum((ŷ .- y) .^ 2) / sum((y .- mean(y)) .^ 2) + @test loss_fn(ŷ, y, y_nan, Val(:nseLoss)) ≈ nse_loss + + # KGE Loss test + μ_s = mean(ŷ) + μ_o = mean(y) + σ_s = std(ŷ) + σ_o = std(y) + r = cor(ŷ, y) + α = σ_s / σ_o + β = μ_s / μ_o + kge_loss = sqrt((r - 1.0)^2 + (α - 1.0)^2 + (β - 1.0)^2) + @test loss_fn(ŷ, y, y_nan, Val(:kgeLoss)) ≈ kge_loss + + # KGE test (1 - KGE Loss) + @test loss_fn(ŷ, y, y_nan, Val(:kge)) ≈ 1.0 - kge_loss + + # PBKGE Loss test (Partial Kling-Gupta Efficiency) + μ_s = mean(ŷ) + μ_o = mean(y) + r = cor(ŷ, y) + β = μ_s / μ_o + pbkge_loss = sqrt((r - 1.0)^2 + (β - 1.0)^2) + @test loss_fn(ŷ, y, y_nan, Val(:pbkgeLoss)) ≈ pbkge_loss + + # PBKGE test (1 - PBKGE Loss) + @test loss_fn(ŷ, y, y_nan, Val(:pbkge)) ≈ 1.0 - pbkge_loss end @testset "Generic loss functions" begin @@ -55,9 +89,107 @@ using Statistics # Test NaN handling for predefined functions @test loss_fn(ŷ, y, y_nan, Val(:mse)) ≈ mean(abs2, valid_ŷ .- valid_y) @test loss_fn(ŷ, y, y_nan, Val(:rmse)) ≈ sqrt(mean(abs2, valid_ŷ .- valid_y)) + @test loss_fn(ŷ, y, y_nan, Val(:mae)) ≈ mean(abs, valid_ŷ .- valid_y) + @test loss_fn(ŷ, y, y_nan, Val(:pearson)) ≈ cor(valid_ŷ, valid_y) + + r = cor(valid_ŷ, valid_y) + @test loss_fn(ŷ, y, y_nan, Val(:r2)) ≈ r^2 + + nse = 1 - sum((valid_ŷ .- valid_y) .^ 2) / sum((valid_y .- mean(valid_y)) .^ 2) + @test loss_fn(ŷ, y, y_nan, Val(:nse)) ≈ nse + @test loss_fn(ŷ, y, y_nan, Val(:pearsonLoss)) ≈ 1.0 - r + + nse_loss = sum((valid_ŷ .- valid_y) .^ 2) / sum((valid_y .- mean(valid_y)) .^ 2) + @test loss_fn(ŷ, y, y_nan, Val(:nseLoss)) ≈ nse_loss + + # KGE Loss with NaN handling + μ_s = mean(valid_ŷ) + μ_o = 
mean(valid_y) + σ_s = std(valid_ŷ) + σ_o = std(valid_y) + r = cor(valid_ŷ, valid_y) + α = σ_s / σ_o + β = μ_s / μ_o + kge_loss = sqrt((r - 1.0)^2 + (α - 1.0)^2 + (β - 1.0)^2) + @test loss_fn(ŷ, y, y_nan, Val(:kgeLoss)) ≈ kge_loss + @test loss_fn(ŷ, y, y_nan, Val(:kge)) ≈ 1.0 - kge_loss + + # PBKGE Loss with NaN handling + μ_s = mean(valid_ŷ) + μ_o = mean(valid_y) + r = cor(valid_ŷ, valid_y) + β = μ_s / μ_o + pbkge_loss = sqrt((r - 1.0)^2 + (β - 1.0)^2) + @test loss_fn(ŷ, y, y_nan, Val(:pbkgeLoss)) ≈ pbkge_loss + @test loss_fn(ŷ, y, y_nan, Val(:pbkge)) ≈ 1.0 - pbkge_loss # Test NaN handling for generic functions @test loss_fn(ŷ, y, y_nan, simple_loss) ≈ mean(abs2, valid_ŷ .- valid_y) @test loss_fn(ŷ, y, y_nan, (weighted_loss, (2.0,))) ≈ 2.0 * mean(abs2, valid_ŷ .- valid_y) end + + @testset "bestdirection" begin + # Test that metrics to be maximized return Maximize + @test bestdirection(Val(:pearson)) isa Maximize + @test bestdirection(Val(:r2)) isa Maximize + @test bestdirection(Val(:nse)) isa Maximize + @test bestdirection(Val(:kge)) isa Maximize + + # Test that losses to be minimized return Minimize + @test bestdirection(Val(:mse)) isa Minimize + @test bestdirection(Val(:rmse)) isa Minimize + @test bestdirection(Val(:mae)) isa Minimize + @test bestdirection(Val(:pearsonLoss)) isa Minimize + @test bestdirection(Val(:nseLoss)) isa Minimize + @test bestdirection(Val(:kgeLoss)) isa Minimize + @test bestdirection(Val(:pbkgeLoss)) isa Minimize + @test bestdirection(Val(:pbkge)) isa Minimize + + # Test default case (anything else should be Minimize) + @test bestdirection(Val(:unknown)) isa Minimize + end + + @testset "isbetter" begin + # Test isbetter for minimized metrics (smaller is better) + @test isbetter(0.5, 1.0, :mse) == true + @test isbetter(1.0, 0.5, :mse) == false + @test isbetter(0.5, 0.5, :mse) == false # equal is not better + @test isbetter(0.3, 0.5, :rmse) == true + @test isbetter(0.5, 0.3, :rmse) == false + + # Test isbetter for maximized metrics (larger 
is better) + @test isbetter(0.8, 0.5, :pearson) == true + @test isbetter(0.5, 0.8, :pearson) == false + @test isbetter(0.5, 0.5, :pearson) == false # equal is not better + @test isbetter(0.9, 0.7, :r2) == true + @test isbetter(0.7, 0.9, :r2) == false + @test isbetter(0.85, 0.75, :nse) == true + @test isbetter(0.75, 0.85, :nse) == false + @test isbetter(0.9, 0.8, :kge) == true + @test isbetter(0.8, 0.9, :kge) == false + + # Test with Minimize and Maximize types directly + @test isbetter(0.5, 1.0, Minimize()) == true + @test isbetter(1.0, 0.5, Minimize()) == false + @test isbetter(0.8, 0.5, Maximize()) == true + @test isbetter(0.5, 0.8, Maximize()) == false + end + + @testset "check_training_loss" begin + # Test that maximized metrics throw an error + @test_throws ErrorException check_training_loss(:pearson) + @test_throws ErrorException check_training_loss(:r2) + @test_throws ErrorException check_training_loss(:nse) + @test_throws ErrorException check_training_loss(:kge) + + # Test that minimized losses pass (return nothing) + @test check_training_loss(:mse) === nothing + @test check_training_loss(:rmse) === nothing + @test check_training_loss(:mae) === nothing + @test check_training_loss(:pearsonLoss) === nothing + @test check_training_loss(:nseLoss) === nothing + @test check_training_loss(:kgeLoss) === nothing + @test check_training_loss(:pbkgeLoss) === nothing + @test check_training_loss(:pbkge) === nothing + end end