diff --git a/docs/Project.toml b/docs/Project.toml
index 52df2908..4e0c1720 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc"
+DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365"
 EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
diff --git a/docs/literate/tutorials/synthetic_old.txt b/docs/literate/tutorials/synthetic_old.txt
new file mode 100644
index 00000000..ec7b9b32
--- /dev/null
+++ b/docs/literate/tutorials/synthetic_old.txt
@@ -0,0 +1,89 @@
+#
+# Results Analysis
+#
+# Check the training differences for Q10 parameter
+# This shows how close the model learned the true Q10 value
+# out.train_diffs.Q10
+
+# using Hyperopt
+# using Distributed
+# using WGLMakie
+
+# mspempty = ModelSpec()
+
+# nhyper = 4
+# ho = @thyperopt for i in nhyper,
+#         opt in [AdamW(0.01), AdamW(0.1), RMSProp(0.001), RMSProp(0.01)],
+#         input_batchnorm in [true, false]
+
+#     hyper_parameters = (; opt, input_batchnorm)
+#     println("Hyperparameter run: \n", i, " of ", nhyper, "\t with hyperparameters \t", hyper_parameters, "\t")
+#     out = EasyHybrid.tune(hybrid_model, df, mspempty; hyper_parameters..., nepochs = 10, plotting = false, show_progress = false, file_name = "test$i.jld2")
+#     #out.best_loss
+#     # out.best_loss, has to be first element of the tuple, return a rich record for this trial (stored in ho.results[i])
+#     (out.best_loss, hyperps = hyper_parameters, ps_st = (out.ps, out.st), i = i)
+# end
+
+# losses = getfield.(ho.results, :best_loss)
+# hyperps = getfield.(ho.results, :hyperps)
+
+# # Helper function to make optimizer names short and readable
+# function short_opt_name(opt)
+#     if opt isa AdamW
+#         return "AdamW(η=$(opt.eta))"
+#     elseif opt isa RMSProp
+#         return "RMSProp(η=$(opt.eta))"
+#     else
+#         return string(typeof(opt))
+#     end
+# end
+
+# # Sort losses and associated data by increasing loss
+# idx = sortperm(losses)
+# sorted_losses = losses[idx]
+# sorted_hyperps = hyperps[idx]
+
+# fig = Figure(figure_padding = 50)
+# # Prepare tick labels with hyperparameter info for each trial (sorted)
+# sorted_ticklabels = [
+#     join(
+#         [
+#             k == :opt ? "opt=$(short_opt_name(v))" : "$k=$(repr(v))"
+#             for (k, v) in pairs(hp)
+#         ], "\n"
+#     )
+#     for hp in sorted_hyperps
+# ]
+# ax = Makie.Axis(
+#     fig[1, 1];
+#     xlabel = "Trial",
+#     ylabel = "Loss",
+#     title = "Hyperparameter Tuning Results",
+#     xgridvisible = false,
+#     ygridvisible = false,
+#     xticks = (1:length(sorted_losses), sorted_ticklabels),
+#     xticklabelrotation = 45
+# )
+# scatter!(ax, 1:length(sorted_losses), sorted_losses; markersize = 15, color = :dodgerblue)
+
+# # Get the best trial
+# best_idx = argmin(losses)
+# best_trial = ho.results[best_idx]
+
+# best_params = best_trial.ps_st # (ps, st)
+
+# # Print the best hyperparameters
+# printmin(ho)
+
+# # Plot the results
+# import Plots
+# using Plots.PlotMeasures
+# # rebuild the ho object as intended by plot function for hyperopt object
+# ho2 = deepcopy(ho)
+# ho2.results = getfield.(ho.results, :best_loss)
+
+# Plots.plot(ho2, xrotation = 25, left_margin = [100mm 0mm], bottom_margin = 60mm, ylab = "loss", size = (900, 900))
+
+# # Train the model with the best hyperparameters
+# best_hyperp = best_hyperparams(ho)
+# out = EasyHybrid.tune(hybrid_model, df, mspempty; best_hyperp..., nepochs = 100, train_from = best_params)
\ No newline at end of file
diff --git a/docs/literate/tutorials/synthetic_respiration.jl b/docs/literate/tutorials/synthetic_respiration.jl
new file mode 100644
index 00000000..5aa030a7
--- /dev/null
+++ b/docs/literate/tutorials/synthetic_respiration.jl
@@ -0,0 +1,204 @@
+# [![CC BY-SA 4.0](https://img.shields.io/badge/License-CC%20BY--SA%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by-sa/4.0/)
+#
+# # EasyHybrid Example: Synthetic Data Analysis
+#
+# This example demonstrates how to use EasyHybrid to train a hybrid model
+# on synthetic data for respiration modeling with Q10 temperature sensitivity.
+#

+using EasyHybrid

+# ## Data Loading and Preprocessing
+#
+# Load the synthetic dataset from GitHub into a DataFrame

+df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc");

+# Select a subset of data for faster execution
+df = df[1:20000, :];
+first(df, 5)

+# ### KeyedArray
+# A KeyedArray from AxisKeys.jl works as input, but it cannot hold the DateTime column
+dfnot = Float32.(df[!, Not(:time)]);

+ka = to_keyedArray(dfnot);

+# ### DimensionalData
+using DimensionalData
+mat = Array(Matrix(dfnot)')
+da = DimArray(mat, (Dim{:col}(Symbol.(names(dfnot))), Dim{:row}(1:size(dfnot, 1))));

+# ## Define the Physical Model
+#
+# **RbQ10 model**: Respiration model with Q10 temperature sensitivity
+#
+# Parameters:
+# - ta: air temperature [°C]
+# - Q10: temperature sensitivity factor [-]
+# - rb: basal respiration rate [μmol/m²/s]
+# - tref: reference temperature [°C] (default: 15.0)

+function RbQ10(; ta, Q10, rb, tref = 15.0f0)
+    reco = rb .* Q10 .^ (0.1f0 .* (ta .- tref))
+    return (; reco, Q10, rb)
+end

+# ### Define Model Parameters
+#
+# Parameter specification: (default, lower_bound, upper_bound)

+parameters = (
+    ## Parameter name | Default | Lower | Upper | Description
+    rb = (3.0f0, 0.0f0, 13.0f0),    # Basal respiration [μmol/m²/s]
+    Q10 = (2.0f0, 1.0f0, 4.0f0),    # Temperature sensitivity factor [-]
+)

+# ## Configure Hybrid Model Components
+#
+# Define input variables
+forcing = [:ta]    # Forcing variables (temperature)

+# Target variable
+target = [:reco]   # Target variable (respiration)

+# Parameter classification
+global_param_names = [:Q10]   # Global parameters (same for all samples)
+neural_param_names = [:rb]    # Neural-network-predicted parameters

+# ## Single NN Hybrid Model Training
+#
+# Create a single-NN hybrid model using the unified constructor

+predictors_single_nn = [:sw_pot, :dsw_pot]   # Predictor variables (solar radiation and its derivative)

+single_nn_hybrid_model = constructHybridModel(
+    predictors_single_nn,       # Input features
+    forcing,                    # Forcing variables
+    target,                     # Target variables
+    RbQ10,                      # Process-based model function
+    parameters,                 # Parameter definitions
+    neural_param_names,         # NN-predicted parameters
+    global_param_names,         # Global parameters
+    hidden_layers = [16, 16],   # Neural network architecture
+    activation = sigmoid,       # Activation function
+    scale_nn_outputs = true,    # Scale neural network outputs
+    input_batchnorm = true      # Apply batch normalization to inputs
+)

+# ### Train on a DataFrame
+#
+# An extra loss term can be supplied as a function of the predictions ŷ;
+# here it penalizes rb values far from 5.0

+extra_loss = function (ŷ)
+    return (; a = sum((5.0 .- ŷ.rb) .^ 2))
+end

+# Train the hybrid model
+single_nn_out = train(
+    single_nn_hybrid_model,
+    df,
+    ();
+    nepochs = 10,                  # Number of training epochs
+    batchsize = 512,               # Batch size for training
+    opt = AdamW(0.1),              # Optimizer and learning rate
+    monitor_names = [:rb, :Q10],   # Parameters to monitor during training
+    yscale = identity,             # Scaling for outputs
+    shuffleobs = true,
+    loss_types = [:mse, :nse],
+    show_progress = false,
+    extra_loss = extra_loss
+)

+# ### Train on a KeyedArray

+single_nn_out = train(
+    single_nn_hybrid_model,
+    ka,
+    ();
+    nepochs = 10,                  # Number of training epochs
+    batchsize = 512,               # Batch size for training
+    opt = AdamW(0.1),              # Optimizer and learning rate
+    monitor_names = [:rb, :Q10],   # Parameters to monitor during training
+    yscale = identity,             # Scaling for outputs
+    show_progress = false,
+    shuffleobs = true
+)

+# ### Train on DimensionalData

+single_nn_out = train(
+    single_nn_hybrid_model,
+    da,
+    ();
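+    ## The keyword interface below is identical to the DataFrame and KeyedArray
+    ## runs above; `train` accepts all three container types interchangeably.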
+    nepochs = 10,                  # Number of training epochs
+    batchsize = 512,               # Batch size for training
+    opt = AdamW(0.1),              # Optimizer and learning rate
+    monitor_names = [:rb, :Q10],   # Parameters to monitor during training
+    yscale = identity,             # Scaling for outputs
+    show_progress = false,
+    shuffleobs = true
+)

+# Put normalisation layers into test mode and inspect the predictor means
+LuxCore.testmode(single_nn_out.st)
+mean(df.dsw_pot)
+mean(df.sw_pot)

+# ## Multi NN Hybrid Model Training

+predictors_multi_nn = (rb = [:sw_pot, :dsw_pot],)

+multi_nn_hybrid_model = constructHybridModel(
+    predictors_multi_nn,        # Input features
+    forcing,                    # Forcing variables
+    target,                     # Target variables
+    RbQ10,                      # Process-based model function
+    parameters,                 # Parameter definitions
+    global_param_names,         # Global parameters
+    hidden_layers = [16, 16],   # Neural network architecture
+    activation = sigmoid,       # Activation function
+    scale_nn_outputs = true,    # Scale neural network outputs
+    input_batchnorm = true      # Apply batch normalization to inputs
+)

+multi_nn_out = train(
+    multi_nn_hybrid_model,
+    ka,
+    ();
+    nepochs = 10,                  # Number of training epochs
+    batchsize = 512,               # Batch size for training
+    opt = AdamW(0.1),              # Optimizer and learning rate
+    monitor_names = [:rb, :Q10],   # Parameters to monitor during training
+    yscale = identity,             # Scaling for outputs
+    show_progress = false,
+    shuffleobs = true
+)

+LuxCore.testmode(multi_nn_out.st)
+mean(df.dsw_pot)
+mean(df.sw_pot)

+# ## Pure ML Single NN Model Training

+# TODO: does not train well; built on top of SingleNNHybridModel
+predictors_single_nn_ml = [:sw_pot, :dsw_pot, :ta]

+single_nn_model = constructNNModel(predictors_single_nn_ml, target; input_batchnorm = true, activation = tanh)
+single_nn_out = train(single_nn_model, da, (); nepochs = 10, batchsize = 512, opt = AdamW(0.01), yscale = identity, shuffleobs = true, show_progress = false)
+LuxCore.testmode(single_nn_out.st)
+mean(df.dsw_pot)
+mean(df.sw_pot)

+single_nn_model.targets

+# ## Pure ML Multi NN Model Training

+# TODO: does not train well; built on top of MultiNNHybridModel
diff --git a/docs/make.jl b/docs/make.jl
index d7bd3f01..6d96295f 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -56,6 +56,7 @@ makedocs(;
         "Home" => "index.md",
         "Get Started" => "get_started.md",
         "Tutorial" => [
+            "Synthetic Respiration" => "tutorials/synthetic_respiration.md",
             "Exponential Response" => "tutorials/exponential_res.md",
             "Hyperparameter Tuning" => "tutorials/hyperparameter_tuning.md",
             "Slurm" => "tutorials/slurm.md",
diff --git a/projects/book_chapter/Project.toml b/projects/book_chapter/Project.toml
deleted file mode 100644
index 2bccc509..00000000
--- a/projects/book_chapter/Project.toml
+++ /dev/null
@@ -1,10 +0,0 @@
-[deps]
-AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5"
-ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
-EasyHybrid = "61bb816a-e6af-4913-ab9e-91bff2e122e3"
-Hyperopt = "93e5fe13-2215-51db-baaf-2e9a34fb2712"
-MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
-NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
-OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5"
-WGLMakie = "276b4fcb-3e11-5398-bf8b-a0c2d153d008"
diff --git a/projects/book_chapter/example_synthetic.jl b/projects/book_chapter/example_synthetic.jl
deleted file mode 100644
index 642b2c88..00000000
--- a/projects/book_chapter/example_synthetic.jl
+++ /dev/null
@@ -1,303 +0,0 @@
-# CC BY-SA 4.0
-# =============================================================================
-# EasyHybrid Example: Synthetic Data Analysis
-# =============================================================================
-# This example demonstrates how to use EasyHybrid to train a hybrid model
-# on synthetic data for respiration modeling with Q10 temperature sensitivity.
-# =============================================================================
-
-# =============================================================================
-# Project Setup and Environment
-# =============================================================================
-using Pkg
-
-# Set project path and activate environment
-project_path = "projects/book_chapter"
-Pkg.activate(project_path)
-EasyHybrid_path = joinpath(pwd())
-Pkg.develop(path = EasyHybrid_path)
-#Pkg.resolve()
-#Pkg.instantiate()
-
-using EasyHybrid
-using AxisKeys
-using DimensionalData
-
-# =============================================================================
-# Data Loading and Preprocessing
-# =============================================================================
-# Load synthetic dataset from GitHub into DataFrame
-df = load_timeseries_netcdf("https://github.com/bask0/q10hybrid/raw/master/data/Synthetic4BookChap.nc")
-
-# Select a subset of data for faster execution
-df = df[1:20000, :]
-
-# KeyedArray from AxisKeys.jl works, but cannot handle DateTime type
-dfnot = Float32.(df[!, Not(:time)])
-
-ka = to_keyedArray(dfnot)
-
-# DimensionalData
-mat = Array(Matrix(dfnot)')
-da = DimArray(mat, (Dim{:col}(Symbol.(names(dfnot))), Dim{:row}(1:size(dfnot, 1))))
-
-# =============================================================================
-# Define the Physical Model
-# =============================================================================
-# RbQ10 model: Respiration model with Q10 temperature sensitivity
-# Parameters:
-# - ta: air temperature [°C]
-# - Q10: temperature sensitivity factor [-]
-# - rb: basal respiration rate [μmol/m²/s]
-# - tref: reference temperature [°C] (default: 15.0)
-function RbQ10(; ta, Q10, rb, tref = 15.0f0)
-    reco = rb .* Q10 .^ (0.1f0 .* (ta .- tref))
-    return (; reco, Q10, rb)
-end
-
-# =============================================================================
-# Define Model Parameters
-# =============================================================================
-# Parameter specification: (default, lower_bound, upper_bound)
-parameters = (
-    # Parameter name | Default | Lower | Upper | Description
-    rb = (3.0f0, 0.0f0, 13.0f0),    # Basal respiration [μmol/m²/s]
-    Q10 = (2.0f0, 1.0f0, 4.0f0),    # Temperature sensitivity factor [-]
-)
-
-# =============================================================================
-# Configure Hybrid Model Components
-# =============================================================================
-# Define input variables
-forcing = [:ta]    # Forcing variables (temperature)
-
-# Target variable
-target = [:reco]   # Target variable (respiration)
-
-# Parameter classification
-global_param_names = [:Q10]   # Global parameters (same for all samples)
-neural_param_names = [:rb]    # Neural network predicted parameters
-
-# =============================================================================
-# Single NN Hybrid Model Training
-# =============================================================================
-using WGLMakie
-# Create single NN hybrid model using the unified constructor
-predictors_single_nn = [:sw_pot, :dsw_pot]   # Predictor variables (solar radiation, and its derivative)
-
-single_nn_hybrid_model = constructHybridModel(
-    predictors_single_nn,       # Input features
-    forcing,                    # Forcing variables
-    target,                     # Target variables
-    RbQ10,                      # Process-based model function
-    parameters,                 # Parameter definitions
-    neural_param_names,         # NN-predicted parameters
-    global_param_names,         # Global parameters
-    hidden_layers = [16, 16],   # Neural network architecture
-    activation = sigmoid,       # Activation function
-    scale_nn_outputs = true,    # Scale neural network outputs
-    input_batchnorm = true      # Apply batch normalization to inputs
-)
-
-# =============================================================================
-# train on DataFrame
-# =============================================================================
-
-extra_loss = function (ŷ)
-    return (; a = sum((5.0 .- ŷ.rb) .^ 2))
-end
-
-# Train the hybrid model
-single_nn_out = train(
-    single_nn_hybrid_model,
-    df,
-    ();
-    nepochs = 10,                  # Number of training epochs
-    batchsize = 512,               # Batch size for training
-    opt = AdamW(0.1),              # Optimizer and learning rate
-    monitor_names = [:rb, :Q10],   # Parameters to monitor during training
-    yscale = identity,             # Scaling for outputs
-    shuffleobs = true,
-    loss_types = [:mse, :nse],
-    extra_loss = extra_loss
-)
-
-# =============================================================================
-# train on KeyedArray
-# =============================================================================
-single_nn_out = train(
-    single_nn_hybrid_model,
-    ka,
-    ();
-    nepochs = 10,                  # Number of training epochs
-    batchsize = 512,               # Batch size for training
-    opt = AdamW(0.1),              # Optimizer and learning rate
-    monitor_names = [:rb, :Q10],   # Parameters to monitor during training
-    yscale = identity,             # Scaling for outputs
-    shuffleobs = true
-)
-
-# =============================================================================
-# train on DimensionalData
-# =============================================================================
-single_nn_out = train(
-    single_nn_hybrid_model,
-    da,
-    ();
-    nepochs = 10,                  # Number of training epochs
-    batchsize = 512,               # Batch size for training
-    opt = AdamW(0.1),              # Optimizer and learning rate
-    monitor_names = [:rb, :Q10],   # Parameters to monitor during training
-    yscale = identity,             # Scaling for outputs
-    shuffleobs = true
-)
-
-LuxCore.testmode(single_nn_out.st)
-mean(df.dsw_pot)
-mean(df.sw_pot)
-
-# =============================================================================
-# Multi NN Hybrid Model Training
-# =============================================================================
-predictors_multi_nn = (rb = [:sw_pot, :dsw_pot],)
-
-multi_nn_hybrid_model = constructHybridModel(
-    predictors_multi_nn,        # Input features
-    forcing,                    # Forcing variables
-    target,                     # Target variables
-    RbQ10,                      # Process-based model function
-    parameters,                 # Parameter definitions
-    global_param_names,         # Global parameters
-    hidden_layers = [16, 16],   # Neural network architecture
-    activation = sigmoid,       # Activation function
-    scale_nn_outputs = true,    # Scale neural network outputs
-    input_batchnorm = true      # Apply batch normalization to inputs
-)
-
-multi_nn_out = train(
-    multi_nn_hybrid_model,
-    ka,
-    ();
-    nepochs = 10,                  # Number of training epochs
-    batchsize = 512,               # Batch size for training
-    opt = AdamW(0.1),              # Optimizer and learning rate
-    monitor_names = [:rb, :Q10],   # Parameters to monitor during training
-    yscale = identity,             # Scaling for outputs
-    shuffleobs = true
-)
-
-LuxCore.testmode(multi_nn_out.st)
-mean(df.dsw_pot)
-mean(df.sw_pot)
-
-# =============================================================================
-# Pure ML Single NN Model Training
-# =============================================================================
-
-# TODO does not train well build on top of SingleNNHybridModel
-predictors_single_nn_ml = [:sw_pot, :dsw_pot, :ta]
-
-single_nn_model = constructNNModel(predictors_single_nn_ml, target; input_batchnorm = true, activation = tanh)
-single_nn_out = train(single_nn_model, da, (); nepochs = 10, batchsize = 512, opt = AdamW(0.01), yscale = identity, shuffleobs = true)
-LuxCore.testmode(single_nn_out.st)
-mean(df.dsw_pot)
-mean(df.sw_pot)
-
-single_nn_model.targets
-
-# =============================================================================
-# Pure ML Multi NN Model Training
-# =============================================================================
-
-# TODO does not train well build on top of MultiNNHybridModel
-
-
-# =============================================================================
-# Results Analysis
-# =============================================================================
-# Check the training differences for Q10 parameter
-# This shows how close the model learned the true Q10 value
-out.train_diffs.Q10
-
-using Hyperopt
-using Distributed
-using WGLMakie
-
-mspempty = ModelSpec()
-
-nhyper = 4
-ho = @thyperopt for i in nhyper,
-        opt in [AdamW(0.01), AdamW(0.1), RMSProp(0.001), RMSProp(0.01)],
-        input_batchnorm in [true, false]
-
-    hyper_parameters = (; opt, input_batchnorm)
-    println("Hyperparameter run: \n", i, " of ", nhyper, "\t with hyperparameters \t", hyper_parameters, "\t")
-    out = EasyHybrid.tune(hybrid_model, df, mspempty; hyper_parameters..., nepochs = 10, plotting = false, show_progress = false, file_name = "test$i.jld2")
-    #out.best_loss
-    # out.best_loss, has to be first element of the tuple, return a rich record for this trial (stored in ho.results[i])
-    (out.best_loss, hyperps = hyper_parameters, ps_st = (out.ps, out.st), i = i)
-end
-
-losses = getfield.(ho.results, :best_loss)
-hyperps = getfield.(ho.results, :hyperps)
-
-# Helper function to make optimizer names short and readable
-function short_opt_name(opt)
-    if opt isa AdamW
-        return "AdamW(η=$(opt.eta))"
-    elseif opt isa RMSProp
-        return "RMSProp(η=$(opt.eta))"
-    else
-        return string(typeof(opt))
-    end
-end
-
-# Sort losses and associated data by increasing loss
-idx = sortperm(losses)
-sorted_losses = losses[idx]
-sorted_hyperps = hyperps[idx]
-
-fig = Figure(figure_padding = 50)
-# Prepare tick labels with hyperparameter info for each trial (sorted)
-sorted_ticklabels = [
-    join(
-        [
-            k == :opt ? "opt=$(short_opt_name(v))" : "$k=$(repr(v))"
-            for (k, v) in pairs(hp)
-        ], "\n"
-    )
-    for hp in sorted_hyperps
-]
-ax = Makie.Axis(
-    fig[1, 1];
-    xlabel = "Trial",
-    ylabel = "Loss",
-    title = "Hyperparameter Tuning Results",
-    xgridvisible = false,
-    ygridvisible = false,
-    xticks = (1:length(sorted_losses), sorted_ticklabels),
-    xticklabelrotation = 45
-)
-scatter!(ax, 1:length(sorted_losses), sorted_losses; markersize = 15, color = :dodgerblue)
-
-# Get the best trial
-best_idx = argmin(losses)
-best_trial = ho.results[best_idx]
-
-best_params = best_trial.ps_st # (ps, st)
-
-# Print the best hyperparameters
-printmin(ho)
-
-# Plot the results
-import Plots
-using Plots.PlotMeasures
-# rebuild the ho object as intended by plot function for hyperopt object
-ho2 = deepcopy(ho)
-ho2.results = getfield.(ho.results, :best_loss)
-
-Plots.plot(ho2, xrotation = 25, left_margin = [100mm 0mm], bottom_margin = 60mm, ylab = "loss", size = (900, 900))
-
-# Train the model with the best hyperparameters
-best_hyperp = best_hyperparams(ho)
-out = EasyHybrid.tune(hybrid_model, df, mspempty; best_hyperp..., nepochs = 100, train_from = best_params)
diff --git a/src/EasyHybrid.jl b/src/EasyHybrid.jl
index 8fb3ed43..20a35146 100644
--- a/src/EasyHybrid.jl
+++ b/src/EasyHybrid.jl
@@ -40,7 +40,7 @@ using Static: False, True
     using DataFrames
     using CSV
     using OptimizationOptimisers: OptimizationOptimisers, Optimisers, Adam, AdamW, RMSProp
-    using ComponentArrays
+    using ComponentArrays: ComponentArrays, ComponentArray
 end
 
 include("macro_hybrid.jl")