feat: first pass of HybridESN

MartinuzziFrancesco · MartinuzziFrancesco · commit eca86f29692b · 2025-09-01T21:23:43.000+02:00
diff --git a/README.md b/README.md
@@ -123,7 +123,7 @@ esn = ReservoirChain(
 
 ### 3. Train the Echo State Network
 
-ReservoirCOmputing.jl builds on Lux(Core), so in order to train the model
+ReservoirComputing.jl builds on Lux(Core), so in order to train the model
 we first need to instantiate the parameters and the states:
 
 ```julia
diff --git a/src/models/deepesn.jl b/src/models/deepesn.jl
@@ -16,8 +16,7 @@ function DeepESN(in_dims::Int,
     res_dims::AbstractVector{<:Int},
     out_dims,
     activation=tanh;
-    activations=nothing,
-    leaks=1.0,
+    leak_coefficient=1.0,
     init_reservoir=rand_sparse,
     init_input=weighted_init,
     init_bias=zeros32,
@@ -28,8 +27,8 @@ function DeepESN(in_dims::Int,
 
     num_reservoirs = length(res_dims)
 
-    acts = activations === nothing ? _asvec(activation, num_reservoirs) : _asvec(activations, num_reservoirs)
-    leaksv = _asvec(leaks, num_reservoirs)
+    acts = _asvec(activation, num_reservoirs)
+    leaksv = _asvec(leak_coefficient, num_reservoirs)
     inres = _asvec(init_reservoir, num_reservoirs)
     ininp = _asvec(init_input, num_reservoirs)
     inbias = _asvec(init_bias, num_reservoirs)
diff --git a/src/models/hybridesn.jl b/src/models/hybridesn.jl
@@ -1,16 +1,6 @@
-struct HybridESN{I,S,V,N,T,O,M,B,ST,W,IS}
-    res_size::I
-    train_data::S
-    model::V
-    nla_type::N
-    input_matrix::T
-    reservoir_driver::O
-    reservoir_matrix::M
-    bias_vector::B
-    states_type::ST
-    washout::W
-    states::IS
-end
+############################
+# Knowledge-model wrapper  #
+############################
 
 struct KnowledgeModel{T,K,O,I,S,D}
     prior_model::T
@@ -22,124 +12,95 @@ struct KnowledgeModel{T,K,O,I,S,D}
 end
 
 """
-KnowledgeModel(prior_model, u0, tspan, datasize)
-
-Constructs a `Hybrid` variation of Echo State Networks (ESNs) [^Pathak2018]
-integrating a knowledge-based model (`prior_model`) with ESNs.
-
-# Parameters
+    KnowledgeModel(prior_model, u0, tspan, datasize)
 
-  - `prior_model`: A knowledge-based model function for integration with ESNs.
-  - `u0`: Initial conditions for the model.
-  - `tspan`: Time span as a tuple, indicating the duration for model operation.
-  - `datasize`: The size of the data to be processed.
-
-[^Pathak2018]: Jaideep Pathak et al.
-    "Hybrid Forecasting of Chaotic Processes:
-    Using Machine Learning in Conjunction with a Knowledge-Based Model" (2018).
+Build a `KnowledgeModel` and precompute `model_data` on a time grid of length
+`datasize+1`. The extra step aligns with teacher-forced (xₜ → yₜ₊₁) usage.
 """
 function KnowledgeModel(prior_model, u0, tspan, datasize)
     trange = collect(range(tspan[1], tspan[2]; length=datasize))
+    length(trange) ≥ 2 || throw(ArgumentError("datasize must be ≥ 2 to infer dt"))
     dt = trange[2] - trange[1]
-    tsteps = push!(trange, dt + trange[end])
-    tspan_new = (tspan[1], dt + tspan[2])
-    model_data = prior_model(u0, tspan_new, tsteps)
-    return KnowledgeModel(prior_model, u0, tspan, dt, datasize, model_data)
+    tsteps = push!(trange, trange[end] + dt)  # grid extended by one step past tspan[2]
+    tspan2 = (tspan[1], tspan[2] + dt)  # span widened to cover the extra step
+    mdl = prior_model(u0, tspan2, tsteps)  # prior model evaluated on the extended grid
+    return KnowledgeModel(prior_model, u0, tspan, dt, datasize, mdl)
 end
 
-"""
-    HybridESN(model, train_data, in_size, res_size; kwargs...)
-
-Construct a Hybrid Echo State Network (ESN) model that integrates
-traditional Echo State Networks with a predefined knowledge model [^Pathak2018].
-
-# Parameters
-
-  - `model`: A `KnowledgeModel` instance representing the knowledge-based model
-    to be integrated with the ESN.
-  - `train_data`: The training dataset used for the ESN. This data can be
-    preprocessed or raw data depending on the nature of the problem and the
-    preprocessing steps considered.
-  - `in_size`: The size of the input layer, i.e., the number of input units
-    to the ESN.
-  - `res_size`: The size of the reservoir, i.e., the number of neurons in
-    the hidden layer of the ESN.
-
-# Optional Keyword Arguments
-
-  - `input_layer`: A function to initialize the input matrix.
-    Default is `scaled_rand`.
-  - `reservoir`: A function to initialize the reservoir matrix.
-    Default is `rand_sparse`.
-  - `bias`: A function to initialize the bias vector.
-    Default is `zeros32`.
-  - `reservoir_driver`: The driving system for the reservoir.
-    Default is an RNN model.
-  - `nla_type`: The type of non-linear activation used in the reservoir.
-    Default is `NLADefault()`.
-  - `states_type`: Defines the type of states used in the
-    ESN. Default is `StandardStates()`.
-  - `washout`: The number of initial timesteps to be
-    discarded in the ESN's training phase. Default is 0.
-  - `rng`: Random number generator used for initializing weights.
-    Default is `Utils.default_rng()`.
-  - `T`: The data type for the matrices (e.g., `Float32`).
-  - `matrix_type`: The type of matrix used for storing the training data.
-    Default is inferred from `train_data`.
+# Forecast the KB stream for `steps` steps past `km.tspan`, restarting the prior model
+# from the last precomputed column of `km.model_data`.
+function kb_forecast(km::KnowledgeModel, steps::Integer)
+    steps ≥ 1 || throw(ArgumentError("steps must be ≥ 1, got $steps"))
+    t0 = km.tspan[2] + km.dt
+    # Length-based range: `t0:dt:stop` can lose its last point to float rounding.
+    tgrid = collect(range(t0; step=km.dt, length=steps))
+    u0 = km.model_data[:, end]
+    return km.prior_model(u0, (t0, tgrid[end]), tgrid)
+end
 
-[^Pathak2018]: Jaideep Pathak et al.
-    "Hybrid Forecasting of Chaotic Processes:
-    Using Machine Learning in Conjunction with a Knowledge-Based Model" (2018).
-"""
-function HybridESN(model::KnowledgeModel, train_data::AbstractArray,
-    in_size::Int, res_size::Int; input_layer=scaled_rand, reservoir=rand_sparse,
-    bias=zeros32, reservoir_driver=RNN(),
-    nla_type=NLADefault(),
-    states_type=StandardStates(), washout::Int=0,
-    rng::AbstractRNG=Utils.default_rng(), T=Float32,
-    matrix_type=typeof(train_data))
-    train_data = vcat(train_data, model.model_data[:, 1:(end-1)])
+kb_stream_train(km::KnowledgeModel, T::Integer) = km.model_data[:, 1:T]  # first `T` columns of the precomputed KB data
 
-    in_size = size(train_data, 1)
 
+# Concatenates one column of `stream` onto the input at each step: z_t = vcat(x_t, stream[:, i])
+@concrete struct AttachStream <: AbstractLuxLayer
+    stream <: AbstractMatrix  # one column is attached per call, indexed by the state counter `i`
+end
 
-    reservoir_matrix = reservoir(rng, T, res_size, res_size)
-    #different from ESN, why?
-    input_matrix = input_layer(rng, T, res_size, in_size)
-    bias_vector = bias(rng, res_size)
-    inner_res_driver = reservoir_driver_params(reservoir_driver, res_size, in_size)
-    states = create_states(inner_res_driver, train_data, washout, reservoir_matrix,
-        input_matrix, bias_vector)
-    train_data = train_data[:, (washout+1):end]
+initialparameters(::AbstractRNG, ::AttachStream) = NamedTuple()  # no parameters; NOTE(review): confirm this extends LuxCore's function
+initialstates(::AbstractRNG, ::AttachStream) = (i=1,)  # `i` is the next stream column to attach
 
-    return HybridESN(res_size, train_data, model, nla_type, input_matrix,
-        inner_res_driver, reservoir_matrix, bias_vector, states_type, washout,
-        states)
+function (l::AttachStream)(x::AbstractVector, ps, st::NamedTuple)
+    @boundscheck (st.i ≤ size(l.stream, 2)) ||
+                 throw(BoundsError(l.stream, st.i))  # stream exhausted: no column left for this step
+    out = vcat(x, @view l.stream[:, st.i])  # append the current stream column to the input
+    return out, (i=st.i + 1,)  # advance the column counter for the next call
 end
 
-function (hesn::HybridESN)(prediction,
-    output_layer, last_state::AbstractArray=hesn.states[
-        :, [end]],
+"""
+    HybridESN(km::KnowledgeModel,
+              in_dims::Integer, res_dims::Integer, out_dims::Integer,
+              activation=tanh;
+              state_modifiers=(),
+              readout_activation=identity,
+              include_collect=true,
+              kwargs...)
+
+Build a hybrid ESN as a `ReservoirChain`; extra `kwargs` are forwarded to `ESNCell`:
+`StatefulLayer(ESNCell) → modifiers → AttachStream(train KB) → Readout`.
+"""
+function HybridESN(km::KnowledgeModel,
+    in_dims::Integer, res_dims::Integer, out_dims::Integer,
+    activation=tanh;
+    state_modifiers=(),
+    readout_activation=identity,
+    include_collect::Bool=true,
     kwargs...)
-    km = hesn.model
-    pred_len = prediction.prediction_len
+    cell = ESNCell(in_dims => res_dims, activation; kwargs...)
 
-    model = km.prior_model
-    predict_tsteps = [km.tspan[2] + km.dt]
-    [append!(predict_tsteps, predict_tsteps[end] + km.dt) for i in 1:pred_len]
-    tspan_new = (km.tspan[2] + km.dt, predict_tsteps[end])
-    u0 = km.model_data[:, end]
-    model_pred_data = model(u0, tspan_new, predict_tsteps)[:, 2:end]
+    mods = state_modifiers isa Tuple || state_modifiers isa AbstractVector ?
+           Tuple(state_modifiers) : (state_modifiers,)  # accept a single modifier or a collection
+    stream_train = kb_stream_train(km, km.datasize)  # first `km.datasize` columns of the KB data
+    d_kb = size(stream_train, 1)  # number of KB rows appended at each step
 
-    return obtain_esn_prediction(hesn, prediction, last_state, output_layer,
-        model_pred_data;
-        kwargs...)
-end
+    ro = Readout((res_dims + d_kb) => out_dims, readout_activation;
+        include_collect=static(include_collect))  # readout input width is res_dims + d_kb
 
-function train(hesn::HybridESN, target_data::AbstractArray,
-    training_method=StandardRidge(); kwargs...)
-    states = vcat(hesn.states, hesn.model.model_data[:, 2:end])
-    states_new = hesn.states_type(hesn.nla_type, states, hesn.train_data[:, 1:end])
+    return ReservoirChain((StatefulLayer(cell), mods..., AttachStream(stream_train), ro)...)
+end
 
-    return train(training_method, states_new, target_data; kwargs...)
+# Return a copy of `rc` whose first `AttachStream` layer is replaced by one wrapping
+# `new_stream`; all other layers and `rc.name` are reused. Throws `ArgumentError`
+# when the chain has no `AttachStream` layer.
+function with_kb_stream(rc::ReservoirChain, new_stream::AbstractMatrix)
+    layers = rc.layers
+    names = propertynames(layers)
+    vals = Tuple(layers)
+    # Explicit check rather than `@assert`, which may be compiled out and would then
+    # let a chain without an `AttachStream` pass through unchanged.
+    idx = findfirst(v -> v isa AttachStream, vals)
+    idx === nothing && throw(ArgumentError("No AttachStream layer found in chain."))
+    # Rebuild as a tuple: swap only position `idx`, keep each layer's concrete type.
+    swapped = ntuple(k -> k == idx ? AttachStream(new_stream) : vals[k], length(vals))
+    new_nt = NamedTuple{names}(swapped)
+    return ReservoirChain(new_nt, rc.name)
 end