Commit 152f83c

refac: add ESNCell built on LuxCore

1 parent cb409df commit 152f83c

File tree

7 files changed: +394 −25 lines changed

Project.toml

Lines changed: 8 additions & 0 deletions

@@ -5,11 +5,15 @@ version = "0.11.4"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
+ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
+ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 WeightInitializers = "d49dbf32-c5c2-4618-8acc-27bb2598ef2d"
 
 [weakdeps]
@@ -27,17 +31,21 @@ RCSparseArraysExt = "SparseArrays"
 [compat]
 Adapt = "4.1.1"
 Aqua = "0.8"
+ArrayInterface = "7.19.0"
 CellularAutomata = "0.0.6"
 Compat = "4.16.0"
+ConcreteStructs = "0.2.3"
 DifferentialEquations = "7.16.1"
 LIBSVM = "0.8"
 LinearAlgebra = "1.10"
+LuxCore = "1.3.0"
 MLJLinearModels = "0.9.2, 0.10"
 NNlib = "0.9.26"
 Random = "1.10"
 Reexport = "1.2.2"
 SafeTestsets = "0.1"
 SparseArrays = "1.10"
+Static = "1.2.0"
 Statistics = "1.10"
 Test = "1"
 WeightInitializers = "1.0.5"

src/ReservoirComputing.jl

Lines changed: 24 additions & 5 deletions

@@ -1,18 +1,33 @@
 module ReservoirComputing
 
 using Adapt: adapt
+using ArrayInterface: ArrayInterface
 using Compat: @compat
+using ConcreteStructs: @concrete
 using LinearAlgebra: eigvals, mul!, I, qr, Diagonal
+using LuxCore: AbstractLuxLayer, AbstractLuxContainerLayer, AbstractLuxWrapperLayer,
+        setup, apply, replicate
+import LuxCore: initialparameters, initialstates, statelength, outputsize
 using NNlib: fast_act, sigmoid
 using Random: Random, AbstractRNG, randperm
+using Static: StaticBool, StaticInt, StaticSymbol,
+        True, False, static, known, dynamic, StaticInteger
 using Reexport: Reexport, @reexport
 using WeightInitializers: DeviceAgnostic, PartialFunction, Utils
 @reexport using WeightInitializers
+@reexport using LuxCore: setup, apply
 
 abstract type AbstractReservoirComputer end
 
+const BoolType = Union{StaticBool,Bool,Val{true},Val{false}}
+const InputType = Tuple{<:AbstractArray,Tuple{<:AbstractArray}}
+const IntegerType = Union{Integer,StaticInteger}
+
 @compat(public, (create_states))
 
+#layers
+include("layers/lux_layers.jl")
+include("layers/esn_cell.jl")
 #general
 include("generics/states.jl")
 include("generics/predict.jl")
@@ -28,17 +43,21 @@ include("models/deepesn.jl")
 include("models/hybridesn.jl")
 include("models/esn_predict.jl")
 
+
+
+export ESNCell, StatefulLayer, Readout, ReservoirChain, Collect, collectstates, train!, predict
+
 export NLADefault, NLAT1, NLAT2, NLAT3, PartialSquare, ExtendedSquare
 export StandardStates, ExtendedStates, PaddedStates, PaddedExtendedStates
 export StandardRidge
 export chebyshev_mapping, informed_init, logistic_mapping, minimal_init,
-       modified_lm, scaled_rand, weighted_init, weighted_minimal
+    modified_lm, scaled_rand, weighted_init, weighted_minimal
 export block_diagonal, chaotic_init, cycle_jumps, delay_line, delay_line_backward,
-       double_cycle, forward_connection, low_connectivity, pseudo_svd, rand_sparse,
-       selfloop_cycle, selfloop_delayline_backward, selfloop_feedback_cycle,
-       selfloop_forward_connection, simple_cycle, true_double_cycle
+    double_cycle, forward_connection, low_connectivity, pseudo_svd, rand_sparse,
+    selfloop_cycle, selfloop_delayline_backward, selfloop_feedback_cycle,
+    selfloop_forward_connection, simple_cycle, true_double_cycle
 export add_jumps!, backward_connection!, delay_line!, reverse_simple_cycle!,
-       scale_radius!, self_loop!, simple_cycle!
+    scale_radius!, self_loop!, simple_cycle!
 export RNN, MRNN, GRU, GRUParams, FullyGated, Minimal
 export train
 export ESN, HybridESN, KnowledgeModel, DeepESN
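
Taken together, the new exports outline a Lux-style workflow: compose layers into a chain, `setup` parameters and states, fit the readout with `train!`, then roll the model forward with `predict`. A minimal sketch of the intended composition, assuming `ReservoirChain`, `StatefulLayer`, and `Readout` (defined in `layers/lux_layers.jl`, which is not shown in this diff) behave like their Lux counterparts; the constructor signatures, layer sizes, and data below are placeholders, not confirmed API:

```julia
using ReservoirComputing, Random

# Toy series: 3 features × 500 steps (placeholder data).
input_data = rand(Float32, 3, 500)
target_data = rand(Float32, 3, 500)

# Assumed composition: stateful ESN cell feeding a trainable readout.
rc = ReservoirChain(StatefulLayer(ESNCell(3 => 300)), Readout(300 => 3))
ps, st = setup(Random.default_rng(), rc)   # setup/apply are reexported from LuxCore

# Ridge-fit the readout in place (see linear_regression.jl below).
ps, st = train!(rc, input_data, target_data, ps, st, StandardRidge(1e-6))
```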

src/generics/linear_regression.jl

Lines changed: 31 additions & 3 deletions

@@ -6,7 +6,7 @@ Returns a training method for `train` based on ridge regression.
 The equations for ridge regression are as follows:
 
 ```math
-\mathbf{w} = (\mathbf{X}^\top \mathbf{X} + 
+\mathbf{w} = (\mathbf{X}^\top \mathbf{X} +
 \lambda \mathbf{I})^{-1} \mathbf{X}^\top \mathbf{y}
 ```
 
@@ -21,20 +21,48 @@ struct StandardRidge
     reg::Number
 end
 
-function StandardRidge(::Type{T}, reg) where {T <: Number}
+function StandardRidge(::Type{T}, reg) where {T<:Number}
     return StandardRidge(T.(reg))
 end
 
 function StandardRidge()
     return StandardRidge(0.0)
 end
 
+function train!(rc::ReservoirChain, train_data::AbstractArray,
+        target_data::AbstractArray, ps, st::NamedTuple, sr::StandardRidge=StandardRidge(0.0);
+        return_states::Bool=false)
+    states = collectstates(rc, train_data, ps, st)
+    readout = train(sr, states, target_data)
+    ps, st = addreadout!(rc, readout, ps, st)
+
+    if return_states
+        return (ps, st), states
+    else
+        return ps, st
+    end
+end
+
 function train(sr::StandardRidge, states::AbstractArray, target_data::AbstractArray)
     n_states = size(states, 1)
     A = [states'; sqrt(sr.reg) * I(n_states)]
     b = [target_data'; zeros(n_states, size(target_data, 1))]
     F = qr(A)
    Wt = F \ b
     output_layer = Matrix(Wt')
-    return OutputLayer(sr, output_layer, size(target_data, 1), target_data[:, end])
+    return output_layer
 end
+
+function addreadout!(rc::ReservoirChain, readout_matrix::AbstractArray, ps, st::NamedTuple) # make sure the compiler infers
+    ro_param = (; weight=readout_matrix)
+    new_ps = (;)
+    for ((name, layer), param) in zip(pairs(rc.layers), ps)
+        if layer isa Readout
+            param = merge(param, ro_param)
+        end
+        new_ps = merge(new_ps, (; name => param))
+    end
+    return new_ps, st
+end
+
+#use a recursion to make it more compiler safe
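
`train` now returns the bare readout matrix instead of an `OutputLayer`, and it solves the ridge problem as a stacked least-squares system via QR rather than through the normal equations, which is better conditioned. A standalone check (not part of the commit) that the stacked system reproduces the closed-form solution from the docstring, with states stored features × samples:

```julia
using LinearAlgebra

X = randn(50, 200)    # reservoir states: 50 features × 200 time steps
Y = randn(3, 200)     # targets: 3 outputs × 200 time steps
reg = 1e-3            # ridge penalty λ

# Stacked system, as in `train`: appending √λ·I rows to X' turns the
# least-squares solution of [X'; √λ·I] w = [Y'; 0] into the ridge minimizer.
A = [X'; sqrt(reg) * I(50)]
b = [Y'; zeros(50, 3)]
W_qr = Matrix((qr(A) \ b)')

# Closed form from the docstring, transposed for this storage convention:
# W = ((X Xᵀ + λI)⁻¹ X Yᵀ)ᵀ
W_closed = Matrix(((X * X' + reg * I) \ (X * Y'))')
@assert W_qr ≈ W_closed
```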

src/generics/predict.jl

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ abstract type AbstractOutputLayer end
22
abstract type AbstractPrediction end
33

44
#general output layer struct
5-
struct OutputLayer{T, I, S, L} <: AbstractOutputLayer
5+
struct OutputLayer{T,I,S,L} <: AbstractOutputLayer
66
training_method::T
77
output_matrix::I
88
out_size::S
@@ -39,7 +39,7 @@ struct Generative{T} <: AbstractPrediction
3939
prediction_len::T
4040
end
4141

42-
struct Predictive{I, T} <: AbstractPrediction
42+
struct Predictive{I,T} <: AbstractPrediction
4343
prediction_data::I
4444
prediction_len::T
4545
end
@@ -67,8 +67,8 @@ function Predictive(prediction_data::AbstractArray)
6767
end
6868

6969
function obtain_prediction(rc::AbstractReservoirComputer, prediction::Generative,
70-
x, output_layer::AbstractOutputLayer, args...;
71-
initial_conditions = output_layer.last_value)
70+
x, output_layer::AbstractOutputLayer, args...;
71+
initial_conditions=output_layer.last_value)
7272
#x = last_state
7373
prediction_len = prediction.prediction_len
7474
train_method = output_layer.training_method
@@ -86,7 +86,7 @@ function obtain_prediction(rc::AbstractReservoirComputer, prediction::Generative
8686
end
8787

8888
function obtain_prediction(rc::AbstractReservoirComputer, prediction::Predictive,
89-
x, output_layer::AbstractOutputLayer, args...; kwargs...)
89+
x, output_layer::AbstractOutputLayer, args...; kwargs...)
9090
prediction_len = prediction.prediction_len
9191
train_method = output_layer.training_method
9292
out_size = output_layer.out_size
@@ -117,3 +117,15 @@ function store_results!(training_method, out, output, i)
117117
output[:, i] = out
118118
return out
119119
end
120+
121+
function predict(rc, steps::Int, ps, st; initialdata=nothing)
122+
if initialdata == nothing
123+
initialdata = rand(Float32, 3)
124+
end
125+
output = zeros(size(initialdata, 1), steps)
126+
for step in 1:steps
127+
initialdata, st = apply(rc, initialdata, ps, st)
128+
output[:, step] = initialdata
129+
end
130+
return output, st
131+
end
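
The new `predict` runs the chain closed-loop: each step's output is fed straight back in as the next input, playing the role of `obtain_prediction`'s `Generative` mode but expressed against the Lux-style `apply`. Note that the `rand(Float32, 3)` fallback hard-codes a 3-dimensional input, so passing `initialdata` explicitly is the safer call. A hedged usage sketch, continuing the assumed `rc`, `ps`, `st` from the workflow sketch above:

```julia
# Free-running 200-step forecast, seeded with the last training column
# rather than the random 3-vector fallback.
preds, st = predict(rc, 200, ps, st; initialdata=input_data[:, end])
size(preds)   # (3, 200): one column per forecast step
```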

src/generics/states.jl

Lines changed: 12 additions & 12 deletions

@@ -1,6 +1,6 @@
-abstract type AbstractStates end
+abstract type AbstractStates <: Function end
 abstract type AbstractPaddedStates <: AbstractStates end
-abstract type NonLinearAlgorithm end
+abstract type NonLinearAlgorithm <: Function end
 
 function pad_state!(states_type::AbstractPaddedStates, x_pad, x)
     x_pad[1, :] .= states_type.padding
@@ -60,7 +60,7 @@ julia> new_mat = states(test_mat)
 struct StandardStates <: AbstractStates end
 
 function (::StandardStates)(nla_type::NonLinearAlgorithm,
-    state, inp)
+        state, inp)
     return nla(nla_type, state)
 end
 
@@ -137,7 +137,7 @@ function (::ExtendedStates)(vect::AbstractVector, inp::AbstractVector)
 end
 
 function (states_type::ExtendedStates)(nla_type::NonLinearAlgorithm,
-    state::AbstractVecOrMat, inp::AbstractVecOrMat)
+        state::AbstractVecOrMat, inp::AbstractVecOrMat)
     return nla(nla_type, states_type(state, inp))
 end
 
@@ -194,7 +194,7 @@ struct PaddedStates{T} <: AbstractPaddedStates
     padding::T
 end
 
-function PaddedStates(; padding = 1.0)
+function PaddedStates(; padding=1.0)
     return PaddedStates(padding)
 end
 
@@ -209,7 +209,7 @@ function (states_type::PaddedStates)(vect::AbstractVector)
 end
 
 function (states_type::PaddedStates)(nla_type::NonLinearAlgorithm,
-    state::AbstractVecOrMat, inp::AbstractVecOrMat)
+        state::AbstractVecOrMat, inp::AbstractVecOrMat)
     return nla(nla_type, states_type(state))
 end
 
@@ -272,17 +272,17 @@ struct PaddedExtendedStates{T} <: AbstractPaddedStates
     padding::T
 end
 
-function PaddedExtendedStates(; padding = 1.0)
+function PaddedExtendedStates(; padding=1.0)
     return PaddedExtendedStates(padding)
 end
 
 function (states_type::PaddedExtendedStates)(nla_type::NonLinearAlgorithm,
-    state::AbstractVecOrMat, inp::AbstractVecOrMat)
+        state::AbstractVecOrMat, inp::AbstractVecOrMat)
     return nla(nla_type, states_type(state, inp))
 end
 
 function (states_type::PaddedExtendedStates)(state::AbstractVecOrMat,
-    inp::AbstractVecOrMat)
+        inp::AbstractVecOrMat)
     x_pad = PaddedStates(states_type.padding)(state)
     x_ext = ExtendedStates()(x_pad, inp)
     return x_ext
@@ -539,7 +539,7 @@ function (::NLAT2)(x_old::AbstractVector)
 
     for idx in eachindex(x_old)
         if firstindex(x_old) < idx < lastindex(x_old) && isodd(idx)
-            x_new[idx, :] .= x_old[idx - 1, :] .* x_old[idx - 2, :]
+            x_new[idx, :] .= x_old[idx-1, :] .* x_old[idx-2, :]
         end
     end
 
@@ -628,7 +628,7 @@ function (::NLAT3)(x_old::AbstractVector)
 
     for idx in eachindex(x_old)
         if firstindex(x_old) < idx < lastindex(x_old) && isodd(idx)
-            x_new[idx] = x_old[idx - 1] * x_old[idx + 1]
+            x_new[idx] = x_old[idx-1] * x_old[idx+1]
         end
     end
 
@@ -645,7 +645,7 @@ Implement a partial squaring of the states as described in [Barbosa2021](@cite).
 ```math
 \begin{equation}
     g(r_i) =
-    \begin{cases} 
+    \begin{cases}
     r_i^2, & \text{if } i \leq \eta_r N, \\
     r_i, & \text{if } i > \eta_r N.
     \end{cases}
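
Past the formatting churn, the substantive change in this file is that `AbstractStates` and `NonLinearAlgorithm` now subtype `Function`, presumably so that state transforms can sit inside a `ReservoirChain` like any other callable. Their behavior is unchanged; for example, `NLAT3` still replaces interior odd-indexed entries with the product of their two neighbors (assuming the zero-field `NLAT3()` constructor, as exported):

```julia
using ReservoirComputing

x = Float32.(1:7)
NLAT3()(x)
# Interior odd indices 3 and 5 become x[2]*x[4] and x[4]*x[6]:
# Float32[1.0, 2.0, 8.0, 4.0, 24.0, 6.0, 7.0]
```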

src/layers/esn_cell.jl

Lines changed: 61 additions & 0 deletions

@@ -0,0 +1,61 @@
+@concrete struct ESNCell <: AbstractReservoirRecurrentCell
+    activation
+    in_dims <: IntegerType
+    out_dims <: IntegerType
+    init_bias
+    init_reservoir
+    init_input
+    #init_feedback::F
+    init_state
+    leak_coefficient
+    use_bias <: StaticBool
+end
+
+function ESNCell((in_dims, out_dims)::Pair{<:Int,<:Int}, activation=tanh;
+        use_bias::BoolType=False(), init_bias=zeros32, init_reservoir=rand_sparse,
+        init_input=weighted_init, init_state=randn32, leak_coefficient=1.0)
+    return ESNCell(activation, in_dims, out_dims, init_bias, init_reservoir,
+        init_input, init_state, leak_coefficient, use_bias)
+end
+
+function initialparameters(rng::AbstractRNG, esn::ESNCell)
+    ps = (input_matrix=esn.init_input(rng, esn.out_dims, esn.in_dims),
+        reservoir_matrix=esn.init_reservoir(rng, esn.out_dims, esn.out_dims))
+    if has_bias(esn)
+        ps = merge(ps, (bias=esn.init_bias(rng, esn.out_dims),))
+    end
+    return ps
+end
+
+function initialstates(rng::AbstractRNG, esn::ESNCell)
+    return (rng=sample_replicate(rng),)
+end
+
+function (esn::ESNCell)(inp::AbstractArray, ps, st::NamedTuple)
+    rng = replicate(st.rng)
+    hidden_state = init_hidden_state(rng, esn, inp)
+    return esn((inp, (hidden_state,)), ps, merge(st, (; rng)))
+end
+
+function (esn::ESNCell)((inp, (hidden_state,))::InputType, ps, st::NamedTuple)
+    T = eltype(inp)
+    if has_bias(esn)
+        candidate_h = esn.activation.(ps.input_matrix * inp .+
+            ps.reservoir_matrix * hidden_state .+ ps.bias)
+    else
+        candidate_h = esn.activation.(ps.input_matrix * inp .+
+            ps.reservoir_matrix * hidden_state)
+    end
+    h_new = (T(1.0) - esn.leak_coefficient) .* hidden_state .+
+        esn.leak_coefficient .* candidate_h
+    return (h_new, (h_new,)), st
+end
+
+function Base.show(io::IO, esn::ESNCell)
+    print(io, "ESNCell($(esn.in_dims) => $(esn.out_dims)")
+    if esn.leak_coefficient != eltype(esn.leak_coefficient)(1.0)
+        print(io, ", leak_coefficient=$(esn.leak_coefficient)")
+    end
+    has_bias(esn) || print(io, ", use_bias=false")
+    print(io, ")")
+end
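
The two-argument forward pass implements the standard leaky-integrator ESN update, with leak rate α (`leak_coefficient`), input weights W_in (`input_matrix`), reservoir weights W (`reservoir_matrix`), and activation f:

```math
\mathbf{h}_{t+1} = (1 - \alpha)\,\mathbf{h}_t
    + \alpha\, f(\mathbf{W}_{\text{in}} \mathbf{u}_t + \mathbf{W} \mathbf{h}_t + \mathbf{b})
```

A single-cell sketch of the calling convention, assuming `AbstractReservoirRecurrentCell` and the `has_bias`/`sample_replicate`/`init_hidden_state` helpers live in `layers/lux_layers.jl` (not shown in this diff) and make the cell a valid Lux layer; `out_dims` is kept a multiple of `in_dims` since the default `weighted_init` builds block-structured input weights:

```julia
using ReservoirComputing, Random

esn = ESNCell(3 => 90; leak_coefficient=0.9)
ps, st = setup(Random.default_rng(), esn)

u = rand(Float32, 3)
# The first call seeds the hidden state through the stored rng and returns
# an (output, carry) pair; the carry form then advances the state one step.
(h, (carry,)), st = esn(u, ps, st)
(h2, _), st = esn((u, (carry,)), ps, st)
size(h)   # (90,)
```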
