feat: more robust ESN formulation

MartinuzziFrancesco · MartinuzziFrancesco · commit ed10b6d7e851 · 2025-09-26T21:31:40.000+02:00
diff --git a/Project.toml b/Project.toml
@@ -7,6 +7,7 @@ version = "0.11.4"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 ConcreteStructs = "2569d6c7-a4a2-43d3-a901-331e8e4be471"
+Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
@@ -34,6 +35,7 @@ CellularAutomata = "0.0.6"
 Compat = "4.16.0"
 ConcreteStructs = "0.2.3"
 DifferentialEquations = "7.16.1"
+Functors = "0.5.2"
 JET = "0.9.20"
 LIBSVM = "0.8"
 LinearAlgebra = "1.10"
diff --git a/README.md b/README.md
@@ -103,6 +103,7 @@ We can either use the provided `ESN` or build one from scratch.
 We showcase the second option:
 
 ```julia
+using ReservoirComputing
 input_size = 3
 res_size = 300
 esn = ReservoirChain(
diff --git a/src/ReservoirComputing.jl b/src/ReservoirComputing.jl
@@ -3,6 +3,7 @@ module ReservoirComputing
 using ArrayInterface: ArrayInterface
 using Compat: @compat
 using ConcreteStructs: @concrete
+#using Functors
 using LinearAlgebra: eigvals, mul!, I, qr, Diagonal, diag
 using LuxCore: AbstractLuxLayer, AbstractLuxContainerLayer, AbstractLuxWrapperLayer,
     setup, apply, replicate
diff --git a/src/models/esn.jl b/src/models/esn.jl
@@ -1,12 +1,111 @@
+# Echo state network container. The three fields are exposed to LuxCore as the
+# container's sub-layers through `AbstractLuxContainerLayer{(:cell, :states_modifiers, :readout)}`.
+# Fields carry no type annotations; `@concrete` (ConcreteStructs) is applied to the struct.
+@concrete struct ESN <: AbstractLuxContainerLayer{(:cell, :states_modifiers, :readout)}
+    cell              # the keyword constructor stores `StatefulLayer(ESNCell(...))` here
+    states_modifiers  # layers applied in order to the cell output (see `_apply_seq`)
+    readout           # final layer; the keyword constructor builds a `LinearReadout`
+end
+
+# Plain functions are wrapped in `WrappedFunction`; any other value is returned as is.
+_wrap_layer(f::Function) = WrappedFunction(f)
+_wrap_layer(layer) = layer
+
+# Apply `_wrap_layer` to every element of a tuple of state modifiers.
+_wrap_layers(layers::Tuple) = map(_wrap_layer, layers)
+
 function ESN(in_dims::IntegerType, res_dims::IntegerType, out_dims::IntegerType, activation=tanh;
     readout_activation=identity,
     state_modifiers=(),
     kwargs...)
-    cell = ESNCell(in_dims => res_dims, activation; kwargs...)
-    mods = state_modifiers isa Tuple || state_modifiers isa AbstractVector ?
-           Tuple(state_modifiers) : (state_modifiers,)
+    cell = StatefulLayer(ESNCell(in_dims => res_dims, activation; kwargs...))
+    mods_tuple = state_modifiers isa Tuple || state_modifiers isa AbstractVector ?
+                 Tuple(state_modifiers) : (state_modifiers,)
+    mods = _wrap_layers(mods_tuple)
     ro = LinearReadout(res_dims => out_dims, readout_activation)
-    return ReservoirChain((StatefulLayer(cell), mods..., ro)...)
+    return ESN(cell, mods, ro)
+end
+
+# Build the parameter NamedTuple of an `ESN`, one entry per sub-layer.
+# `rng` is consumed in this order: cell, each state modifier in sequence, readout.
+function initialparameters(rng::AbstractRNG, esn::ESN)
+    cell_ps = initialparameters(rng, esn.cell)
+    modifier_ps = Tuple(map(Base.Fix1(initialparameters, rng), esn.states_modifiers))
+    readout_ps = initialparameters(rng, esn.readout)
+    return (; cell=cell_ps, states_modifiers=modifier_ps, readout=readout_ps)
+end
+
+# Build the state NamedTuple of an `ESN`, one entry per sub-layer.
+# `rng` is consumed in this order: cell, each state modifier in sequence, readout.
+function initialstates(rng::AbstractRNG, esn::ESN)
+    cell_st = initialstates(rng, esn.cell)
+    modifier_st = Tuple(map(Base.Fix1(initialstates, rng), esn.states_modifiers))
+    readout_st = initialstates(rng, esn.readout)
+    return (; cell=cell_st, states_modifiers=modifier_st, readout=readout_st)
+end
+
+# Thread `x` through `layers` in order, calling `apply(layer, x, ps_i, st_i)` for each
+# layer with its matching parameter and state entry. Returns `(output, new_states)`
+# where `new_states` is a tuple holding one updated state per layer.
+#
+# The tuples are peeled recursively instead of being collected in a `Vector{Any}` and
+# splatted, so the element types of the returned state tuple are not erased.
+# An empty `layers` tuple returns `(x, ())`.
+#
+# Throws `DimensionMismatch` when `layers`, `ps` and `st` differ in length.
+@inline function _apply_seq(layers::Tuple, x, ps::Tuple, st::Tuple)
+    length(layers) == length(ps) == length(st) || throw(DimensionMismatch(
+        "expected one parameter and one state entry per layer, got " *
+        "$(length(layers)) layers, $(length(ps)) parameter entries and " *
+        "$(length(st)) state entries"))
+    return _apply_seq_unrolled(layers, x, ps, st)
+end
+
+# Base case of the recursion: no layers left, nothing to apply.
+@inline _apply_seq_unrolled(::Tuple{}, x, ::Tuple{}, ::Tuple{}) = (x, ())
+
+# Recursive case: apply the head layer, then continue with the remaining layers.
+@inline function _apply_seq_unrolled(layers::Tuple, x, ps::Tuple, st::Tuple)
+    y, st_head = apply(first(layers), x, first(ps), first(st))
+    out, st_tail = _apply_seq_unrolled(
+        Base.tail(layers), y, Base.tail(ps), Base.tail(st))
+    return out, (st_head, st_tail...)
+end
+
+# Forward pass: cell, then the state modifiers in order, then the readout.
+# Returns the readout output together with a state NamedTuple with the keys
+# `cell`, `states_modifiers` and `readout`.
+function (m::ESN)(x, ps, st)
+    h, cell_st = apply(m.cell, x, ps.cell, st.cell)
+    feats, mods_st = _apply_seq(
+        m.states_modifiers, h, ps.states_modifiers, st.states_modifiers)
+    out, readout_st = apply(m.readout, feats, ps.readout, st.readout)
+    return out, (; cell=cell_st, states_modifiers=mods_st, readout=readout_st)
+end
+
+# Return a new state NamedTuple in which the cell carry is replaced by a freshly
+# built hidden state. `st` itself is left untouched; `st.cell` is merged with the
+# new `carry` entry and the other two entries are passed through.
+#
+# Shape and element type are taken from the current carry (`st.cell.carry[1]`) when
+# one is present. Otherwise they default to `Float32` and `(esn.cell.cell.out_dims, 1)`.
+#
+# Keywords:
+# - `mode`: `:zeros` (default), `:randn`, or `:value`.
+# - `value`: fill value; required when `mode === :value`.
+# - `rng`: generator used when `mode === :randn`; `Random.default_rng()` if `nothing`.
+#
+# Throws `ArgumentError` when `mode === :value` is requested without `value`, and an
+# `ErrorException` for any other unknown `mode`.
+function reset_carry(esn::ESN, st; mode=:zeros, value=nothing, rng=nothing)
+    carry = get(st.cell, :carry, nothing)
+    if carry === nothing
+        T = Float32
+        sz = (esn.cell.cell.out_dims, 1)
+    else
+        h = carry[1]  # the hidden state is read from the first carry entry
+        T = eltype(h)
+        sz = size(h)
+    end
+
+    new_h = _fresh_carry(mode, T, sz, value, rng)
+    new_cell = merge(st.cell, (; carry=(new_h,)))
+    return (cell=new_cell, states_modifiers=st.states_modifiers, readout=st.readout)
+end
+
+# Build the replacement hidden state for `reset_carry` according to `mode`.
+function _fresh_carry(mode, ::Type{T}, sz, value, rng) where {T}
+    if mode === :zeros
+        return zeros(T, sz)
+    elseif mode === :randn
+        generator = rng === nothing ? Random.default_rng() : rng
+        return randn(generator, T, sz...)
+    elseif mode === :value
+        # Explicit check instead of `@assert`, which may be compiled out.
+        value === nothing &&
+            throw(ArgumentError("Provide `value=` when mode=:value"))
+        return fill(T(value), sz)
+    else
+        error("Unknown mode=$(mode). Use :zeros, :randn, or :value.")
+    end
+end
+
+# Return `ps_readout` with its `weight` entry replaced (or added) by `W`.
+function _set_readout_weight(ps_readout::NamedTuple, W)
+    return merge(ps_readout, (weight=W,))
+end
+
+# Fit the readout of an `ESN` on a sequence.
+#
+# Each column of `train_data` is one time step. For every step the cell and the state
+# modifiers are applied (the readout is not), and the resulting feature is stored.
+# The features are concatenated column-wise, converted to `eltype(train_data)`, and
+# passed with `target_data` to `train(train_method, states, targets)`. The result is
+# stored as the readout `weight`.
+#
+# Arguments:
+# - `m`: the `ESN` to fit.
+# - `train_data`, `target_data`: input and target matrices, one column per time step.
+# - `ps`, `st`: current parameters and state (NamedTuples with `cell`,
+#   `states_modifiers`, `readout`).
+# - `train_method`: forwarded to `train`; defaults to `StandardRidge(0.0)`.
+#
+# Keywords:
+# - `washout`: when `> 0`, `_apply_washout(states, target_data, washout)` is applied
+#   before fitting.
+# - `return_states`: when `true`, the feature matrix used for fitting is returned too.
+#
+# Returns `(ps2, newst)`, or `((ps2, newst), states)` when `return_states` is `true`.
+# This method builds new NamedTuples for the parameters and state instead of writing
+# into `ps`/`st`. `newst` is the state after the last time step.
+#
+# Throws `ArgumentError` when `train_data` has no columns.
+function train!(m::ESN, train_data::AbstractMatrix, target_data::AbstractMatrix,
+    ps, st, train_method=StandardRidge(0.0);
+    washout::Int=0, return_states::Bool=false)
+
+    n_steps = size(train_data, 2)
+    n_steps > 0 || throw(ArgumentError(
+        "`train_data` must contain at least one time step (column)"))
+
+    newst = st
+    collected = Vector{Any}(undef, n_steps)
+    for (t, x) in enumerate(eachcol(train_data))
+        y, st_cell = apply(m.cell, x, ps.cell, newst.cell)
+        y, st_mods = _apply_seq(
+            m.states_modifiers, y, ps.states_modifiers, newst.states_modifiers)
+        collected[t] = copy(y)
+        newst = (cell=st_cell, states_modifiers=st_mods, readout=newst.readout)
+    end
+
+    # Narrow the element type first: `reduce(hcat, ::Vector{Any})` falls back to
+    # pairwise `hcat` (quadratic copying, and a single step would come back as the
+    # bare element instead of a one-column matrix).
+    features = map(identity, collected)
+    states = eltype(train_data).(reduce(hcat, features))
+
+    states_wo, targets_wo =
+        washout > 0 ? _apply_washout(states, target_data, washout) : (states, target_data)
+
+    W = train(train_method, states_wo, targets_wo)
+    ps2 = (cell=ps.cell,
+        states_modifiers=ps.states_modifiers,
+        readout=_set_readout_weight(ps.readout, W))
+
+    return return_states ? ((ps2, newst), states_wo) : (ps2, newst)
 end
 
 _basefuncstr(x) = sprint(show, x)
diff --git a/src/states.jl b/src/states.jl
@@ -54,7 +54,7 @@ point with the input that it receives.
         )
     ),
     NLAT2(),
-    Readout(300+3 => 3)
+    LinearReadout(300+3 => 3)
 )
  ```
 
diff --git a/src/train.jl b/src/train.jl
@@ -44,13 +44,13 @@ end
            washout::Int=0, return_states::Bool=false)
 
 Trains the Reservoir Computer by creating the reservoir states from `train_data`,
-and then fiting the last [`Readout`](@ref) layer by (ridge)
+and then fitting the last [`LinearReadout`](@ref) layer by (ridge)
 linear regression onto `target_data`. The learned weights are written into `ps`.
 The returned state is the final state after running through the full sequence.
 
 ## Arguments
 
-- `rc`: A [`ReservoirChain`](@ref) whose last trainable layer is a `Readout`.
+- `rc`: A [`ReservoirChain`](@ref) whose last trainable layer is a `LinearReadout`.
 - `train_data`: input sequence (columns are time steps).
 - `target_data`: targets aligned with `train_data`.
 - `ps, st`: current parameters and state.
@@ -71,7 +71,7 @@ The returned state is the final state after running through the full sequence.
 ## Notes
 
 - Features are produced by `collectstates(rc, train_data, ps, st)`. If you rely on
-  the implicit collection of a [`Readout`](@ref), make sure that readout was created with
+  the implicit collection of a [`LinearReadout`](@ref), make sure that readout was created with
   `include_collect=true`, or insert an explicit [`Collect()`](@ref) earlier in the chain.
 """
 function train!(rc::ReservoirChain, train_data, target_data, ps, st,
@@ -122,7 +122,7 @@ end
     Kq = _quote_keys(K)
     tailKq = _quote_keys(tailK)
 
-    head_val = :((getfield(layers, 1) isa Readout)
+    head_val = :((getfield(layers, 1) isa LinearReadout)
                  ? _setweight_rt(getfield(ps, 1), W)
                  : getfield(ps, 1))
 

Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,7 @@ point with the input that it receives.`
`54`	`54`	`)`
`55`	`55`	`),`
`56`	`56`	`NLAT2(),`
`57`		`- Readout(300+3 => 3)`
	`57`	`+ LinearReadout(300+3 => 3)`
`58`	`58`	`)`
`59`	`59`	```
`60`	`60`