Commit 602ecf0

feat: finish ESN, add reset_carry
1 parent ed10b6d commit 602ecf0

File tree: 3 files changed (+86 additions, -127 deletions)

src/ReservoirComputing.jl

Lines changed: 13 additions & 11 deletions
@@ -6,20 +6,20 @@ using ConcreteStructs: @concrete
 #using Functors
 using LinearAlgebra: eigvals, mul!, I, qr, Diagonal, diag
 using LuxCore: AbstractLuxLayer, AbstractLuxContainerLayer, AbstractLuxWrapperLayer,
-    setup, apply, replicate
+               setup, apply, replicate
 import LuxCore: initialparameters, initialstates, statelength, outputsize
 using NNlib: fast_act, sigmoid
 using Random: Random, AbstractRNG, randperm
 using Static: StaticBool, StaticInt, StaticSymbol,
-    True, False, static, known, dynamic, StaticInteger
+              True, False, static, known, dynamic, StaticInteger
 using Reexport: Reexport, @reexport
 using WeightInitializers: DeviceAgnostic, PartialFunction, Utils
 @reexport using WeightInitializers
 @reexport using LuxCore: setup, apply, initialparameters, initialstates

-const BoolType = Union{StaticBool,Bool,Val{true},Val{false}}
-const InputType = Tuple{<:AbstractArray,Tuple{<:AbstractArray}}
-const IntegerType = Union{Integer,StaticInteger}
+const BoolType = Union{StaticBool, Bool, Val{true}, Val{false}}
+const InputType = Tuple{<:AbstractArray, Tuple{<:AbstractArray}}
+const IntegerType = Union{Integer, StaticInteger}

 #@compat(public, (initialparameters)) #do I need to add intialstates/parameters in compat?

@@ -42,18 +42,20 @@ include("models/hybridesn.jl")
 #extensions
 include("extensions/reca.jl")

-export ESNCell, StatefulLayer, LinearReadout, ReservoirChain, Collect, collectstates, train!, predict
+export ESNCell, StatefulLayer, LinearReadout, ReservoirChain, Collect, collectstates,
+       train!,
+       predict, reset_carry
 export SVMReadout
 export Pad, Extend, NLAT1, NLAT2, NLAT3, PartialSquare, ExtendedSquare
 export StandardRidge
 export chebyshev_mapping, informed_init, logistic_mapping, minimal_init,
-    modified_lm, scaled_rand, weighted_init, weighted_minimal
+       modified_lm, scaled_rand, weighted_init, weighted_minimal
 export block_diagonal, chaotic_init, cycle_jumps, delay_line, delay_line_backward,
-    double_cycle, forward_connection, low_connectivity, pseudo_svd, rand_sparse,
-    selfloop_cycle, selfloop_delayline_backward, selfloop_feedback_cycle,
-    selfloop_forward_connection, simple_cycle, true_double_cycle
+       double_cycle, forward_connection, low_connectivity, pseudo_svd, rand_sparse,
+       selfloop_cycle, selfloop_delayline_backward, selfloop_feedback_cycle,
+       selfloop_forward_connection, simple_cycle, true_double_cycle
 export add_jumps!, backward_connection!, delay_line!, reverse_simple_cycle!,
-    scale_radius!, self_loop!, simple_cycle!
+       scale_radius!, self_loop!, simple_cycle!
 export train
 export ESN, HybridESN, KnowledgeModel, DeepESN
 #reca
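
Below is a rough usage sketch of the exported surface (not taken from the package docs): the dimensions, the random training data, and the regularization value are placeholders, `setup`/`apply` come from the LuxCore reexport above, and the `ESN` constructor and `train!` method are the ones added in src/models/esn.jl further down.

    using Random, ReservoirComputing

    rng = Random.default_rng()
    esn = ESN(3, 100, 3)                 # in_dims, res_dims, out_dims; activation defaults to tanh
    ps, st = setup(rng, esn)             # LuxCore setup, reexported by the package

    train_data  = rand(Float32, 3, 500)  # placeholder sequence, features × time steps
    target_data = rand(Float32, 3, 500)  # placeholder targets, same number of columns

    ps, st = train!(esn, train_data, target_data, ps, st, StandardRidge(0.0); washout = 10)
    out, st = esn(train_data[:, end], ps, st)  # one forward step: cell -> state modifiers -> readout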

src/models/esn.jl

Lines changed: 61 additions & 104 deletions
@@ -1,16 +1,17 @@
 @concrete struct ESN <: AbstractLuxContainerLayer{(:cell, :states_modifiers, :readout)}
-    cell
-    states_modifiers
-    readout
+    cell::Any
+    states_modifiers::Any
+    readout::Any
 end

 _wrap_layer(x) = x isa Function ? WrappedFunction(x) : x
 _wrap_layers(xs::Tuple) = map(_wrap_layer, xs)

-function ESN(in_dims::IntegerType, res_dims::IntegerType, out_dims::IntegerType, activation=tanh;
-        readout_activation=identity,
-        state_modifiers=(),
-        kwargs...)
+function ESN(in_dims::IntegerType, res_dims::IntegerType,
+        out_dims::IntegerType, activation = tanh;
+        readout_activation = identity,
+        state_modifiers = (),
+        kwargs...)
     cell = StatefulLayer(ESNCell(in_dims => res_dims, activation; kwargs...))
     mods_tuple = state_modifiers isa Tuple || state_modifiers isa AbstractVector ?
                  Tuple(state_modifiers) : (state_modifiers,)
@@ -23,128 +24,84 @@ function initialparameters(rng::AbstractRNG, esn::ESN)
     ps_cell = initialparameters(rng, esn.cell)
     ps_mods = map(l -> initialparameters(rng, l), esn.states_modifiers) |> Tuple
     ps_ro = initialparameters(rng, esn.readout)
-    return (cell=ps_cell, states_modifiers=ps_mods, readout=ps_ro)
+    return (cell = ps_cell, states_modifiers = ps_mods, readout = ps_ro)
 end

 function initialstates(rng::AbstractRNG, esn::ESN)
     st_cell = initialstates(rng, esn.cell)
     st_mods = map(l -> initialstates(rng, l), esn.states_modifiers) |> Tuple
     st_ro = initialstates(rng, esn.readout)
-    return (cell=st_cell, states_modifiers=st_mods, readout=st_ro)
+    return (cell = st_cell, states_modifiers = st_mods, readout = st_ro)
 end

-@inline function _apply_seq(layers::Tuple, x, ps::Tuple, st::Tuple)
-    n = length(layers)
-    new_st_parts = Vector{Any}(undef, n)
-    @inbounds for i in 1:n
-        x, sti = apply(layers[i], x, ps[i], st[i])
-        new_st_parts[i] = sti
+@inline function _apply_seq(layers::Tuple, inp, ps::Tuple, st::Tuple)
+    new_st_parts = Vector{Any}(undef, length(layers))
+    for idx in eachindex(layers)
+        inp, sti = apply(layers[idx], inp, ps[idx], st[idx])
+        new_st_parts[idx] = sti
     end
-    return x, tuple(new_st_parts...)
+    return inp, tuple(new_st_parts...)
 end

-function (m::ESN)(x, ps, st)
-    y, st_cell = apply(m.cell, x, ps.cell, st.cell)
-    y, st_mods = _apply_seq(m.states_modifiers, y, ps.states_modifiers, st.states_modifiers)
-    y, st_ro = apply(m.readout, y, ps.readout, st.readout)
-    return y, (cell=st_cell, states_modifiers=st_mods, readout=st_ro)
+function (esn::ESN)(inp::AbstractVector, ps, st)
+    out, st_cell = apply(esn.cell, inp, ps.cell, st.cell)
+    out, st_mods = _apply_seq(
+        esn.states_modifiers, out, ps.states_modifiers, st.states_modifiers)
+    out, st_ro = apply(esn.readout, out, ps.readout, st.readout)
+    return out, (cell = st_cell, states_modifiers = st_mods, readout = st_ro)
 end

-function reset_carry(esn::ESN, st; mode=:zeros, value=nothing, rng=nothing)
-    # Find current carry & infer shape/type
-    c = get(st.cell, :carry, nothing)
-    if c === nothing
+function reset_carry(rng::AbstractRNG, esn::ESN, ps, st; init_carry = nothing)
+    carry = get(st.cell, :carry, nothing)
+    if carry === nothing
         outd = esn.cell.cell.out_dims
-        T = Float32
-        sz = (outd, 1)
+        sz = outd
     else
-        h = c[1] # carry is usually a 1-tuple (h,)
-        T = eltype(h)
-        sz = size(h)
+        state = first(carry)
+        sz = size(state, 1)
     end

-    new_h = begin
-        if mode === :zeros
-            zeros(T, sz)
-        elseif mode === :randn
-            rng = rng === nothing ? Random.default_rng() : rng
-            randn(rng, T, sz...)
-        elseif mode === :value
-            @assert value !== nothing "Provide `value=` when mode=:value"
-            fill(T(value), sz)
-        else
-            error("Unknown mode=$(mode). Use :zeros, :randn, or :value.")
-        end
+    if init_carry === nothing
+        new_state = nothing
+    else
+        new_state = init_carry(rng, sz, 1)
+        new_state = (new_state,)
     end

-    new_cell = merge(st.cell, (; carry=(new_h,)))
-    return (cell=new_cell, states_modifiers=st.states_modifiers, readout=st.readout)
+    new_cell = merge(st.cell, (; carry = new_state))
+    return ps,
+    (cell = new_cell, states_modifiers = st.states_modifiers, readout = st.readout)
 end

-_set_readout_weight(ps_readout::NamedTuple, W) = merge(ps_readout, (; weight=W))
-
-function train!(m::ESN, train_data::AbstractMatrix, target_data::AbstractMatrix,
-        ps, st, train_method=StandardRidge(0.0);
-        washout::Int=0, return_states::Bool=false)
+_set_readout_weight(ps_readout::NamedTuple, wro) = merge(ps_readout, (; weight = wro))

+function collectstates(esn::ESN, data::AbstractMatrix, ps, st::NamedTuple)
     newst = st
-    collected = Vector{Any}(undef, size(train_data, 2))
-    @inbounds for (t, x) in enumerate(eachcol(train_data))
-        y, st_cell = apply(m.cell, x, ps.cell, newst.cell)
-        y, st_mods = _apply_seq(m.states_modifiers, y, ps.states_modifiers, newst.states_modifiers)
-        collected[t] = copy(y)
-        newst = (cell=st_cell, states_modifiers=st_mods, readout=newst.readout)
+    collected = Any[]
+    for inp in eachcol(data)
+        cell_y, st_cell = apply(esn.cell, inp, ps.cell, newst.cell)
+        state_t, st_mods = _apply_seq(
+            esn.states_modifiers, cell_y, ps.states_modifiers, newst.states_modifiers)
+        push!(collected, copy(state_t))
+        newst = (cell = st_cell, states_modifiers = st_mods, readout = newst.readout)
     end
-    states = eltype(train_data).(reduce(hcat, collected))
-
-    states_wo, targets_wo =
-        washout > 0 ? _apply_washout(states, target_data, washout) : (states, target_data)
-
-    W = train(train_method, states_wo, targets_wo)
-    ps2 = (cell=ps.cell,
-        states_modifiers=ps.states_modifiers,
-        readout=_set_readout_weight(ps.readout, W))
-
-    return return_states ? ((ps2, newst), states_wo) : (ps2, newst)
+    states = eltype(data).(reduce(hcat, collected))
+    @assert !isempty(collected)
+    states_raw = reduce(hcat, collected)
+    states = eltype(data).(states_raw)
+    return states, newst
 end

-_basefuncstr(x) = sprint(show, x)
+function train!(esn::ESN, train_data::AbstractMatrix, target_data::AbstractMatrix,
+        ps, st, train_method = StandardRidge(0.0);
+        washout::Int = 0, return_states::Bool = false)
+    states, newst = collectstates(esn, train_data, ps, st)
+    states_wo, targets_wo = washout > 0 ? _apply_washout(states, target_data, washout) :
+                            (states, target_data)
+    wro = train(train_method, states_wo, targets_wo)
+    ps2 = (cell = ps.cell,
+        states_modifiers = ps.states_modifiers,
+        readout = _set_readout_weight(ps.readout, wro))

-_getflag(x, sym::Symbol, default=false) = begin
-    v = known(getproperty(x, Val(sym)))
-    v === nothing ? default : v
-end
-
-function Base.show(io::IO, ::MIME"text/plain", rc::ReservoirChain)
-    L = collect(pairs(rc.layers))
-    if !isempty(L) && (L[1][2] isa StatefulLayer) && (L[end][2] isa LinearReadout)
-        sl = L[1][2]
-        ro = L[end][2]
-        if sl.cell isa ESNCell
-            esn = sl.cell
-            mods = (length(L) > 2) ? map(x -> _basefuncstr(x[2]), L[2:end-1]) : String[]
-            print(io, "ESN($(esn.in_dims) => $(esn.out_dims); ",
-                "activation=", esn.activation,
-                ", leak=", esn.leak_coefficient,
-                ", readout=", ro.activation)
-            ic = _getflag(ro, :include_collect, false)
-            ic && print(io, ", include_collect=true")
-            if !_getflag(esn, :use_bias, false)
-                print(io, ", use_bias=false")
-            end
-            if !isempty(mods)
-                print(io, ", modifiers=[", join(mods, ", "), "]")
-            end
-            print(io, ")")
-            return
-        end
-    end
-    strs = map(x -> _basefuncstr(x[2]), L)
-    if length(strs) <= 2
-        print(io, "ReservoirChain(", join(strs, ", "), ")")
-    else
-        print(io, "ReservoirChain(\n  ", join(strs, ",\n  "), "\n)")
-    end
+    return return_states ? ((ps2, newst), states_wo) : (ps2, newst)
 end
-
-Base.show(io::IO, rc::ReservoirChain) = show(io, MIME"text/plain"(), rc)
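
A minimal sketch of calling the reworked `reset_carry` (assuming the `rng`, `esn`, `ps`, and `st` from the sketch after the first file; `zeros32` is one of the WeightInitializers initializers reexported by the package):

    ps, st = reset_carry(rng, esn, ps, st)                        # no init_carry: the stored carry is cleared to nothing
    ps, st = reset_carry(rng, esn, ps, st; init_carry = zeros32)  # rebuild the hidden state as a res_dims × 1 block of zeros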

src/train.jl

Lines changed: 12 additions & 12 deletions
@@ -19,22 +19,21 @@ struct StandardRidge
     reg::Number
 end

-function StandardRidge(::Type{T}, reg) where {T<:Number}
+function StandardRidge(::Type{T}, reg) where {T <: Number}
     return StandardRidge(T.(reg))
 end

 function StandardRidge()
     return StandardRidge(0.0)
 end

-
 function _apply_washout(states::AbstractMatrix, targets::AbstractMatrix, washout::Integer)
-    @assert washout ≥ 0 "washout must be ≥ 0"
+    @assert washout≥0 "washout must be ≥ 0"
     len_states = size(states, 2)
-    @assert washout < len_states "washout=$washout is ≥ number of time steps=$len_states"
+    @assert washout<len_states "washout=$washout is ≥ number of time steps=$len_states"
     first_idx = washout + 1
-    states_wo = states[:, washout+1:end]
-    targets_wo = targets[:, washout+1:end]
+    states_wo = states[:, (washout + 1):end]
+    targets_wo = targets[:, (washout + 1):end]
     return states_wo, targets_wo
 end

@@ -75,10 +74,11 @@ The returned state is the final state after running through the full sequence.
 `include_collect=true`, or insert an explicit [`Collect()`](@ref) earlier in the chain.
 """
 function train!(rc::ReservoirChain, train_data, target_data, ps, st,
-        train_method=StandardRidge(0.0);
-        washout::Int=0, return_states::Bool=false)
+        train_method = StandardRidge(0.0);
+        washout::Int = 0, return_states::Bool = false)
     states, st_after = collectstates(rc, train_data, ps, st)
-    states_wo, traindata_wo = washout > 0 ? _apply_washout(states, target_data, washout) : (states, target_data)
+    states_wo, traindata_wo = washout > 0 ? _apply_washout(states, target_data, washout) :
+                              (states, target_data)
     output_matrix = train(train_method, states_wo, traindata_wo)
     ps2, st_after = addreadout!(rc, output_matrix, ps, st_after)
     return return_states ? ((ps2, st_after), states_wo) : (ps2, st_after)
@@ -134,9 +134,9 @@ end
 end

 function addreadout!(rc::ReservoirChain,
-                     W::AbstractMatrix,
-                     ps::NamedTuple,
-                     st::NamedTuple)
+        W::AbstractMatrix,
+        ps::NamedTuple,
+        st::NamedTuple)
     @assert propertynames(rc.layers) == propertynames(ps)
     new_ps = _addreadout(rc.layers, ps, W)
     return new_ps, st
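
As a hedged sketch of the washout path above (the `rc`, `ps`, `st`, and data variables stand in for a `ReservoirChain` setup that is not part of this diff): with `return_states = true`, `train!` also returns the collected states after `_apply_washout` has dropped the first `washout` columns, so when one state is collected per input column the returned matrix is `washout` columns narrower than the input.

    (ps2, st2), states_wo = train!(rc, train_data, target_data, ps, st;
        washout = 10, return_states = true)
    size(states_wo, 2) == size(train_data, 2) - 10  # expected when one state is collected per column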
