-function multi_layer_feed_forward(input_length, output_length; width::Int = 5,
-        depth::Int = 1, activation = tanh)
-    Lux.Chain(Lux.Dense(input_length, width, activation),
-        [Lux.Dense(width, width, activation) for _ in 1:(depth)]...,
-        Lux.Dense(width, output_length))
| 1 | +""" |
| 2 | + multi_layer_feed_forward(; n_input, n_output, width::Int = 4, |
| 3 | + depth::Int = 1, activation = tanh, use_bias = true, initial_scaling_factor = 1e-8) |
| 4 | +
|
+Create a Lux.jl `Chain` for use in [`NeuralNetworkBlock`](@ref)s. The weights of the last
+layer are multiplied by `initial_scaling_factor` in order to make the initial contribution
+of the network small, which helps achieve a stable starting point for training.
+"""
+function multi_layer_feed_forward(; n_input, n_output, width::Int = 4,
+        depth::Int = 1, activation = tanh, use_bias = true, initial_scaling_factor = 1e-8)
+    Lux.Chain(
+        Lux.Dense(n_input, width, activation; use_bias),
+        [Lux.Dense(width, width, activation; use_bias) for _ in 1:(depth)]...,
+        Lux.Dense(width, n_output;
+            init_weight = (rng, a...) -> initial_scaling_factor *
+                                         Lux.kaiming_uniform(rng, a...), use_bias)
+    )
+end
+
+function multi_layer_feed_forward(n_input, n_output; kwargs...)
+    multi_layer_feed_forward(; n_input, n_output, kwargs...)
 end
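
A minimal usage sketch of the new keyword API (assuming `Lux` and `Random` are loaded and `multi_layer_feed_forward` is in scope; the dimensions and input values below are illustrative):

using Lux, Random

# Two inputs, one output, default width and depth; the positional
# method simply forwards to the keyword method.
chain = multi_layer_feed_forward(2, 1)

# Initialize parameters and states for the chain.
rng = Random.default_rng()
ps, st = Lux.setup(rng, chain)

# Because the last layer's weights are scaled by `initial_scaling_factor`,
# the initial output is approximately zero (up to the last layer's bias).
y, _ = chain([1.0, 2.0], ps, st)

Scaling only the final layer's `init_weight` leaves the hidden layers at their standard Kaiming initialization, so the hidden representation is well-conditioned from the start while the network's initial contribution stays negligible.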