2
2
3
3
[ ` ModelingToolkitNeuralNets ` ] ( https://github.com/SciML/ModelingToolkitNeuralNets.jl ) provides 2 main interfaces for representing neural networks symbolically:
4
4
5
- * The [ ` NeuralNetworkBlock ` ] ( @ref ) , which represents the neural network as a block component
6
- * The [ ` SymbolicNeuralNetwork ` ] ( @ref ) , which represents the neural network via callable parameters
5
+ - The [ ` NeuralNetworkBlock ` ] ( @ref ) , which represents the neural network as a block component
6
+ - The [ ` SymbolicNeuralNetwork ` ] ( @ref ) , which represents the neural network via callable parameters
7
7
8
8
This tutorial will introduce the [ ` NeuralNetworkBlock ` ] ( @ref ) . This representation is useful in the context of hierarchical acausal component-based models.
9
9
10
10
For such models we have a component representation that is converted to a differential-algebraic equation (DAE) system, where the algebraic equations are given by the constraints and equalities between different component variables.
11
- The process of going from the component representation to the full DAE system at the end is reffered to as [ structural simplification] ( https://docs.sciml.ai/ModelingToolkit/stable/API/model_building/#System-simplification ) .
12
- In order to formulate Universal Differential Equations (UDEs) in this context, we could operate eiter operate before the structural simplification step or after that, on the
11
+ The process of going from the component representation to the full DAE system at the end is referred to as [ structural simplification] ( https://docs.sciml.ai/ModelingToolkit/stable/API/model_building/#System-simplification ) .
12
+ In order to formulate Universal Differential Equations (UDEs) in this context, we could operate either before the structural simplification step or after it, on the
13
13
resulting DAE system. We call these the component UDE formulation and the system UDE formulation.
14
14
15
15
The advantage of the component UDE formulation is that it allows us to represent the model
@@ -46,13 +46,13 @@ input_f(t) = (1+sin(0.005 * t^2))/2
46
46
C2 = 15
47
47
end
48
48
@components begin
49
- input = Blocks.TimeVaryingFunction(f= input_f)
50
- source = PrescribedHeatFlow(T_ref= 373.15)
51
- plate = HeatCapacitor(C= C1, T= 273.15)
52
- pot = HeatCapacitor(C= C2, T= 273.15)
53
- conduction = ThermalConductor(G= 1)
54
- air = ThermalConductor(G= 0.1)
55
- env = FixedTemperature(T= 293.15)
49
+ input = Blocks.TimeVaryingFunction(f = input_f)
50
+ source = PrescribedHeatFlow(T_ref = 373.15)
51
+ plate = HeatCapacitor(C = C1, T = 273.15)
52
+ pot = HeatCapacitor(C = C2, T = 273.15)
53
+ conduction = ThermalConductor(G = 1)
54
+ air = ThermalConductor(G = 0.1)
55
+ env = FixedTemperature(T = 293.15)
56
56
Tsensor = TemperatureSensor()
57
57
end
58
58
@equations begin
70
70
C2 = 15
71
71
end
72
72
@components begin
73
- input = Blocks.TimeVaryingFunction(f= input_f)
74
- source = PrescribedHeatFlow(T_ref= 373.15)
75
- pot = HeatCapacitor(C= C2, T= 273.15)
76
- air = ThermalConductor(G= 0.1)
77
- env = FixedTemperature(T= 293.15)
73
+ input = Blocks.TimeVaryingFunction(f = input_f)
74
+ source = PrescribedHeatFlow(T_ref = 373.15)
75
+ pot = HeatCapacitor(C = C2, T = 273.15)
76
+ air = ThermalConductor(G = 0.1)
77
+ env = FixedTemperature(T = 293.15)
78
78
Tsensor = TemperatureSensor()
79
79
end
80
80
@equations begin
91
91
## solve and plot the temperature of the pot in the 2 systems
92
92
93
93
prob1 = ODEProblem(sys1, Pair[], (0, 100.0))
94
- sol1 = solve(prob1, Tsit5(), reltol= 1e-6)
94
+ sol1 = solve(prob1, Tsit5(), reltol = 1e-6)
95
95
prob2 = ODEProblem(sys2, Pair[], (0, 100.0))
96
- sol2 = solve(prob2, Tsit5(), reltol= 1e-6)
97
- plot(sol1, idxs= sys1.pot.T, label= "pot.T in original system")
98
- plot!(sol2, idxs= sys1.pot.T, label= "pot.T in simplified system")
96
+ sol2 = solve(prob2, Tsit5(), reltol = 1e-6)
97
+ plot(sol1, idxs = sys1.pot.T, label = "pot.T in original system")
98
+ plot!(sol2, idxs = sys1.pot.T, label = "pot.T in simplified system")
99
99
```
100
100
101
101
If we take a closer look at the 2 models, the original system has 2 unknowns,
@@ -105,6 +105,7 @@ unknowns(sys1)
105
105
```
106
106
107
107
while the simplified system only has 1 unknown
108
+
108
109
``` @example potplate
109
110
unknowns(sys2)
110
111
```
@@ -127,12 +128,13 @@ always output positive numbers for positive inputs, so this also makes physical
127
128
begin
128
129
n_input = 2
129
130
n_output = 1
130
- chain = multi_layer_feed_forward(; n_input, n_output, depth=1, width=4, activation=Lux.swish)
131
+ chain = multi_layer_feed_forward(;
132
+ n_input, n_output, depth = 1, width = 4, activation = Lux.swish)
131
133
end
132
134
@components begin
133
135
port_a = HeatPort()
134
136
port_b = HeatPort()
135
- nn = NeuralNetworkBlock(; n_input, n_output, chain, rng= StableRNG(1337))
137
+ nn = NeuralNetworkBlock(; n_input, n_output, chain, rng = StableRNG(1337))
136
138
end
137
139
@parameters begin
138
140
T0 = 273.15
@@ -160,11 +162,11 @@ end
160
162
C2 = 15
161
163
end
162
164
@components begin
163
- input = Blocks.TimeVaryingFunction(f= input_f)
164
- source = PrescribedHeatFlow(T_ref= 373.15)
165
- pot = HeatCapacitor(C= C2, T= 273.15)
166
- air = ThermalConductor(G= 0.1)
167
- env = FixedTemperature(T= 293.15)
165
+ input = Blocks.TimeVaryingFunction(f = input_f)
166
+ source = PrescribedHeatFlow(T_ref = 373.15)
167
+ pot = HeatCapacitor(C = C2, T = 273.15)
168
+ air = ThermalConductor(G = 0.1)
169
+ env = FixedTemperature(T = 293.15)
168
170
Tsensor = TemperatureSensor()
169
171
thermal_nn = ThermalNN()
170
172
end
@@ -181,19 +183,19 @@ end
181
183
@named model = NeuralPot()
182
184
sys3 = mtkcompile(model)
183
185
184
- # Let's check that we can succesfully simulate the system in the
186
+ # Let's check that we can successfully simulate the system in the
185
187
# initial state
186
188
prob3 = ODEProblem(sys3, Pair[], (0, 100.0))
187
- sol3 = solve(prob3, Tsit5(), abstol= 1e-6, reltol= 1e-6)
189
+ sol3 = solve(prob3, Tsit5(), abstol = 1e-6, reltol = 1e-6)
188
190
@assert SciMLBase.successful_retcode(sol3)
189
191
```
190
192
191
193
Now that we have the system with the embedded neural network, we can start training the network.
192
194
The training will be formulated as an optimization problem where we will minimize the mean absolute squared distance
193
195
between the predictions of the new system and the data obtained from the original system.
194
196
In order to gain some insight into the training process we will also add a callback that will plot various quantities
195
- in the system versus their equivalents in the original system. In a more realistic scenarion we would not have access
196
- to the original system, but we could still monitor how well we fit the traning data and the system predictions.
197
+ in the system versus their equivalents in the original system. In a more realistic scenario we would not have access
198
+ to the original system, but we could still monitor how well we fit the training data and the system predictions.
197
199
198
200
``` @example potplate
199
201
using SymbolicIndexingInterface
@@ -209,23 +211,28 @@ x0 = prob3.ps[tp]
209
211
210
212
oop_update = setsym_oop(prob3, tp);
211
213
212
- plot_cb = (opt_state, loss) -> begin
214
+ plot_cb = (opt_state,
215
+ loss) -> begin
213
216
opt_state.iter % 1000 ≠ 0 && return false
214
217
@info "step $(opt_state.iter), loss: $loss"
215
218
216
219
(new_u0, new_p) = oop_update(prob3, opt_state.u)
217
- new_prob = remake(prob3, u0=new_u0, p=new_p)
218
- sol = solve(new_prob, Tsit5(), abstol=1e-8, reltol=1e-8)
219
-
220
- plt = plot(sol, layout=(2,3), idxs=[
221
- sys3.thermal_nn.nn.inputs[1], sys3.thermal_nn.x,
222
- sys3.thermal_nn.nn.outputs[1], sys3.thermal_nn.port_b.T,
223
- sys3.pot.T, sys3.pot.port.Q_flow],
224
- size=(950,800))
225
- plot!(plt, sol1, idxs=[
226
- (sys1.conduction.port_a.T-273.15)/10, sys1.conduction.port_a.T,
227
- sys1.conduction.port_a.Q_flow, sys1.conduction.port_b.T,
228
- sys1.pot.T, sys1.pot.port.Q_flow])
220
+ new_prob = remake(prob3, u0 = new_u0, p = new_p)
221
+ sol = solve(new_prob, Tsit5(), abstol = 1e-8, reltol = 1e-8)
222
+
223
+ plt = plot(sol,
224
+ layout = (2, 3),
225
+ idxs = [
226
+ sys3.thermal_nn.nn.inputs[1], sys3.thermal_nn.x,
227
+ sys3.thermal_nn.nn.outputs[1], sys3.thermal_nn.port_b.T,
228
+ sys3.pot.T, sys3.pot.port.Q_flow],
229
+ size = (950, 800))
230
+ plot!(plt,
231
+ sol1,
232
+ idxs = [
233
+ (sys1.conduction.port_a.T-273.15)/10, sys1.conduction.port_a.T,
234
+ sys1.conduction.port_a.Q_flow, sys1.conduction.port_b.T,
235
+ sys1.pot.T, sys1.pot.port.Q_flow])
229
236
display(plt)
230
237
false
231
238
end
@@ -236,7 +243,8 @@ function cost(x, opt_ps)
236
243
u0, p = oop_update(prob, x)
237
244
new_prob = remake(prob; u0, p)
238
245
239
- new_sol = solve(new_prob, Tsit5(), saveat=ts, abstol=1e-8, reltol=1e-8, verbose=false, sensealg=GaussAdjoint())
246
+ new_sol = solve(new_prob, Tsit5(), saveat = ts, abstol = 1e-8,
247
+ reltol = 1e-8, verbose = false, sensealg = GaussAdjoint())
240
248
241
249
!SciMLBase.successful_retcode(new_sol) && return Inf
242
250
@@ -249,39 +257,41 @@ data = sol1[sys1.pot.T]
249
257
get_T = getsym(prob3, sys3.pot.T)
250
258
opt_ps = (prob3, oop_update, data, sol1.t, get_T);
251
259
252
- op = OptimizationProblem(of, x0, opt_ps, )
260
+ op = OptimizationProblem(of, x0, opt_ps)
253
261
254
- res = solve(op, Adam(); maxiters= 10_000, callback= plot_cb)
262
+ res = solve(op, Adam(); maxiters = 10_000, callback = plot_cb)
255
263
op2 = OptimizationProblem(of, res.u, opt_ps)
256
- res2 = solve(op2, LBFGS(linesearch= BackTracking()); maxiters= 2000, callback= plot_cb)
264
+ res2 = solve(op2, LBFGS(linesearch = BackTracking()); maxiters = 2000, callback = plot_cb)
257
265
258
266
(new_u0, new_p) = oop_update(prob3, res2.u)
259
- new_prob1 = remake(prob3, u0=new_u0, p=new_p)
260
- new_sol1 = solve(new_prob1, Tsit5(), abstol=1e-6, reltol=1e-6)
261
-
262
- plt = plot(new_sol1, layout=(2,3), idxs=[
263
- sys3.thermal_nn.nn.inputs[1], sys3.thermal_nn.x,
264
- sys3.thermal_nn.nn.outputs[1], sys3.thermal_nn.port_b.T,
265
- sys3.pot.T, sys3.pot.port.Q_flow],
266
- size=(950,800))
267
- plot!(plt, sol1, idxs=[
268
- (sys1.conduction.port_a.T-273.15)/10, sys1.conduction.port_a.T,
269
- sys1.conduction.port_a.Q_flow, sys1.conduction.port_b.T,
270
- sys1.pot.T, sys1.pot.port.Q_flow], ls=:dash)
267
+ new_prob1 = remake(prob3, u0 = new_u0, p = new_p)
268
+ new_sol1 = solve(new_prob1, Tsit5(), abstol = 1e-6, reltol = 1e-6)
269
+
270
+ plt = plot(new_sol1,
271
+ layout = (2, 3),
272
+ idxs = [
273
+ sys3.thermal_nn.nn.inputs[1], sys3.thermal_nn.x,
274
+ sys3.thermal_nn.nn.outputs[1], sys3.thermal_nn.port_b.T,
275
+ sys3.pot.T, sys3.pot.port.Q_flow],
276
+ size = (950, 800))
277
+ plot!(plt,
278
+ sol1,
279
+ idxs = [
280
+ (sys1.conduction.port_a.T-273.15)/10, sys1.conduction.port_a.T,
281
+ sys1.conduction.port_a.Q_flow, sys1.conduction.port_b.T,
282
+ sys1.pot.T, sys1.pot.port.Q_flow],
283
+ ls = :dash)
271
284
```
272
285
273
286
As we can see from the final plot, the neural network fits very well: not only does the training data fit, but also the rest of the
274
287
predictions of the system match the original system. Let us also compare against the predictions of the incomplete system:
275
288
276
289
``` @example potplate
277
- (new_u0, new_p) = oop_update(prob3, res2.u)
278
- new_prob1 = remake(prob3, u0=new_u0, p=new_p)
279
- new_sol1 = solve(new_prob1, Tsit5(), abstol=1e-6, reltol=1e-6)
280
-
281
- plot(sol1, label=["original sys: pot T" "original sys: plate T"], lw=3)
282
- plot!(sol3; idxs=[sys3.pot.T], label="untrained UDE", lw=2.5)
283
- plot!(sol2; idxs=[sys2.pot.T], label="incomplete sys: pot T", lw=2.5)
284
- plot!(new_sol1; idxs=[sys3.pot.T, sys3.thermal_nn.x], label="trained UDE", ls=:dash, lw=2.5)
290
+ plot(sol1, label = ["original sys: pot T" "original sys: plate T"], lw = 3)
291
+ plot!(sol3; idxs = [sys3.pot.T], label = "untrained UDE", lw = 2.5)
292
+ plot!(sol2; idxs = [sys2.pot.T], label = "incomplete sys: pot T", lw = 2.5)
293
+ plot!(new_sol1; idxs = [sys3.pot.T, sys3.thermal_nn.x],
294
+ label = "trained UDE", ls = :dash, lw = 2.5)
285
295
```
286
296
287
297
Now that our neural network is trained, we can go a step further and use [ ` SymbolicRegression.jl ` ] ( https://github.com/MilesCranmer/SymbolicRegression.jl ) to find
0 commit comments