2
2
3
3
[ ` ModelingToolkitNeuralNets ` ] ( https://github.com/SciML/ModelingToolkitNeuralNets.jl ) provides 2 main interfaces for representing neural networks symbolically:
4
4
5
- * The [ ` NeuralNetworkBlock ` ] ( @ref ) , which represents the neural network as a block component
6
- * The [ ` SymbolicNeuralNetwork ` ] ( @ref ) , which represents the neural network via callable parameters
5
+ - The [ ` NeuralNetworkBlock ` ] ( @ref ) , which represents the neural network as a block component
6
+ - The [ ` SymbolicNeuralNetwork ` ] ( @ref ) , which represents the neural network via callable parameters
7
7
8
8
This tutorial will introduce the [ ` NeuralNetworkBlock ` ] ( @ref ) . This representation is useful in the context of hierarchical acausal component-based models.
9
9
10
10
For such models we have a component representation that is converted to a differential-algebraic equation (DAE) system, where the algebraic equations are given by the constraints and equalities between different component variables.
11
- The process of going from the component representation to the full DAE system at the end is reffered to as [ structural simplification] ( https://docs.sciml.ai/ModelingToolkit/stable/API/model_building/#System-simplification ) .
12
- In order to formulate Universal Differential Equations (UDEs) in this context, we could operate eiter operate before the structural simplification step or after that, on the
11
+ The process of going from the component representation to the full DAE system at the end is referred to as [ structural simplification] ( https://docs.sciml.ai/ModelingToolkit/stable/API/model_building/#System-simplification ) .
12
+ In order to formulate Universal Differential Equations (UDEs) in this context, we could operate either before the structural simplification step or after it, on the
13
13
resulting DAE system. We call these the component UDE formulation and the system UDE formulation.
14
14
15
15
The advantage of the component UDE formulation is that it allows us to represent the model
@@ -46,13 +46,13 @@ input_f(t) = (1+sin(0.005 * t^2))/2
46
46
C2 = 15
47
47
end
48
48
@components begin
49
- input = Blocks.TimeVaryingFunction(f= input_f)
50
- source = PrescribedHeatFlow(T_ref= 373.15)
51
- plate = HeatCapacitor(C= C1, T= 273.15)
52
- pot = HeatCapacitor(C= C2, T= 273.15)
53
- conduction = ThermalConductor(G= 1)
54
- air = ThermalConductor(G= 0.1)
55
- env = FixedTemperature(T= 293.15)
49
+ input = Blocks.TimeVaryingFunction(f = input_f)
50
+ source = PrescribedHeatFlow(T_ref = 373.15)
51
+ plate = HeatCapacitor(C = C1, T = 273.15)
52
+ pot = HeatCapacitor(C = C2, T = 273.15)
53
+ conduction = ThermalConductor(G = 1)
54
+ air = ThermalConductor(G = 0.1)
55
+ env = FixedTemperature(T = 293.15)
56
56
Tsensor = TemperatureSensor()
57
57
end
58
58
@equations begin
70
70
C2 = 15
71
71
end
72
72
@components begin
73
- input = Blocks.TimeVaryingFunction(f= input_f)
74
- source = PrescribedHeatFlow(T_ref= 373.15)
75
- pot = HeatCapacitor(C= C2, T= 273.15)
76
- air = ThermalConductor(G= 0.1)
77
- env = FixedTemperature(T= 293.15)
73
+ input = Blocks.TimeVaryingFunction(f = input_f)
74
+ source = PrescribedHeatFlow(T_ref = 373.15)
75
+ pot = HeatCapacitor(C = C2, T = 273.15)
76
+ air = ThermalConductor(G = 0.1)
77
+ env = FixedTemperature(T = 293.15)
78
78
Tsensor = TemperatureSensor()
79
79
end
80
80
@equations begin
91
91
## solve and plot the temperature of the pot in the 2 systems
92
92
93
93
prob1 = ODEProblem(sys1, Pair[], (0, 100.0))
94
- sol1 = solve(prob1, Tsit5(), reltol= 1e-6)
94
+ sol1 = solve(prob1, Tsit5(), reltol = 1e-6)
95
95
prob2 = ODEProblem(sys2, Pair[], (0, 100.0))
96
- sol2 = solve(prob2, Tsit5(), reltol= 1e-6)
97
- plot(sol1, idxs= sys1.pot.T, label= "pot.T in original system")
98
- plot!(sol2, idxs= sys1.pot.T, label= "pot.T in simplified system")
96
+ sol2 = solve(prob2, Tsit5(), reltol = 1e-6)
97
+ plot(sol1, idxs = sys1.pot.T, label = "pot.T in original system")
98
+ plot!(sol2, idxs = sys1.pot.T, label = "pot.T in simplified system")
99
99
```
100
100
101
101
If we take a closer look at the 2 models, the original system has 2 unknowns,
@@ -105,6 +105,7 @@ unknowns(sys1)
105
105
```
106
106
107
107
while the simplified system only has 1 unknown
108
+
108
109
``` @example potplate
109
110
unknowns(sys2)
110
111
```
@@ -127,12 +128,13 @@ always output positive numbers for positive inputs, so this also makes physical
127
128
begin
128
129
n_input = 2
129
130
n_output = 1
130
- chain = multi_layer_feed_forward(; n_input, n_output, depth=1, width=4, activation=Lux.swish)
131
+ chain = multi_layer_feed_forward(;
132
+ n_input, n_output, depth = 1, width = 4, activation = Lux.swish)
131
133
end
132
134
@components begin
133
135
port_a = HeatPort()
134
136
port_b = HeatPort()
135
- nn = NeuralNetworkBlock(; n_input, n_output, chain, rng= StableRNG(1337))
137
+ nn = NeuralNetworkBlock(; n_input, n_output, chain, rng = StableRNG(1337))
136
138
end
137
139
@parameters begin
138
140
T0 = 273.15
@@ -160,11 +162,11 @@ end
160
162
C2 = 15
161
163
end
162
164
@components begin
163
- input = Blocks.TimeVaryingFunction(f= input_f)
164
- source = PrescribedHeatFlow(T_ref= 373.15)
165
- pot = HeatCapacitor(C= C2, T= 273.15)
166
- air = ThermalConductor(G= 0.1)
167
- env = FixedTemperature(T= 293.15)
165
+ input = Blocks.TimeVaryingFunction(f = input_f)
166
+ source = PrescribedHeatFlow(T_ref = 373.15)
167
+ pot = HeatCapacitor(C = C2, T = 273.15)
168
+ air = ThermalConductor(G = 0.1)
169
+ env = FixedTemperature(T = 293.15)
168
170
Tsensor = TemperatureSensor()
169
171
thermal_nn = ThermalNN()
170
172
end
@@ -181,19 +183,19 @@ end
181
183
@named model = NeuralPot()
182
184
sys3 = mtkcompile(model)
183
185
184
- # Let's check that we can succesfully simulate the system in the
186
+ # Let's check that we can successfully simulate the system in the
185
187
# initial state
186
188
prob3 = ODEProblem(sys3, Pair[], (0, 100.0))
187
- sol3 = solve(prob3, Tsit5(), abstol= 1e-6, reltol= 1e-6)
189
+ sol3 = solve(prob3, Tsit5(), abstol = 1e-6, reltol = 1e-6)
188
190
@assert SciMLBase.successful_retcode(sol3)
189
191
```
190
192
191
193
Now that we have the system with the embedded neural network, we can start training the network.
192
194
The training will be formulated as an optimization problem where we will minimize the mean absolute squared distance
193
195
between the predictions of the new system and the data obtained from the original system.
194
196
In order to gain some insight into the training process we will also add a callback that will plot various quantities
195
- in the system versus their equivalents in the original system. In a more realistic scenarion we would not have access
196
- to the original system, but we could still monitor how well we fit the traning data and the system predictions.
197
+ in the system versus their equivalents in the original system. In a more realistic scenario we would not have access
198
+ to the original system, but we could still monitor how well we fit the training data and the system predictions.
197
199
198
200
``` @example potplate
199
201
using SymbolicIndexingInterface
@@ -209,23 +211,28 @@ x0 = prob3.ps[tp]
209
211
210
212
oop_update = setsym_oop(prob3, tp);
211
213
212
- plot_cb = (opt_state, loss) -> begin
214
+ plot_cb = (opt_state,
215
+ loss) -> begin
213
216
opt_state.iter % 1000 ≠ 0 && return false
214
217
@info "step $(opt_state.iter), loss: $loss"
215
218
216
219
(new_u0, new_p) = oop_update(prob3, opt_state.u)
217
- new_prob = remake(prob3, u0=new_u0, p=new_p)
218
- sol = solve(new_prob, Tsit5(), abstol=1e-8, reltol=1e-8)
219
-
220
- plt = plot(sol, layout=(2,3), idxs=[
221
- sys3.thermal_nn.nn.inputs[1], sys3.thermal_nn.x,
222
- sys3.thermal_nn.nn.outputs[1], sys3.thermal_nn.port_b.T,
223
- sys3.pot.T, sys3.pot.port.Q_flow],
224
- size=(950,800))
225
- plot!(plt, sol1, idxs=[
226
- (sys1.conduction.port_a.T-273.15)/10, sys1.conduction.port_a.T,
227
- sys1.conduction.port_a.Q_flow, sys1.conduction.port_b.T,
228
- sys1.pot.T, sys1.pot.port.Q_flow])
220
+ new_prob = remake(prob3, u0 = new_u0, p = new_p)
221
+ sol = solve(new_prob, Tsit5(), abstol = 1e-8, reltol = 1e-8)
222
+
223
+ plt = plot(sol,
224
+ layout = (2, 3),
225
+ idxs = [
226
+ sys3.thermal_nn.nn.inputs[1], sys3.thermal_nn.x,
227
+ sys3.thermal_nn.nn.outputs[1], sys3.thermal_nn.port_b.T,
228
+ sys3.pot.T, sys3.pot.port.Q_flow],
229
+ size = (950, 800))
230
+ plot!(plt,
231
+ sol1,
232
+ idxs = [
233
+ (sys1.conduction.port_a.T-273.15)/10, sys1.conduction.port_a.T,
234
+ sys1.conduction.port_a.Q_flow, sys1.conduction.port_b.T,
235
+ sys1.pot.T, sys1.pot.port.Q_flow])
229
236
display(plt)
230
237
false
231
238
end
@@ -236,7 +243,8 @@ function cost(x, opt_ps)
236
243
u0, p = oop_update(prob, x)
237
244
new_prob = remake(prob; u0, p)
238
245
239
- new_sol = solve(new_prob, Tsit5(), saveat=ts, abstol=1e-8, reltol=1e-8, verbose=false, sensealg=GaussAdjoint())
246
+ new_sol = solve(new_prob, Tsit5(), saveat = ts, abstol = 1e-8,
247
+ reltol = 1e-8, verbose = false, sensealg = GaussAdjoint())
240
248
241
249
!SciMLBase.successful_retcode(new_sol) && return Inf
242
250
@@ -249,39 +257,41 @@ data = sol1[sys1.pot.T]
249
257
get_T = getsym(prob3, sys3.pot.T)
250
258
opt_ps = (prob3, oop_update, data, sol1.t, get_T);
251
259
252
- op = OptimizationProblem(of, x0, opt_ps, )
260
+ op = OptimizationProblem(of, x0, opt_ps)
253
261
254
- res = solve(op, Adam(); maxiters= 10_000, callback= plot_cb)
262
+ res = solve(op, Adam(); maxiters = 10_000, callback = plot_cb)
255
263
op2 = OptimizationProblem(of, res.u, opt_ps)
256
- res2 = solve(op2, LBFGS(linesearch= BackTracking()); maxiters= 2000, callback= plot_cb)
264
+ res2 = solve(op2, LBFGS(linesearch = BackTracking()); maxiters = 2000, callback = plot_cb)
257
265
258
266
(new_u0, new_p) = oop_update(prob3, res2.u)
259
- new_prob1 = remake(prob3, u0=new_u0, p=new_p)
260
- new_sol1 = solve(new_prob1, Tsit5(), abstol=1e-6, reltol=1e-6)
261
-
262
- plt = plot(new_sol1, layout=(2,3), idxs=[
263
- sys3.thermal_nn.nn.inputs[1], sys3.thermal_nn.x,
264
- sys3.thermal_nn.nn.outputs[1], sys3.thermal_nn.port_b.T,
265
- sys3.pot.T, sys3.pot.port.Q_flow],
266
- size=(950,800))
267
- plot!(plt, sol1, idxs=[
268
- (sys1.conduction.port_a.T-273.15)/10, sys1.conduction.port_a.T,
269
- sys1.conduction.port_a.Q_flow, sys1.conduction.port_b.T,
270
- sys1.pot.T, sys1.pot.port.Q_flow], ls=:dash)
267
+ new_prob1 = remake(prob3, u0 = new_u0, p = new_p)
268
+ new_sol1 = solve(new_prob1, Tsit5(), abstol = 1e-6, reltol = 1e-6)
269
+
270
+ plt = plot(new_sol1,
271
+ layout = (2, 3),
272
+ idxs = [
273
+ sys3.thermal_nn.nn.inputs[1], sys3.thermal_nn.x,
274
+ sys3.thermal_nn.nn.outputs[1], sys3.thermal_nn.port_b.T,
275
+ sys3.pot.T, sys3.pot.port.Q_flow],
276
+ size = (950, 800))
277
+ plot!(plt,
278
+ sol1,
279
+ idxs = [
280
+ (sys1.conduction.port_a.T-273.15)/10, sys1.conduction.port_a.T,
281
+ sys1.conduction.port_a.Q_flow, sys1.conduction.port_b.T,
282
+ sys1.pot.T, sys1.pot.port.Q_flow],
283
+ ls = :dash)
271
284
```
272
285
273
286
As we can see from the final plot, the neural network fits very well: not only does the training data fit, but also the rest of the
274
287
predictions of the system match the original system. Let us also compare against the predictions of the incomplete system:
275
288
276
289
``` @example potplate
277
- (new_u0, new_p) = oop_update(prob3, res2.u)
278
- new_prob1 = remake(prob3, u0=new_u0, p=new_p)
279
- new_sol1 = solve(new_prob1, Tsit5(), abstol=1e-6, reltol=1e-6)
280
-
281
- plot(sol1, label=["original sys: pot T" "original sys: plate T"], lw=3)
282
- plot!(sol3; idxs=[sys3.pot.T], label="untrained UDE", lw=2.5)
283
- plot!(sol2; idxs=[sys2.pot.T], label="incomplete sys: pot T", lw=2.5)
284
- plot!(new_sol1; idxs=[sys3.pot.T, sys3.thermal_nn.x], label="trained UDE", ls=:dash, lw=2.5)
290
+ plot(sol1, label = ["original sys: pot T" "original sys: plate T"], lw = 3)
291
+ plot!(sol3; idxs = [sys3.pot.T], label = "untrained UDE", lw = 2.5)
292
+ plot!(sol2; idxs = [sys2.pot.T], label = "incomplete sys: pot T", lw = 2.5)
293
+ plot!(new_sol1; idxs = [sys3.pot.T, sys3.thermal_nn.x],
294
+ label = "trained UDE", ls = :dash, lw = 2.5)
285
295
```
286
296
287
297
Now that our neural network is trained, we can go a step further and use [ ` SymbolicRegression.jl ` ] ( https://github.com/MilesCranmer/SymbolicRegression.jl ) to find
0 commit comments