Commit f6dc71e

Do multiple shooting (#505)
* Do multiple shooting
* Update DiffEqFlux.jl: included multiple_shooting.jl and exported the multiple_shoot method.
* Update multiple_shooting.jl: changed the method name to multiple_shoot and modified the method so that it now returns individual group predictions.
* Create multiple_shooting.md: added the docs for the multiple_shoot method.
* Create multiple_shoot.jl: added a test file for src/multiple_shooting.jl.
* Update multiple_shooting.md: corrected line lengths, added training with the BFGS optimizer after the ADAM optimizer, and changed the output image with respect to `grp_size` = 1.
1 parent 46811b2 commit f6dc71e

File tree

4 files changed: +260 −0 lines changed
multiple_shooting.md

Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
# Multiple Shooting

In Multiple Shooting, the training data is split into overlapping intervals.
The solver is then trained on the individual intervals. If the end conditions of any
interval coincide with the initial conditions of the next interval, then the
joined/combined solution is the same as solving over the whole dataset (without
splitting).

To ensure that the overlapping parts of two consecutive intervals coincide,
we add a penalizing term, `continuity_strength * absolute_value_of( prediction
of the last point of group i - prediction of the first point of group i+1 )`, to
the loss.

Note that `continuity_strength` should have a large positive value so that the
solver is heavily penalized whenever it predicts discontinuous values.
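As a minimal sketch of this penalty for two consecutive groups (illustrative only; `continuity_penalty`, `pred_i`, and `pred_next` are names chosen here and are not part of the package API):

```julia
# Minimal sketch of the continuity penalty between group i and group i+1
# (illustrative; not the package's internal implementation).
# `pred_i` and `pred_next` are the state-by-timestep prediction matrices of
# the two groups; `continuity_strength` scales the penalty.
continuity_penalty(pred_i, pred_next, continuity_strength) =
    continuity_strength * sum(abs, pred_i[:, end] .- pred_next[:, 1])
```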
The following is a working demo using multiple shooting:

```julia
using DiffEqFlux, OrdinaryDiffEq, Flux, Optim, Plots

# Define initial conditions and timesteps
datasize = 30
u0 = Float32[2.0, 0.0]
tspan = (0.0f0, 5.0f0)
tsteps = range(tspan[1], tspan[2], length = datasize)

# Get the data
function trueODEfunc(du, u, p, t)
  true_A = [-0.1 2.0; -2.0 -0.1]
  du .= ((u.^3)'true_A)'
end
prob_trueode = ODEProblem(trueODEfunc, u0, tspan)
ode_data = Array(solve(prob_trueode, Tsit5(), saveat = tsteps))

# Define the Neural Network
dudt2 = FastChain((x, p) -> x.^3,
                  FastDense(2, 16, tanh),
                  FastDense(16, 2))

prob_neuralode = NeuralODE(dudt2, (0.0, 5.0), Tsit5(), saveat = tsteps)

function plot_function_for_multiple_shoot(plt, pred, grp_size)
  step = 1
  if (grp_size != 1)
    step = grp_size - 1
  end
  if (grp_size == datasize)
    scatter!(plt, tsteps, pred[1][1,:], label = "pred")
  else
    for i in 1:step:datasize-grp_size
      # The term `trunc(Integer, (i-1)/(grp_size-1)+1)` goes from 1, 2, ..., N where N is the
      # total number of groups that can be formed from `ode_data`
      # (in other words, N = trunc(Integer, (datasize-1)/(grp_size-1)))
      scatter!(plt, tsteps[i:i+step], pred[trunc(Integer, (i-1)/step+1)][1,:],
               label = "grp"*string(trunc(Integer, (i-1)/step+1)))
    end
  end
end

callback = function (p, l, pred, predictions; doplot = true)
  display(l)
  if doplot
    # plot the original data
    plt = scatter(tsteps[1:size(pred,2)], ode_data[1,1:size(pred,2)], label = "data")

    # plot the different predictions for the individual shoots
    plot_function_for_multiple_shoot(plt, predictions, grp_size_param)

    # plot a single shooting performance of our multiple shooting training
    # (this is what the solver predicts after the training is done)
    # scatter!(plt, tsteps[1:size(pred,2)], pred[1,:], label = "pred")

    display(plot(plt))
  end
  return false
end

# Define parameters for Multiple Shooting
grp_size_param = 1
loss_multiplier_param = 100

neural_ode_f(u, p, t) = dudt2(u, p)
prob_param = ODEProblem(neural_ode_f, u0, tspan, initial_params(dudt2))

function loss_function_param(ode_data, pred)::Float32
  return sum(abs2, (ode_data .- pred))^2
end

function loss_neuralode(p)
  return multiple_shoot(p, ode_data, tsteps, prob_param, loss_function_param,
                        grp_size_param, loss_multiplier_param)
end

result_neuralode = DiffEqFlux.sciml_train(loss_neuralode, prob_neuralode.p,
                                          ADAM(0.05), cb = callback,
                                          maxiters = 300)
callback(result_neuralode.minimizer, loss_neuralode(result_neuralode.minimizer)...; doplot = true)

result_neuralode_2 = DiffEqFlux.sciml_train(loss_neuralode, result_neuralode.minimizer,
                                            BFGS(), cb = callback,
                                            maxiters = 100, allow_f_increases = true)
callback(result_neuralode_2.minimizer, loss_neuralode(result_neuralode_2.minimizer)...; doplot = true)
```
Here are the plots we get from the code above:

![pic](https://user-images.githubusercontent.com/58384989/111881194-6de9a480-89d5-11eb-8f21-6481d1e22521.PNG)

The picture on the left shows how well our Neural Network does on a single shoot
after training it through `multiple_shoot`.
The picture on the right shows the predictions of each group. (Notice that there
are overlapping points as well; these are the points we are trying to make coincide.)

Here is an output with `grp_size = 30`, which is the same as solving over the whole
interval without splitting (also called single shooting):

![pic_single_shoot3](https://user-images.githubusercontent.com/58384989/111843307-f0fff180-8926-11eb-9a06-2731113173bc.PNG)

It is clear from the picture above that a single shoot doesn't perform very well
on the ODE problem we have and gets stuck in a local minimum.
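One way to reproduce a single-shooting run like the one above from the demo code is to set the group size to the full dataset before training. This is only a sketch reusing the names defined in the demo (`result_single_shoot` is an illustrative name, not part of the original snippet):

```julia
# Illustrative only: with the group size equal to the dataset size,
# `multiple_shoot` solves over the whole interval in one group, which
# reduces to plain single shooting.
grp_size_param = datasize   # 30

result_single_shoot = DiffEqFlux.sciml_train(loss_neuralode, prob_neuralode.p,
                                             ADAM(0.05), cb = callback,
                                             maxiters = 300)
```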

src/DiffEqFlux.jl

Lines changed: 3 additions & 0 deletions
@@ -82,6 +82,7 @@ include("tensor_product_basis.jl")
 include("tensor_product_layer.jl")
 include("collocation.jl")
 include("hnn.jl")
+include("multiple_shooting.jl")

 export diffeq_fd, diffeq_rd, diffeq_adjoint
 export DeterministicCNF, FFJORD, NeuralODE, NeuralDSDE, NeuralSDE, NeuralCDDE, NeuralDAE, NeuralODEMM, TensorLayer, AugmentedNDELayer, SplineLayer, NeuralHamiltonianDE
@@ -96,4 +97,6 @@ export TriweightKernel, TricubeKernel, GaussianKernel, CosineKernel
 export LogisticKernel, SigmoidKernel, SilvermanKernel
 export collocate_data

+export multiple_shoot
+
 end

src/multiple_shooting.jl

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
"""
Returns the total loss after trying a 'Direct multiple shooting' on the ODE data,
the prediction on the whole ODE data, and an array of predictions from each of the
groups (smaller intervals).

In Direct Multiple Shooting, the Neural Network divides the interval into smaller
intervals and solves for them separately.
The default group size is 5, implying the whole dataset is divided into groups of 5
and the Neural Network solves on them individually.
The default continuity term is 100, implying any losses arising from the
non-continuity of 2 different groups are scaled by 100.

```julia
multiple_shoot(p, ode_data, tsteps, prob, loss_function, grp_size = 5, continuity_term = 100)
```

Arguments:
- `p`: The parameters of the Neural Network to be trained.
- `ode_data`: Original data to be modelled.
- `tsteps`: Timesteps on which `ode_data` was calculated.
- `prob`: ODE problem that the Neural Network attempts to solve.
- `loss_function`: Any arbitrary function to calculate loss.
- `grp_size`: The group size achieved after splitting `ode_data` into groups of equal size.
- `continuity_term`: Multiplying factor to ensure continuity of predictions throughout different groups.

!!! note
    The parameter `continuity_term` should be a relatively big number to enforce a large penalty
    whenever the last point of any group doesn't coincide with the first point of the next group.
"""
function multiple_shoot(p :: Array, ode_data :: Array, tsteps, prob :: ODEProblem, loss_function :: Function, grp_size :: Integer = 5, continuity_term :: Integer = 100)
  datasize = length(ode_data[1,:])

  @assert (grp_size >= 1 && grp_size <= datasize) "grp_size can't be < 1 or > number of data points"

  tot_loss = 0

  if (grp_size == datasize)
    grp_predictions = [solve(remake(prob, p = p, tspan = (tsteps[1], tsteps[datasize]), u0 = ode_data[:,1]), Tsit5(), saveat = tsteps)]
    tot_loss = loss_function(ode_data, grp_predictions[1][:, 1:grp_size])
    return tot_loss, grp_predictions[1], grp_predictions
  end

  if (grp_size == 1)
    # store individual single shooting predictions for each group
    grp_predictions = [solve(remake(prob, p = p, tspan = (tsteps[i], tsteps[i+1]), u0 = ode_data[:,i]), Tsit5(), saveat = tsteps[i:i+1]) for i in 1:datasize-1]

    # calculate the multiple shooting loss from the single shoots done in the step above
    for i in 1:datasize-1
      tot_loss += loss_function(ode_data[:, i:i+1], grp_predictions[i][:, 1:grp_size]) + (continuity_term * sum(abs, grp_predictions[i][:,2] - ode_data[:, i+1]))
    end

    # single shooting prediction from ode_data[:,1] (= u0)
    pred = solve(remake(prob, p = p, tspan = (tsteps[1], tsteps[datasize]), u0 = ode_data[:,1]), Tsit5(), saveat = tsteps)
    return tot_loss, pred, grp_predictions
  end

  # multiple shooting predictions
  grp_predictions = [solve(remake(prob, p = p, tspan = (tsteps[i], tsteps[i+grp_size-1]), u0 = ode_data[:,i]), Tsit5(), saveat = tsteps[i:i+grp_size-1]) for i in 1:grp_size-1:datasize-grp_size]

  # calculate the multiple shooting loss
  for i in 1:grp_size-1:datasize-grp_size
    # The term `trunc(Integer, (i-1)/(grp_size-1)+1)` goes from 1, 2, ..., N where N is the
    # total number of groups that can be formed from `ode_data`
    # (in other words, N = trunc(Integer, (datasize-1)/(grp_size-1)))
    tot_loss += loss_function(ode_data[:, i:i+grp_size-1], grp_predictions[trunc(Integer, (i-1)/(grp_size-1)+1)][:, 1:grp_size]) + (continuity_term * sum(abs, grp_predictions[trunc(Integer, (i-1)/(grp_size-1)+1)][:, grp_size] - ode_data[:, i+grp_size-1]))
  end

  # single shooting prediction
  pred = solve(remake(prob, p = p, tspan = (tsteps[1], tsteps[datasize]), u0 = ode_data[:,1]), Tsit5(), saveat = tsteps)
  return tot_loss, pred, grp_predictions
end
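A quick sanity check of the indexing comment above, assuming the illustrative values `datasize = 30` and `grp_size = 5` (these values are not part of the diff):

```julia
# Grouping arithmetic used in `multiple_shoot`, checked for datasize = 30
# and grp_size = 5 (illustrative values).
datasize, grp_size = 30, 5

# Group start indices: 1, 5, 9, 13, 17, 21, 25 (each group spans i:i+grp_size-1)
starts = collect(1:grp_size-1:datasize-grp_size)

# Total number of groups: N = trunc(Integer, (datasize-1)/(grp_size-1)) = 7
N = trunc(Integer, (datasize-1)/(grp_size-1))

@assert length(starts) == N
# The index expression maps each group's start i to its group number 1, 2, ..., N
@assert [trunc(Integer, (i-1)/(grp_size-1)+1) for i in starts] == collect(1:N)
```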

test/multiple_shoot.jl

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
using DiffEqFlux, OrdinaryDiffEq, Flux, Optim, Test

# General loss function to compare single shooting and multiple shooting predictions
function general_loss_function(result_neuralode)
  return sum(abs2, (ode_data[:,:] .- Array(prob_neuralode(u0, result_neuralode.minimizer))))
end

# Define initial conditions and timesteps
datasize = 30
u0 = Float32[2.0, 0.0]
tspan = (0.0f0, 5.0f0)
tsteps = range(tspan[1], tspan[2], length = datasize)

# Get the data
function trueODEfunc(du, u, p, t)
  true_A = [-0.1 2.0; -2.0 -0.1]
  du .= ((u.^3)'true_A)'
end
prob_trueode = ODEProblem(trueODEfunc, u0, tspan)
ode_data = Array(solve(prob_trueode, Tsit5(), saveat = tsteps))

# Define the Neural Network
dudt2 = FastChain((x, p) -> x.^3,
                  FastDense(2, 16, tanh),
                  FastDense(16, 2))
prob_neuralode = NeuralODE(dudt2, (0.0, 5.0), Tsit5(), saveat = tsteps)

function loss_neuralode(p)
  pred = Array(prob_neuralode(u0, p))
  loss = sum(abs2, (ode_data[:, 1:size(pred,2)] .- pred))
  return loss, pred
end

result_neuralode = DiffEqFlux.sciml_train(loss_neuralode, prob_neuralode.p,
                                          ADAM(0.05),
                                          maxiters = 300)

single_shoot_loss = general_loss_function(result_neuralode)
println("single_shoot_loss: ", single_shoot_loss)

# Define parameters for Multiple Shooting
grp_size_param = 1
loss_multiplier_param = 100

neural_ode_f(u, p, t) = dudt2(u, p)
prob_param = ODEProblem(neural_ode_f, u0, tspan, initial_params(dudt2))

function loss_function_param(ode_data, pred)::Float32
  return sum(abs2, (ode_data .- pred))^2
end

function loss_neuralode_param(p)
  return multiple_shoot(p, ode_data, tsteps, prob_param, loss_function_param,
                        grp_size_param, loss_multiplier_param)
end

multiple_shoot_result_neuralode_1 = DiffEqFlux.sciml_train(loss_neuralode_param, prob_neuralode.p,
                                                           ADAM(0.05),
                                                           maxiters = 300)

multiple_shoot_loss_1 = general_loss_function(multiple_shoot_result_neuralode_1)
println("multiple_shoot_loss_1: ", multiple_shoot_loss_1)

# test for grp_size = 1
@test multiple_shoot_loss_1 < single_shoot_loss

# test for grp_size = 5
grp_size_param = 5
multiple_shoot_result_neuralode_2 = DiffEqFlux.sciml_train(loss_neuralode_param, prob_neuralode.p,
                                                           ADAM(0.05),
                                                           maxiters = 300)

multiple_shoot_loss_2 = general_loss_function(multiple_shoot_result_neuralode_2)
println("multiple_shoot_loss_2: ", multiple_shoot_loss_2)

@test multiple_shoot_loss_2 < single_shoot_loss

0 commit comments
