`docs/src/examples/neural_ode/simplechains.md`
First, we'll need data for training the NeuralODE, which can be obtained by solving the ODE `u' = f(u,p,t)` numerically using the SciML ecosystem in Julia.
```@example sc_neuralode
import SimpleChains, OrdinaryDiffEq as ODE, SciMLSensitivity as SMS, Optimization as OPT,
       OptimizationOptimisers as OPO, Plots
```
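The data-generation code itself falls outside this diff hunk. As a minimal sketch, assuming the cubic spiral ODE commonly used in NeuralODE tutorials (the names `trueODEfunc` and `ode_data` and the specific coefficients here are illustrative, not taken from the file):

```julia
u0 = Float32[2.0, 0.0]
tspan = (0.0f0, 1.5f0)
tsteps = range(tspan[1], tspan[2]; length = 30)

# Hypothetical ground-truth dynamics: a cubic spiral, du = A * u.^3
function trueODEfunc(du, u, p, t)
    true_A = Float32[-0.1 2.0; -2.0 -0.1]
    du .= ((u .^ 3)' * true_A)'
end

# Solve numerically and store the trajectory as training data
prob_trueode = ODE.ODEProblem(trueODEfunc, u0, tspan)
ode_data = Array(ODE.solve(prob_trueode, ODE.Tsit5(); saveat = tsteps))
```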
With the ground truth data generated and visualized, we are now ready to construct a Universal Differential Equation (UDE) by replacing the nonlinear term $U^2V$ with a neural network. The next section outlines how we define this hybrid model and train it to recover the reaction dynamics from data.
Here, $\mathcal{N}_\theta(U, V)$ is trained to approximate the true interaction term $U^2V$.
First, we define and configure the neural network to be used for training. The implementation is as follows:
```@example bruss
import Lux, Random, Optimization as OPT, OptimizationOptimJL as OOJ,
       SciMLSensitivity as SMS, Zygote

model = Lux.Chain(Lux.Dense(2 => 16, tanh), Lux.Dense(16 => 1))
rng = Random.default_rng()
ps, st = Lux.setup(rng, model)  # standard Lux initialization (assumed; the hunk ends before this line)
```
We use a simple fully connected neural network with one hidden layer of 16 tanh-activated neurons.
To ensure consistency between the ground truth simulation and the learned Universal Differential Equation (UDE) model, we preserve the same spatial discretization scheme used in the original ODEProblem. This includes:
- the finite difference Laplacian,
- periodic boundary conditions, and
- the external forcing function.
The only change lies in the replacement of the known nonlinear term $U^2V$ with a neural network approximation $\mathcal{N}_\theta(U, V)$. This design enables the UDE to learn complex or unknown dynamics from data while maintaining the underlying physical structure of the system.
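Concretely, assuming the standard Brusselator reaction terms (a sketch; the tutorial's actual constants $A$, $B$, $\alpha$, and forcing $f$ are defined where the ground-truth problem is set up), the UDE system reads:

```math
\begin{aligned}
\frac{\partial U}{\partial t} &= B + \mathcal{N}_\theta(U, V) - (A + 1)\,U + \alpha \nabla^2 U + f(x, y, t), \\
\frac{\partial V}{\partial t} &= A\,U - \mathcal{N}_\theta(U, V) + \alpha \nabla^2 V.
\end{aligned}
```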
The function below implements this hybrid formulation:

```@example bruss
function pde_ude!(du, u, ps_nn, t)
    αdx = alpha / dx^2
    # ... (remainder of the function lies outside this diff hunk)
end
```
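Since the hunk truncates `pde_ude!`, here is a minimal sketch of what such a hybrid right-hand side typically looks like. It assumes an `N × N` periodic grid with `u[i, j, 1] = U` and `u[i, j, 2] = V`, standard Brusselator constants `A` and `B`, grid coordinates `x` and `y`, a forcing function `f_forcing`, and the Lux calling convention `model(input, ps, st)`; all of these names are illustrative rather than taken from the file:

```julia
function pde_ude_sketch!(du, u, ps_nn, t)
    αdx = alpha / dx^2
    for j in 1:N, i in 1:N
        # Periodic boundary conditions via wrap-around indexing
        ip1 = i == N ? 1 : i + 1
        im1 = i == 1 ? N : i - 1
        jp1 = j == N ? 1 : j + 1
        jm1 = j == 1 ? N : j - 1
        U = u[i, j, 1]
        V = u[i, j, 2]
        # Five-point finite-difference Laplacian
        lapU = u[im1, j, 1] + u[ip1, j, 1] + u[i, jm1, 1] + u[i, jp1, 1] - 4U
        lapV = u[im1, j, 2] + u[ip1, j, 2] + u[i, jm1, 2] + u[i, jp1, 2] - 4V
        # Neural surrogate in place of the known U^2 * V reaction term
        nn = model([U, V], ps_nn, st)[1][1]
        du[i, j, 1] = αdx * lapU + B + nn - (A + 1) * U + f_forcing(x[i], y[j], t)
        du[i, j, 2] = αdx * lapV + A * U - nn
    end
end
```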
To train the neural network $\mathcal{N}_\theta(U, V)$ embedded in the UDE, we define a loss function that measures how closely the solution of the UDE matches the ground truth data generated earlier.
The loss is computed as the sum of squared errors between the predicted solution from the UDE and the true solution at each saved time point. If the solver fails (e.g., due to numerical instability or incorrect parameters), we return an infinite loss to discard that configuration during optimization. We use `FBDF()` as the solver due to the stiff nature of the Brusselator equation. Other solvers like `KenCarp47()` could also be used.
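In symbols, with $u_\theta(t_i)$ the UDE prediction and $u^{\text{data}}(t_i)$ the saved ground truth at each time point:

```math
L(\theta) = \sum_{i} \left\lVert u_\theta(t_i) - u^{\text{data}}(t_i) \right\rVert^2
```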
To efficiently compute gradients of the loss with respect to the neural network parameters, we use an adjoint sensitivity method (`GaussAdjoint`), which performs high-accuracy quadrature-based integration of the adjoint equations. This approach enables scalable and memory-efficient training for stiff PDEs by avoiding full trajectory storage while maintaining accurate gradient estimates.
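The place where the adjoint is selected is not shown in this hunk. As a sketch, `GaussAdjoint` is typically passed through the `sensealg` keyword of the `solve` call inside the loss; the file's actual call may differ:

```julia
# Illustrative only: select the quadrature-based adjoint for gradient computation
sol = ODE.solve(prob, ODE.FBDF(); saveat = t_points,
                sensealg = SMS.GaussAdjoint(autojacvec = SMS.ZygoteVJP()))
```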
The loss function and initial evaluation are implemented as follows:
```@example bruss
println("[Loss] Defining loss function...")
function loss_fn(ps, _)
    prob = ODE.remake(prob_ude_template, p = ps)
    sol = ODE.solve(prob, ODE.FBDF(), saveat = t_points)
    # Failed solve
    if !SciMLBase.successful_retcode(sol)
        return Inf32
    end
    # ... (sum-of-squared-errors accumulation lies outside this diff hunk)
end
```
Once the loss function is defined, we use the ADAM optimizer to train the neural network. The optimization problem is defined using SciML's `Optimization.jl` tools, and gradients are computed via automatic differentiation using `AutoZygote()` from `SciMLSensitivity`:
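The training block itself is unchanged in this diff, so it is not shown. A minimal sketch of the setup implied by the paragraph above, assuming a hypothetical `ps_init` for the initial network parameters and an `OptimizationOptimisers` import for the Adam rule (the file imports `OptimizationOptimJL as OOJ`, which may supply the variant actually used):

```julia
import OptimizationOptimisers as OPO  # assumed source of the Adam rule for this sketch

# Wrap the loss with an AD backend, build the problem, and train
optf = OPT.OptimizationFunction(loss_fn, OPT.AutoZygote())
optprob = OPT.OptimizationProblem(optf, ps_init)  # ps_init: hypothetical initial parameters
result = OPT.solve(optprob, OPO.Adam(1e-2); maxiters = 200)
```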
After training the Universal Differential Equation (UDE), we compare the predicted dynamics to the ground truth for both chemical species.
The low training loss shows that the neural network in the UDE captured the underlying dynamics and learned the $U^2V$ term in the partial differential equation.