# %% [markdown]
# # Evaluating a `SINDy` model
#
# There are several broad ways to evaluate pysindy models:
# * The functional form, or coefficient evaluation
# * The instantaneous derivative prediction
# * The simulation forwards in time
#
# Each of these methods may be the best for a given use case,
# but each also has unique challenges.
# This notebook will take you through different ways you can evaluate a pysindy model.
# It will illustrate the use cases and the drawbacks of each.
#
#
# Let's take a look at some models. Some will be good, some will be bad.
# %%
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

if __name__ != "testing":
    from example_data import get_models
else:
    from mock_data import get_models

from utils import compare_coefficient_plots

# %%
t, x, good_model, ok_model, bad_model = get_models()

# %% [markdown]
# Our data is the Lorenz system, and each model is trained on progressively less data:

# %%
ax = plt.figure().add_subplot(projection="3d")
ax.plot(x[:, 0], x[:, 1], x[:, 2])

# %% [markdown]
# ## Coefficient evaluation
#
# The most straightforward way to evaluate a model is by seeing how well it compares
# to the true equations. The model coefficients are stored in `model.optimizer.coef_`.

# %%
print("Good:\n")
good_model.print()
print("\nSo-so:")
ok_model.print()
print("\nBad:")
bad_model.print()

# %%
good_model.optimizer.coef_

# %% [markdown]
# This is an array, with one equation per row. Each column represents a term from
# `model.get_feature_names()`, so it is possible to construct an array representing
# the true coefficients, then compare the difference.

# %%
features = good_model.get_feature_names()
features

# %%
lorenz_coef = np.array(
    [
        [0, -10, 10, 0, 0, 0, 0, 0, 0, 0],
        [0, 28, -1, 0, 0, 0, -1, 0, 0, 0],
        [0, 0, 0, -8.0 / 3, 0, 1, 0, 0, 0, 0],
    ],
    dtype=float,
)
print(f"Good model MSE: {mean_squared_error(lorenz_coef, good_model.optimizer.coef_)}")
print(f"Good model MAE: {mean_absolute_error(lorenz_coef, good_model.optimizer.coef_)}")
print(f"Ok model MSE: {mean_squared_error(lorenz_coef, ok_model.optimizer.coef_)}")
print(f"Ok model MAE: {mean_absolute_error(lorenz_coef, ok_model.optimizer.coef_)}")
print(f"Bad model MSE: {mean_squared_error(lorenz_coef, bad_model.optimizer.coef_)}")
print(f"Bad model MAE: {mean_absolute_error(lorenz_coef, bad_model.optimizer.coef_)}")

# %% [markdown]
# These coefficients can also be compared visually, e.g. using `pyplot.imshow` or
# `seaborn.heatmap`; a minimal `imshow` sketch follows.
# The pysindy-experiments package also has plotting and comparison utilities, which
# have been copied into an adjacent utility module and are used in the cell after that:

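# %% [markdown]
# A minimal `imshow` sketch. The colormap, scaling, and tick styling here are just
# illustrative choices, not part of any pysindy API:

# %%
fig, (ax_fit, ax_true) = plt.subplots(1, 2, figsize=[8, 2])
vmax = np.abs(lorenz_coef).max()
for ax, coefs, title in (
    (ax_fit, good_model.optimizer.coef_, "Good model"),
    (ax_true, lorenz_coef, "True Lorenz"),
):
    # Shared, symmetric color scale so positive and negative terms are comparable
    ax.imshow(coefs, cmap="RdBu_r", vmin=-vmax, vmax=vmax)
    ax.set_xticks(range(len(features)))
    ax.set_xticklabels(features, rotation=90)
    ax.set_yticks(range(3))
    ax.set_yticklabels(["x", "y", "z"])
    ax.set_title(title)
fig.tight_layout()
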
# %%
fig = plt.figure(figsize=[8, 4])
axes = fig.subplots(1, 4)
fig = compare_coefficient_plots(
    good_model.optimizer.coef_,
    lorenz_coef,
    input_features=["x", "y", "z"],
    feature_names=good_model.get_feature_names(),
    axs=axes[:2],
)
axes[1].set_title("Good model")
fig = compare_coefficient_plots(
    ok_model.optimizer.coef_,
    lorenz_coef,
    input_features=["x", "y", "z"],
    feature_names=ok_model.get_feature_names(),
    axs=[axes[0], axes[2]],
)
axes[2].set_title("OK model")
fig = compare_coefficient_plots(
    bad_model.optimizer.coef_,
    lorenz_coef,
    input_features=["x", "y", "z"],
    feature_names=bad_model.get_feature_names(),
    axs=[axes[0], axes[3]],
)
axes[3].set_title("Bad model")
plt.tight_layout()

# %% [markdown]
# Not all coefficients are equivalent, however.
# For example, a small coefficient in front of an $x^5$ term may mean more to you and
# your use case than a larger constant coefficient.
# There are different ways of evaluating how important each feature is,
# but they all end up as weights in a call to a scoring metric.
# In this example, weights are calculated as the root-mean-square value of each feature
# over the data.

# %%
weights = np.sqrt(np.sum(good_model.feature_library.transform(x) ** 2, axis=0) / len(x))
print(
    "weights are ", {feat: f"{weight:.0f}" for feat, weight in zip(features, weights)}
)

good_weighted_mse = mean_squared_error(
    lorenz_coef.T, good_model.optimizer.coef_.T, sample_weight=weights
)
good_weighted_mae = mean_absolute_error(
    lorenz_coef.T, good_model.optimizer.coef_.T, sample_weight=weights
)
print(f"Good model weighted MSE: {good_weighted_mse}")
print(f"Good model weighted MAE: {good_weighted_mae}")
ok_weighted_mse = mean_squared_error(
    lorenz_coef.T, ok_model.optimizer.coef_.T, sample_weight=weights
)
ok_weighted_mae = mean_absolute_error(
    lorenz_coef.T, ok_model.optimizer.coef_.T, sample_weight=weights
)
print(f"Ok model weighted MSE: {ok_weighted_mse}")
print(f"Ok model weighted MAE: {ok_weighted_mae}")
bad_weighted_mse = mean_squared_error(
    lorenz_coef.T, bad_model.optimizer.coef_.T, sample_weight=weights
)
bad_weighted_mae = mean_absolute_error(
    lorenz_coef.T, bad_model.optimizer.coef_.T, sample_weight=weights
)
print(f"Bad model weighted MSE: {bad_weighted_mse}")
print(f"Bad model weighted MAE: {bad_weighted_mae}")

# %% [markdown]
# There are other ways of evaluating model coefficients beyond these metrics,
# most notably mathematical analysis of the stability of the discovered model.
# These are beyond the scope of this tutorial, but see the notebooks on
# `StabilizedLinearSR3` and `TrappingSR3`.

# %% [markdown]
# ## Prediction

# %% [markdown]
# Sometimes there's no simple way to evaluate the functional form of the discovered model.
# Some use cases, such as model predictive control, care about immediate prediction
# but not the analytic differences between functions.
# In these cases, it makes the most sense to evaluate the predictive capability.
#
# A great example is the nonlinear pendulum.
# If a pendulum is swinging close to the origin, $f(x)=x$ and $f(x)=\sin(x)$
# are very close together.
# Even though coefficient metrics would say that these are different functions,
# they yield very similar predictions, as shown below:

# %%
# +/- 30 degrees from bottom dead center
pendulum_angles = np.pi / 180 * np.linspace(-30, 30, 21)
plt.plot(pendulum_angles, np.sin(pendulum_angles), label=r"$f(x)=\sin(x)$")
plt.plot(pendulum_angles, pendulum_angles, label=r"$f(x)=x$")
plt.legend()

# %% [markdown]
# This occurs because the features are nearly collinear.
# Understanding and compensating for collinearity of features in the function library
# is a challenge (a quick condition-number check, sketched below, is one way to see it).
# We can avoid that difficulty if we just score models based upon prediction.

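# %% [markdown]
# One rough diagnostic for collinearity, not a pysindy API but a hedged sketch,
# is the condition number of the evaluated feature library:
# nearly collinear columns produce a large condition number.

# %%
theta = good_model.feature_library.transform(x)
print(f"Condition number of the evaluated feature library: {np.linalg.cond(theta):.2e}")
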
# %% [markdown]
# Fortunately, `model.score` depends upon `model.predict()`, which makes evaluating
# models based upon prediction more straightforward than evaluating the coefficients
# of the discovered model.

# %%
good_model.score(x, t)

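# %% [markdown]
# The same score can be computed for each model for a quick side-by-side comparison
# (a short sketch; by default the score is an $R^2$ value on the predicted derivatives):

# %%
for name, model in (("Good", good_model), ("Ok", ok_model), ("Bad", bad_model)):
    print(f"{name} model score: {model.score(x, t):.3f}")
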
# %% [markdown]
# We can inspect the predicted vs. observed phase space and gradient plots for visual
# equivalence, as well as look for systemic bias in prediction.

# %%
fig = plt.figure(figsize=[6, 3])
axes = fig.subplots(1, 3)
for ax, model, name in zip(
    axes, (good_model, ok_model, bad_model), ("good", "ok", "bad")
):
    x_dot_pred = model.predict(x)
    x_dot_true = model.differentiation_method(x, t[1] - t[0])
    ax.scatter(x_dot_true, x_dot_pred - x_dot_true)
    ax.set_title(name)
fig.suptitle("Is there a systemic bias?")
axes[1].set_xlabel("'True' value")
axes[0].set_ylabel("Prediction error")
fig.tight_layout()

# %% [markdown]
# **WARNING!** All of these predictive measurements compare predictions with an
# ostensibly "true" $\dot x$, which is typically not available.
# All the examples here use the $\dot x$ calculated in the differentiation step of SINDy
# as the "truth"; even `score()` does so internally.
# If the differentiation method oversmooths (in the limit, to a constant line),
# models that predict smaller values of $\dot x$ (in the limit, $\dot x=0$)
# will score the best.

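# %% [markdown]
# If a trusted derivative *is* available, it can be supplied explicitly so that scoring
# does not depend on the differentiation method. The sketch below assumes the data came
# from the standard Lorenz parameters ($\sigma=10$, $\rho=28$, $\beta=8/3$) and that your
# pysindy version's `score` accepts an `x_dot` argument:

# %%
# Analytic Lorenz right-hand side evaluated on the observed states (assumed parameters)
x_dot_analytic = np.stack(
    [
        10.0 * (x[:, 1] - x[:, 0]),
        x[:, 0] * (28.0 - x[:, 2]) - x[:, 1],
        x[:, 0] * x[:, 1] - 8.0 / 3 * x[:, 2],
    ],
    axis=-1,
)
score_analytic = good_model.score(x, t, x_dot=x_dot_analytic)
print(f"Good model score against analytic x_dot: {score_analytic:.3f}")
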
# %% [markdown]
# ## Simulation
#
# If we want to understand the behavior of a system over time, there's no substitute
# for simulation.
# However, nonlinear models of the kind SINDy discovers are not guaranteed to have
# solutions that exist beyond a certain duration.
# Attempts to simulate near or beyond that duration may bog down the solver or cause
# it to fail.
# Here's an example of a system that explodes in finite time and cannot be simulated
# beyond it, demonstrated in the cell below:
# $$ \dot x = x^2 $$

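# %% [markdown]
# A small illustration using scipy directly (not pysindy): integrating $\dot x = x^2$
# from $x(0)=1$, whose exact solution $x(t) = 1/(1-t)$ blows up at $t=1$.
# The solver typically gives up just before the blow-up time:

# %%
from scipy.integrate import solve_ivp

# Ask for a solution out to t=2, well past the blow-up at t=1
blowup = solve_ivp(lambda t_, y: y**2, t_span=(0, 2), y0=[1.0])
print(blowup.message)
print(f"Integration stopped near t = {blowup.t[-1]:.4f}")
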
# %% [markdown]
# There is no straightforward way to guarantee
# that a discovered system will not blow up or go off to infinity,
# but a good prediction or coefficient score is a decent indication.

# %%
x_sim_good = good_model.simulate(x[0], t)
x_sim_ok = ok_model.simulate(x[0], t)

# %% [markdown]
# The bad model becomes stiff, potentially blowing up.
# A solver designed for stiff problems may be able to integrate some cases where the
# default solver struggles, but these cases require individual attention and are beyond
# the scope of this tutorial. Uncomment and run the following cell to see the kinds of
# warnings that occur, but you will likely have to interrupt the kernel.

# %%
# x_sim_bad = bad_model.simulate(x[0], t)

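# %% [markdown]
# If a model is merely stiff rather than truly divergent, it may help to pass solver
# options through to the integrator. This is only a sketch: it assumes your pysindy
# version forwards `integrator_kws` to `scipy.integrate.solve_ivp`, and it may still
# hang or fail, so it is also left commented out:

# %%
# Hypothetical stiff-solver attempt (uncomment to try; may need to interrupt the kernel)
# x_sim_bad = bad_model.simulate(x[0], t, integrator_kws={"method": "LSODA"})
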
# %% [markdown]
# The second problem with simulation is that the simulated dynamics may capture some
# essential aspect of the problem while the pointwise numerical difference still
# suggests a poor model.
# This simultaneous correctness and incorrectness can occur if the model recreates the
# exact period and shape of an oscillation but is out of phase
# (e.g. confusing the predator and prey in Lotka-Volterra).
# It can also occur in chaotic systems, which may mirror the true system very well for
# a time but must eventually diverge dramatically. Our models and the true Lorenz
# system are chaotic:

# %%
fig, axs = plt.subplots(3, 2)
for col, (x_sim, name) in enumerate(zip((x_sim_good, x_sim_ok), ("good", "ok"))):
    axs[0, col].set_title(f"{name} model")
    for coord_ind in range(3):
        axs[coord_ind, col].plot(t, x[:, coord_ind])
        axs[coord_ind, col].plot(t, x_sim[:, coord_ind])
fig.suptitle("Even an accurate model of a chaotic system will look bad in time")

# %% [markdown]
# However, chaotic systems have a useful property:
# their aperiodic trajectories sweep out a probability distribution.
# And though this distribution is complex and potentially low- or fractal-dimensional,
# we can estimate a distribution from the data and see how much the simulation diverges
# from that distribution.
# The log likelihood, a continuous quantity from information theory, measures that
# divergence in nats.
#
# Here, we create the reference distribution from a Gaussian KDE of the true data:

# %%
from sklearn.neighbors import KernelDensity

kde = KernelDensity(kernel="gaussian").fit(x)
base_likelihood = kde.score_samples(x).sum()

# %%
good_excess_loss = base_likelihood - kde.score_samples(x_sim_good).sum()
ok_excess_loss = base_likelihood - kde.score_samples(x_sim_ok).sum()
print(f"Our best model loses {good_excess_loss} nats of information")
print(f"Our ok model loses {ok_excess_loss} nats of information")

# %% [markdown]
# ## What to do with this information
#
# These methods all help you evaluate whether a model is fitting well,
# and, given several models, help you choose the best for a given application.
# If a model is performing poorly, it may be possible to construct a better SINDy model.
# The next tutorial describes how to choose better components for the SINDy model.