Skip to content

Commit 3518a9b

Browse files
Sample Weights (#673)
* Added sample weight to SINDy fit method. To do: account for WEAK sindy * Files to be modified * Core * No expand Weights * sampleConcatter * test_weights * Sample Weights new * Now weighted Weak PDE * No Set Config * No Space * For Pull Request * pre-commit * Reviewed changes * Adjusted comments * Discrete sindy sample weight fit * sample weight score * to pull * check_sample_Weight * adjusted check * sample_weights adjusted. Test score not passing * almost * Final * Remove transform_sample_weights in sample concatter * adjusted for base optimizer * Removed SampleConcatter and Adjusted Coverage * Apply suggestion from @Jacob-Stevens-Haas Co-authored-by: Jacob Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> * Remove redundant test Remove test for SINDy model fitting with invalid inputs. * Remove drop_nan_samples * Remove conversion to axes, not needed in BaseOptimizer as drop_nan_samples has been removed --------- Co-authored-by: Jacob Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
1 parent b7558be commit 3518a9b

File tree

8 files changed

+350
-67
lines changed

8 files changed

+350
-67
lines changed

pysindy/_core.py

Lines changed: 157 additions & 40 deletions
Large diffs are not rendered by default.

pysindy/deeptime.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from sklearn.utils.validation import check_is_fitted
66

77
from ._core import SINDy
8-
from .utils import SampleConcatter
98

109

1110
class SINDyEstimator(SINDy):
@@ -178,7 +177,6 @@ def __init__(
178177

179178
steps = [
180179
("features", feature_library),
181-
("shaping", SampleConcatter()),
182180
("model", optimizer),
183181
]
184182
self.model = Pipeline(steps)

pysindy/optimizers/base.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
from .._typing import Float2D
2121
from .._typing import FloatDType
2222
from ..utils import AxesArray
23-
from ..utils import drop_nan_samples
2423

2524
NFeat = NewType("NFeat", int)
2625
NTarget = NewType("NTarget", int)
@@ -203,10 +202,6 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws):
203202
-------
204203
self : returns an instance of self
205204
"""
206-
x_ = AxesArray(np.asarray(x_), {"ax_sample": 0, "ax_coord": 1})
207-
y_axes = {"ax_sample": 0} if y.ndim == 1 else {"ax_sample": 0, "ax_coord": 1}
208-
y = AxesArray(np.asarray(y), y_axes)
209-
x_, y = drop_nan_samples(x_, y)
210205
x_, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=True)
211206

212207
x, y, X_offset, y_offset, _, sample_weight_sqrt = _preprocess_data(

pysindy/utils/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from ._axes import AxesArray
22
from ._axes import comprehend_axes
33
from ._axes import concat_sample_axis
4-
from ._axes import SampleConcatter
54
from ._axes import wrap_axes
65
from .base import capped_simplex_projection
76
from .base import drop_nan_samples
@@ -47,7 +46,6 @@
4746

4847
__all__ = [
4948
"AxesArray",
50-
"SampleConcatter",
5149
"concat_sample_axis",
5250
"wrap_axes",
5351
"comprehend_axes",

pysindy/utils/_axes.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@
6969

7070
import numpy as np
7171
from numpy.typing import NDArray
72-
from sklearn.base import TransformerMixin
7372

7473
HANDLED_FUNCTIONS = {}
7574

@@ -826,22 +825,10 @@ def comprehend_axes(x):
826825
return axes
827826

828827

829-
class SampleConcatter(TransformerMixin):
830-
def __init__(self):
831-
pass
832-
833-
def fit(self, x_list, y_list=None):
834-
return self
835-
836-
def __sklearn_is_fitted__(self):
837-
return True
838-
839-
def transform(self, x_list):
840-
return concat_sample_axis(x_list)
841-
842-
843-
def concat_sample_axis(x_list: List[AxesArray]):
828+
def concat_sample_axis(x_list: Optional[List[AxesArray]]):
844829
"""Concatenate all trajectories and axes used to create samples."""
830+
if x_list is None:
831+
return None
845832
new_arrs = []
846833
for x in x_list:
847834
sample_ax_names = ("ax_spatial", "ax_time", "ax_sample")

pysindy/utils/base.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _check_control_shape(x, u):
125125
return u_arr
126126

127127

128-
def drop_nan_samples(x, y):
128+
def drop_nan_samples(x, y, w=None):
129129
"""Drops samples from x and y where either has a nan value"""
130130
x_non_sample_axes = tuple(ax for ax in range(x.ndim) if ax != x.ax_sample)
131131
y_non_sample_axes = tuple(ax for ax in range(y.ndim) if ax != y.ax_sample)
@@ -134,7 +134,9 @@ def drop_nan_samples(x, y):
134134
good_sample_ind = np.nonzero(x_good_samples & y_good_samples)[0]
135135
x = x.take(good_sample_ind, axis=x.ax_sample)
136136
y = y.take(good_sample_ind, axis=y.ax_sample)
137-
return x, y
137+
if w is not None:
138+
w = w.take(good_sample_ind, axis=w.ax_sample)
139+
return x, y, w
138140

139141

140142
def reorder_constraints(arr, n_features, output_order="feature"):

test/conftest.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,18 @@ def data_2dspatial():
292292
return x, y, u
293293

294294

295+
@pytest.fixture(scope="session")
296+
def data_2d_linear():
297+
t = np.linspace(0, 2 * np.pi, 50)
298+
x_a = np.stack([np.cos(t), np.sin(t)], axis=1)
299+
xdot_a = np.stack([-np.sin(t), np.cos(t)], axis=1)
300+
301+
x_b = np.stack([np.cos(2 * t), np.sin(2 * t)], axis=1)
302+
xdot_b = np.stack([-2 * np.sin(2 * t), 2 * np.cos(2 * t)], axis=1)
303+
304+
return (x_a, xdot_a), (x_b, xdot_b)
305+
306+
295307
@pytest.fixture
296308
def custom_library():
297309
library_functions = [

test/test_pysindy.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from sklearn.exceptions import NotFittedError
1919
from sklearn.linear_model import ElasticNet
2020
from sklearn.linear_model import Lasso
21+
from sklearn.linear_model import LinearRegression
2122
from sklearn.model_selection import RandomizedSearchCV
2223
from sklearn.model_selection import TimeSeriesSplit
2324
from sklearn.utils.validation import check_is_fitted
@@ -579,3 +580,176 @@ def test_diffusion_pde(diffuse_multiple_trajectories):
579580
model.fit(u, t=t, feature_names=["u"])
580581
assert abs(model.coefficients()[0, -1] - 1) < 1e-1
581582
assert np.all(model.coefficients()[0, :-1] == 0)
583+
584+
585+
def test_sample_weight_fit_continuous(data_2d_linear):
586+
(x_a, xdot_a), (x_b, xdot_b) = data_2d_linear
587+
x_trajs = [x_a, x_a, x_b]
588+
xdot_trajs = [xdot_a, xdot_a, xdot_b]
589+
sample_weight = [
590+
np.ones((len(x_a), 1)),
591+
np.ones((len(x_a), 1)),
592+
10 * np.ones((len(x_b), 1)),
593+
]
594+
595+
model = SINDy(optimizer=LinearRegression(fit_intercept=False))
596+
model.fit(x_trajs, t=0.1, x_dot=xdot_trajs)
597+
coef_unweighted = np.copy(model.optimizer.coef_)
598+
model.fit(x_trajs, t=0.1, x_dot=xdot_trajs, sample_weight=sample_weight)
599+
coef_weighted = np.copy(model.optimizer.coef_)
600+
601+
model_a = SINDy(optimizer=LinearRegression(fit_intercept=False))
602+
model_a.fit([x_a], t=0.1, x_dot=[xdot_a])
603+
coef_a = np.copy(model_a.optimizer.coef_)
604+
605+
model_b = SINDy(optimizer=LinearRegression(fit_intercept=False))
606+
model_b.fit([x_b], t=0.1, x_dot=[xdot_b])
607+
coef_b = np.copy(model_b.optimizer.coef_)
608+
609+
expected_unweighted = (2 * coef_a + coef_b) / 3.0
610+
expected_weighted = (2 * coef_a + 10 * coef_b) / 12.0
611+
612+
assert np.allclose(coef_unweighted, expected_unweighted, rtol=1e-2, atol=1e-6)
613+
assert np.allclose(coef_weighted, expected_weighted, rtol=1e-2, atol=1e-6)
614+
assert np.linalg.norm(coef_weighted - coef_b) < np.linalg.norm(
615+
coef_unweighted - coef_b
616+
)
617+
618+
619+
def test_sample_weight_fit_discrete(data_2d_linear):
620+
(x_a, _), (x_b, _) = data_2d_linear
621+
x_trajs = [x_a, x_a, x_b]
622+
x_next_trajs = [x[1:] for x in x_trajs]
623+
x_trajs = [x[:-1] for x in x_trajs]
624+
sample_weight = [
625+
np.ones((len(x_trajs[0]), 1)),
626+
np.ones((len(x_trajs[1]), 1)),
627+
10 * np.ones((len(x_trajs[2]), 1)),
628+
]
629+
630+
model = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
631+
model.fit(x_trajs, t=1, x_next=x_next_trajs)
632+
coef_unweighted = np.copy(model.optimizer.coef_)
633+
model.fit(x_trajs, t=1, x_next=x_next_trajs, sample_weight=sample_weight)
634+
coef_weighted = np.copy(model.optimizer.coef_)
635+
636+
model_a = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
637+
model_a.fit([x_trajs[0]], t=1, x_next=[x_next_trajs[0]])
638+
coef_a = np.copy(model_a.optimizer.coef_)
639+
640+
model_b = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
641+
model_b.fit([x_trajs[2]], t=1, x_next=[x_next_trajs[2]])
642+
coef_b = np.copy(model_b.optimizer.coef_)
643+
644+
expected_unweighted = (2 * coef_a + coef_b) / 3.0
645+
expected_weighted = (2 * coef_a + 10 * coef_b) / 12.0
646+
647+
assert np.allclose(coef_unweighted, expected_unweighted, rtol=1e-2, atol=1e-6)
648+
assert np.allclose(coef_weighted, expected_weighted, rtol=1e-2, atol=1e-6)
649+
assert np.linalg.norm(coef_weighted - coef_b) < np.linalg.norm(
650+
coef_unweighted - coef_b
651+
)
652+
653+
654+
def test_sample_weight_score_continuous(data_2d_linear):
655+
(x_a, xdot_a), (x_b, xdot_b) = data_2d_linear
656+
657+
model = SINDy(optimizer=LinearRegression(fit_intercept=False))
658+
model.fit([x_a], t=0.1, x_dot=[xdot_a])
659+
660+
score_a = model.score([x_a], t=0.1, x_dot=[xdot_a])
661+
score_b = model.score([x_b], t=0.1, x_dot=[xdot_b])
662+
score_unweighted = model.score([x_a, x_b], t=0.1, x_dot=[xdot_a, xdot_b])
663+
664+
score_weighted_to_a = model.score(
665+
[x_a, x_b],
666+
t=0.1,
667+
x_dot=[xdot_a, xdot_b],
668+
sample_weight=[
669+
10 * np.ones((len(x_a), 1)),
670+
np.ones((len(x_b), 1)),
671+
],
672+
)
673+
score_weighted_to_b = model.score(
674+
[x_a, x_b],
675+
t=0.1,
676+
x_dot=[xdot_a, xdot_b],
677+
sample_weight=[
678+
np.ones((len(x_a), 1)),
679+
10 * np.ones((len(x_b), 1)),
680+
],
681+
)
682+
683+
for s in [
684+
score_a,
685+
score_b,
686+
score_unweighted,
687+
score_weighted_to_a,
688+
score_weighted_to_b,
689+
]:
690+
assert isinstance(s, float)
691+
assert np.isfinite(s)
692+
assert s <= 1
693+
694+
assert score_a >= score_b
695+
assert score_weighted_to_a >= score_unweighted >= score_weighted_to_b
696+
697+
698+
def test_sample_weight_score_discrete(data_2d_linear):
699+
(x_a, _), (x_b, _) = data_2d_linear
700+
x_a, x_next_a = x_a[:-1], x_a[1:]
701+
x_b, x_next_b = x_b[:-1], x_b[1:]
702+
703+
model = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
704+
model.fit([x_a], t=1, x_next=[x_next_a])
705+
706+
score_a = model.score([x_a], t=1, x_next=[x_next_a])
707+
score_b = model.score([x_b], t=1, x_next=[x_next_b])
708+
score_unweighted = model.score([x_a, x_b], t=1, x_next=[x_next_a, x_next_b])
709+
710+
score_weighted_to_a = model.score(
711+
[x_a, x_b],
712+
t=1,
713+
x_next=[x_next_a, x_next_b],
714+
sample_weight=[
715+
10 * np.ones((len(x_a), 1)),
716+
np.ones((len(x_b), 1)),
717+
],
718+
)
719+
score_weighted_to_b = model.score(
720+
[x_a, x_b],
721+
t=1,
722+
x_next=[x_next_a, x_next_b],
723+
sample_weight=[
724+
np.ones((len(x_a), 1)),
725+
10 * np.ones((len(x_b), 1)),
726+
],
727+
)
728+
729+
for s in [
730+
score_a,
731+
score_b,
732+
score_unweighted,
733+
score_weighted_to_a,
734+
score_weighted_to_b,
735+
]:
736+
assert isinstance(s, float)
737+
assert np.isfinite(s)
738+
assert s <= 1
739+
740+
assert score_a >= score_b
741+
assert score_weighted_to_a >= score_unweighted >= score_weighted_to_b
742+
743+
744+
def test_sample_weight_error():
745+
x = np.arange(24, dtype=float).reshape(3, 4, 2)
746+
t = np.linspace(0.0, 0.3, 4)
747+
weights = [np.linspace(1.0, 2.0, 4)]
748+
feature_library = PolynomialLibrary()
749+
with pytest.raises(
750+
ValueError,
751+
match=r"sample_weight\[0] has shape \(4,\), but it must match \(3, 4, 1\)",
752+
):
753+
_core._comprehend_and_validate_inputs(
754+
[x], [t], None, None, feature_library, sample_weight=weights
755+
)

0 commit comments

Comments
 (0)