Sample Weights (#673)

filippozacchei · Jacob-Stevens-Haas · web-flow · commit 3518a9b7f827 · 2026-03-18T13:18:36.000-07:00
* Added sample weight to SINDy fit method. To do: account for WEAK sindy * Files to be modified * Core * No expand Weights * sampleConcatter * test_weights * Sample Weights new * Now weighted Weak PDE * No Set Config * No Space * For Pull Request * pre-commit * Reviewed changes * Adjusted comments * Discrete sindy sample weight fit * sample weight score * to pull * check_sample_Weight * adjusted check * sample_weights adjusted. Test score not passing * almost * Final * Remove transform_sample_weights in sample concatter * adjusted for base optimizer * Removed SampleConcatter and Adjusted Coverage * Apply suggestion from @Jacob-Stevens-Haas Co-authored-by: Jacob Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> * Remove redundant test Remove test for SINDy model fitting with invalid inputs. * Remove drop_nan_samples * Remove conversion to axes, not needed in the base optimizer as drop_nan_samples has been removed --------- Co-authored-by: Jacob Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
diff --git a/pysindy/_core.py b/pysindy/_core.py
diff --git a/pysindy/deeptime.py b/pysindy/deeptime.py
@@ -5,7 +5,6 @@
 from sklearn.utils.validation import check_is_fitted
 
 from ._core import SINDy
-from .utils import SampleConcatter
 
 
 class SINDyEstimator(SINDy):
@@ -178,7 +177,6 @@ def __init__(
 
         steps = [
             ("features", feature_library),
-            ("shaping", SampleConcatter()),
             ("model", optimizer),
         ]
         self.model = Pipeline(steps)
diff --git a/pysindy/optimizers/base.py b/pysindy/optimizers/base.py
@@ -20,7 +20,6 @@
 from .._typing import Float2D
 from .._typing import FloatDType
 from ..utils import AxesArray
-from ..utils import drop_nan_samples
 
 NFeat = NewType("NFeat", int)
 NTarget = NewType("NTarget", int)
@@ -203,10 +202,6 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws):
         -------
         self : returns an instance of self
         """
-        x_ = AxesArray(np.asarray(x_), {"ax_sample": 0, "ax_coord": 1})
-        y_axes = {"ax_sample": 0} if y.ndim == 1 else {"ax_sample": 0, "ax_coord": 1}
-        y = AxesArray(np.asarray(y), y_axes)
-        x_, y = drop_nan_samples(x_, y)
         x_, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=True)
 
         x, y, X_offset, y_offset, _, sample_weight_sqrt = _preprocess_data(
diff --git a/pysindy/utils/__init__.py b/pysindy/utils/__init__.py
@@ -1,7 +1,6 @@
 from ._axes import AxesArray
 from ._axes import comprehend_axes
 from ._axes import concat_sample_axis
-from ._axes import SampleConcatter
 from ._axes import wrap_axes
 from .base import capped_simplex_projection
 from .base import drop_nan_samples
@@ -47,7 +46,6 @@
 
 __all__ = [
     "AxesArray",
-    "SampleConcatter",
     "concat_sample_axis",
     "wrap_axes",
     "comprehend_axes",
diff --git a/pysindy/utils/_axes.py b/pysindy/utils/_axes.py
@@ -69,7 +69,6 @@
 
 import numpy as np
 from numpy.typing import NDArray
-from sklearn.base import TransformerMixin
 
 HANDLED_FUNCTIONS = {}
 
@@ -826,22 +825,10 @@ def comprehend_axes(x):
     return axes
 
 
-class SampleConcatter(TransformerMixin):
-    def __init__(self):
-        pass
-
-    def fit(self, x_list, y_list=None):
-        return self
-
-    def __sklearn_is_fitted__(self):
-        return True
-
-    def transform(self, x_list):
-        return concat_sample_axis(x_list)
-
-
-def concat_sample_axis(x_list: List[AxesArray]):
+def concat_sample_axis(x_list: Optional[List[AxesArray]]):
     """Concatenate all trajectories and axes used to create samples."""
+    if x_list is None:
+        return None
     new_arrs = []
     for x in x_list:
         sample_ax_names = ("ax_spatial", "ax_time", "ax_sample")
diff --git a/pysindy/utils/base.py b/pysindy/utils/base.py
@@ -125,7 +125,7 @@ def _check_control_shape(x, u):
     return u_arr
 
 
-def drop_nan_samples(x, y):
+def drop_nan_samples(x, y, w=None):
     """Drops samples from x and y where either has a nan value"""
     x_non_sample_axes = tuple(ax for ax in range(x.ndim) if ax != x.ax_sample)
     y_non_sample_axes = tuple(ax for ax in range(y.ndim) if ax != y.ax_sample)
@@ -134,7 +134,9 @@ def drop_nan_samples(x, y):
     good_sample_ind = np.nonzero(x_good_samples & y_good_samples)[0]
     x = x.take(good_sample_ind, axis=x.ax_sample)
     y = y.take(good_sample_ind, axis=y.ax_sample)
-    return x, y
+    if w is not None:
+        w = w.take(good_sample_ind, axis=w.ax_sample)
+    return x, y, w
 
 
 def reorder_constraints(arr, n_features, output_order="feature"):
diff --git a/test/conftest.py b/test/conftest.py
@@ -292,6 +292,18 @@ def data_2dspatial():
     return x, y, u
 
 
+@pytest.fixture(scope="session")
+def data_2d_linear():
+    t = np.linspace(0, 2 * np.pi, 50)
+    x_a = np.stack([np.cos(t), np.sin(t)], axis=1)
+    xdot_a = np.stack([-np.sin(t), np.cos(t)], axis=1)
+
+    x_b = np.stack([np.cos(2 * t), np.sin(2 * t)], axis=1)
+    xdot_b = np.stack([-2 * np.sin(2 * t), 2 * np.cos(2 * t)], axis=1)
+
+    return (x_a, xdot_a), (x_b, xdot_b)
+
+
 @pytest.fixture
 def custom_library():
     library_functions = [
diff --git a/test/test_pysindy.py b/test/test_pysindy.py
@@ -18,6 +18,7 @@
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import ElasticNet
 from sklearn.linear_model import Lasso
+from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import RandomizedSearchCV
 from sklearn.model_selection import TimeSeriesSplit
 from sklearn.utils.validation import check_is_fitted
@@ -579,3 +580,176 @@ def test_diffusion_pde(diffuse_multiple_trajectories):
     model.fit(u, t=t, feature_names=["u"])
     assert abs(model.coefficients()[0, -1] - 1) < 1e-1
     assert np.all(model.coefficients()[0, :-1] == 0)
+
+
+def test_sample_weight_fit_continuous(data_2d_linear):
+    (x_a, xdot_a), (x_b, xdot_b) = data_2d_linear
+    x_trajs = [x_a, x_a, x_b]
+    xdot_trajs = [xdot_a, xdot_a, xdot_b]
+    sample_weight = [
+        np.ones((len(x_a), 1)),
+        np.ones((len(x_a), 1)),
+        10 * np.ones((len(x_b), 1)),
+    ]
+
+    model = SINDy(optimizer=LinearRegression(fit_intercept=False))
+    model.fit(x_trajs, t=0.1, x_dot=xdot_trajs)
+    coef_unweighted = np.copy(model.optimizer.coef_)
+    model.fit(x_trajs, t=0.1, x_dot=xdot_trajs, sample_weight=sample_weight)
+    coef_weighted = np.copy(model.optimizer.coef_)
+
+    model_a = SINDy(optimizer=LinearRegression(fit_intercept=False))
+    model_a.fit([x_a], t=0.1, x_dot=[xdot_a])
+    coef_a = np.copy(model_a.optimizer.coef_)
+
+    model_b = SINDy(optimizer=LinearRegression(fit_intercept=False))
+    model_b.fit([x_b], t=0.1, x_dot=[xdot_b])
+    coef_b = np.copy(model_b.optimizer.coef_)
+
+    expected_unweighted = (2 * coef_a + coef_b) / 3.0
+    expected_weighted = (2 * coef_a + 10 * coef_b) / 12.0
+
+    assert np.allclose(coef_unweighted, expected_unweighted, rtol=1e-2, atol=1e-6)
+    assert np.allclose(coef_weighted, expected_weighted, rtol=1e-2, atol=1e-6)
+    assert np.linalg.norm(coef_weighted - coef_b) < np.linalg.norm(
+        coef_unweighted - coef_b
+    )
+
+
+def test_sample_weight_fit_discrete(data_2d_linear):
+    (x_a, _), (x_b, _) = data_2d_linear
+    x_trajs = [x_a, x_a, x_b]
+    x_next_trajs = [x[1:] for x in x_trajs]
+    x_trajs = [x[:-1] for x in x_trajs]
+    sample_weight = [
+        np.ones((len(x_trajs[0]), 1)),
+        np.ones((len(x_trajs[1]), 1)),
+        10 * np.ones((len(x_trajs[2]), 1)),
+    ]
+
+    model = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
+    model.fit(x_trajs, t=1, x_next=x_next_trajs)
+    coef_unweighted = np.copy(model.optimizer.coef_)
+    model.fit(x_trajs, t=1, x_next=x_next_trajs, sample_weight=sample_weight)
+    coef_weighted = np.copy(model.optimizer.coef_)
+
+    model_a = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
+    model_a.fit([x_trajs[0]], t=1, x_next=[x_next_trajs[0]])
+    coef_a = np.copy(model_a.optimizer.coef_)
+
+    model_b = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
+    model_b.fit([x_trajs[2]], t=1, x_next=[x_next_trajs[2]])
+    coef_b = np.copy(model_b.optimizer.coef_)
+
+    expected_unweighted = (2 * coef_a + coef_b) / 3.0
+    expected_weighted = (2 * coef_a + 10 * coef_b) / 12.0
+
+    assert np.allclose(coef_unweighted, expected_unweighted, rtol=1e-2, atol=1e-6)
+    assert np.allclose(coef_weighted, expected_weighted, rtol=1e-2, atol=1e-6)
+    assert np.linalg.norm(coef_weighted - coef_b) < np.linalg.norm(
+        coef_unweighted - coef_b
+    )
+
+
+def test_sample_weight_score_continuous(data_2d_linear):
+    (x_a, xdot_a), (x_b, xdot_b) = data_2d_linear
+
+    model = SINDy(optimizer=LinearRegression(fit_intercept=False))
+    model.fit([x_a], t=0.1, x_dot=[xdot_a])
+
+    score_a = model.score([x_a], t=0.1, x_dot=[xdot_a])
+    score_b = model.score([x_b], t=0.1, x_dot=[xdot_b])
+    score_unweighted = model.score([x_a, x_b], t=0.1, x_dot=[xdot_a, xdot_b])
+
+    score_weighted_to_a = model.score(
+        [x_a, x_b],
+        t=0.1,
+        x_dot=[xdot_a, xdot_b],
+        sample_weight=[
+            10 * np.ones((len(x_a), 1)),
+            np.ones((len(x_b), 1)),
+        ],
+    )
+    score_weighted_to_b = model.score(
+        [x_a, x_b],
+        t=0.1,
+        x_dot=[xdot_a, xdot_b],
+        sample_weight=[
+            np.ones((len(x_a), 1)),
+            10 * np.ones((len(x_b), 1)),
+        ],
+    )
+
+    for s in [
+        score_a,
+        score_b,
+        score_unweighted,
+        score_weighted_to_a,
+        score_weighted_to_b,
+    ]:
+        assert isinstance(s, float)
+        assert np.isfinite(s)
+        assert s <= 1
+
+    assert score_a >= score_b
+    assert score_weighted_to_a >= score_unweighted >= score_weighted_to_b
+
+
+def test_sample_weight_score_discrete(data_2d_linear):
+    (x_a, _), (x_b, _) = data_2d_linear
+    x_a, x_next_a = x_a[:-1], x_a[1:]
+    x_b, x_next_b = x_b[:-1], x_b[1:]
+
+    model = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
+    model.fit([x_a], t=1, x_next=[x_next_a])
+
+    score_a = model.score([x_a], t=1, x_next=[x_next_a])
+    score_b = model.score([x_b], t=1, x_next=[x_next_b])
+    score_unweighted = model.score([x_a, x_b], t=1, x_next=[x_next_a, x_next_b])
+
+    score_weighted_to_a = model.score(
+        [x_a, x_b],
+        t=1,
+        x_next=[x_next_a, x_next_b],
+        sample_weight=[
+            10 * np.ones((len(x_a), 1)),
+            np.ones((len(x_b), 1)),
+        ],
+    )
+    score_weighted_to_b = model.score(
+        [x_a, x_b],
+        t=1,
+        x_next=[x_next_a, x_next_b],
+        sample_weight=[
+            np.ones((len(x_a), 1)),
+            10 * np.ones((len(x_b), 1)),
+        ],
+    )
+
+    for s in [
+        score_a,
+        score_b,
+        score_unweighted,
+        score_weighted_to_a,
+        score_weighted_to_b,
+    ]:
+        assert isinstance(s, float)
+        assert np.isfinite(s)
+        assert s <= 1
+
+    assert score_a >= score_b
+    assert score_weighted_to_a >= score_unweighted >= score_weighted_to_b
+
+
+def test_sample_weight_error():
+    x = np.arange(24, dtype=float).reshape(3, 4, 2)
+    t = np.linspace(0.0, 0.3, 4)
+    weights = [np.linspace(1.0, 2.0, 4)]
+    feature_library = PolynomialLibrary()
+    with pytest.raises(
+        ValueError,
+        match=r"sample_weight\[0] has shape \(4,\), but it must match \(3, 4, 1\)",
+    ):
+        _core._comprehend_and_validate_inputs(
+            [x], [t], None, None, feature_library, sample_weight=weights
+        )