|
18 | 18 | from sklearn.exceptions import NotFittedError |
19 | 19 | from sklearn.linear_model import ElasticNet |
20 | 20 | from sklearn.linear_model import Lasso |
| 21 | +from sklearn.linear_model import LinearRegression |
21 | 22 | from sklearn.model_selection import RandomizedSearchCV |
22 | 23 | from sklearn.model_selection import TimeSeriesSplit |
23 | 24 | from sklearn.utils.validation import check_is_fitted |
@@ -579,3 +580,176 @@ def test_diffusion_pde(diffuse_multiple_trajectories): |
579 | 580 | model.fit(u, t=t, feature_names=["u"]) |
580 | 581 | assert abs(model.coefficients()[0, -1] - 1) < 1e-1 |
581 | 582 | assert np.all(model.coefficients()[0, :-1] == 0) |
| 583 | + |
| 584 | + |
def test_sample_weight_fit_continuous(data_2d_linear):
    """Up-weighting one trajectory pulls the continuous-time fit toward it."""
    (x_a, xdot_a), (x_b, xdot_b) = data_2d_linear

    def _coefs(xs, xdots, weights=None):
        # Fit a plain least-squares SINDy model and return a copy of the
        # learned coefficient matrix.
        estimator = SINDy(optimizer=LinearRegression(fit_intercept=False))
        kwargs = {} if weights is None else {"sample_weight": weights}
        estimator.fit(xs, t=0.1, x_dot=xdots, **kwargs)
        return np.copy(estimator.optimizer.coef_)

    trajectories = [x_a, x_a, x_b]
    derivatives = [xdot_a, xdot_a, xdot_b]
    # Trajectory b carries 10x the weight of each copy of trajectory a.
    sample_weight = [
        np.ones((len(x_a), 1)),
        np.ones((len(x_a), 1)),
        10 * np.ones((len(x_b), 1)),
    ]

    coef_unweighted = _coefs(trajectories, derivatives)
    coef_weighted = _coefs(trajectories, derivatives, sample_weight)
    coef_a = _coefs([x_a], [xdot_a])
    coef_b = _coefs([x_b], [xdot_b])

    # For this linear fixture the pooled solution is (approximately) a
    # weight-proportional average of the per-trajectory solutions.
    expected_unweighted = (2 * coef_a + coef_b) / 3.0
    expected_weighted = (2 * coef_a + 10 * coef_b) / 12.0

    assert np.allclose(coef_unweighted, expected_unweighted, rtol=1e-2, atol=1e-6)
    assert np.allclose(coef_weighted, expected_weighted, rtol=1e-2, atol=1e-6)
    # The weighted fit must land closer to trajectory b's own solution.
    assert np.linalg.norm(coef_weighted - coef_b) < np.linalg.norm(
        coef_unweighted - coef_b
    )
| 617 | + |
| 618 | + |
def test_sample_weight_fit_discrete(data_2d_linear):
    """Up-weighting one trajectory pulls the discrete-time fit toward it."""
    (x_a, _), (x_b, _) = data_2d_linear
    # Build (state, next-state) pairs for two copies of a and one of b.
    states = [traj[:-1] for traj in (x_a, x_a, x_b)]
    next_states = [traj[1:] for traj in (x_a, x_a, x_b)]
    # Trajectory b carries 10x the weight of each copy of trajectory a.
    sample_weight = [
        np.ones((len(states[0]), 1)),
        np.ones((len(states[1]), 1)),
        10 * np.ones((len(states[2]), 1)),
    ]

    def _coefs(xs, x_nexts, weights=None):
        # Fit a plain least-squares DiscreteSINDy model and return a copy
        # of the learned coefficient matrix.
        estimator = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
        kwargs = {} if weights is None else {"sample_weight": weights}
        estimator.fit(xs, t=1, x_next=x_nexts, **kwargs)
        return np.copy(estimator.optimizer.coef_)

    coef_unweighted = _coefs(states, next_states)
    coef_weighted = _coefs(states, next_states, sample_weight)
    coef_a = _coefs([states[0]], [next_states[0]])
    coef_b = _coefs([states[2]], [next_states[2]])

    # For this linear fixture the pooled solution is (approximately) a
    # weight-proportional average of the per-trajectory solutions.
    expected_unweighted = (2 * coef_a + coef_b) / 3.0
    expected_weighted = (2 * coef_a + 10 * coef_b) / 12.0

    assert np.allclose(coef_unweighted, expected_unweighted, rtol=1e-2, atol=1e-6)
    assert np.allclose(coef_weighted, expected_weighted, rtol=1e-2, atol=1e-6)
    # The weighted fit must land closer to trajectory b's own solution.
    assert np.linalg.norm(coef_weighted - coef_b) < np.linalg.norm(
        coef_unweighted - coef_b
    )
| 652 | + |
| 653 | + |
def test_sample_weight_score_continuous(data_2d_linear):
    """Shifting score weight toward the training trajectory raises the score."""
    (x_a, xdot_a), (x_b, xdot_b) = data_2d_linear

    model = SINDy(optimizer=LinearRegression(fit_intercept=False))
    model.fit([x_a], t=0.1, x_dot=[xdot_a])

    def _score(xs, xdots, weights=None):
        kwargs = {} if weights is None else {"sample_weight": weights}
        return model.score(xs, t=0.1, x_dot=xdots, **kwargs)

    # Per-trajectory weight arrays favoring a and favoring b, respectively.
    heavy_a = [10 * np.ones((len(x_a), 1)), np.ones((len(x_b), 1))]
    heavy_b = [np.ones((len(x_a), 1)), 10 * np.ones((len(x_b), 1))]

    score_a = _score([x_a], [xdot_a])
    score_b = _score([x_b], [xdot_b])
    score_unweighted = _score([x_a, x_b], [xdot_a, xdot_b])
    score_weighted_to_a = _score([x_a, x_b], [xdot_a, xdot_b], heavy_a)
    score_weighted_to_b = _score([x_a, x_b], [xdot_a, xdot_b], heavy_b)

    all_scores = (
        score_a,
        score_b,
        score_unweighted,
        score_weighted_to_a,
        score_weighted_to_b,
    )
    # Every score is a finite float bounded above by 1.
    for value in all_scores:
        assert isinstance(value, float)
        assert np.isfinite(value)
        assert value <= 1

    # The model was trained on a, so a scores at least as well as b, and
    # moving weight toward a (resp. b) cannot lower (resp. raise) the score.
    assert score_a >= score_b
    assert score_weighted_to_a >= score_unweighted >= score_weighted_to_b
| 696 | + |
| 697 | + |
def test_sample_weight_score_discrete(data_2d_linear):
    """Shifting score weight toward the training trajectory raises the score."""
    (x_a, _), (x_b, _) = data_2d_linear
    x_a, x_next_a = x_a[:-1], x_a[1:]
    x_b, x_next_b = x_b[:-1], x_b[1:]

    model = DiscreteSINDy(optimizer=LinearRegression(fit_intercept=False))
    model.fit([x_a], t=1, x_next=[x_next_a])

    def _score(xs, x_nexts, weights=None):
        kwargs = {} if weights is None else {"sample_weight": weights}
        return model.score(xs, t=1, x_next=x_nexts, **kwargs)

    # Per-trajectory weight arrays favoring a and favoring b, respectively.
    heavy_a = [10 * np.ones((len(x_a), 1)), np.ones((len(x_b), 1))]
    heavy_b = [np.ones((len(x_a), 1)), 10 * np.ones((len(x_b), 1))]

    score_a = _score([x_a], [x_next_a])
    score_b = _score([x_b], [x_next_b])
    score_unweighted = _score([x_a, x_b], [x_next_a, x_next_b])
    score_weighted_to_a = _score([x_a, x_b], [x_next_a, x_next_b], heavy_a)
    score_weighted_to_b = _score([x_a, x_b], [x_next_a, x_next_b], heavy_b)

    all_scores = (
        score_a,
        score_b,
        score_unweighted,
        score_weighted_to_a,
        score_weighted_to_b,
    )
    # Every score is a finite float bounded above by 1.
    for value in all_scores:
        assert isinstance(value, float)
        assert np.isfinite(value)
        assert value <= 1

    # The model was trained on a, so a scores at least as well as b, and
    # moving weight toward a (resp. b) cannot lower (resp. raise) the score.
    assert score_a >= score_b
    assert score_weighted_to_a >= score_unweighted >= score_weighted_to_b
| 742 | + |
| 743 | + |
def test_sample_weight_error():
    """A mis-shaped per-trajectory weight array raises an informative error."""
    trajectory = np.arange(24, dtype=float).reshape(3, 4, 2)
    times = np.linspace(0.0, 0.3, 4)
    # 1-D weights do not match the shape the validator demands (per its
    # own error message below).
    bad_weights = [np.linspace(1.0, 2.0, 4)]
    library = PolynomialLibrary()
    expected_msg = (
        r"sample_weight\[0] has shape \(4,\), but it must match \(3, 4, 1\)"
    )
    with pytest.raises(ValueError, match=expected_msg):
        _core._comprehend_and_validate_inputs(
            [trajectory], [times], None, None, library, sample_weight=bad_weights
        )
0 commit comments