|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | import warnings |
4 | | -from typing import Iterable, List, Optional, Tuple, Union, cast |
| 4 | +from typing import Dict, Iterable, List, Optional, Tuple, Union, cast |
5 | 5 |
|
6 | 6 | import numpy as np |
7 | 7 | from sklearn.base import RegressorMixin, clone |
@@ -546,93 +546,150 @@ def fit( |
546 | 546 | MapieQuantileRegressor |
547 | 547 | The model itself. |
548 | 548 | """ |
549 | | - self.cv = self._check_cv(cast(str, self.cv)) |
550 | 549 |
|
551 | | - # Initialization |
552 | | - self.estimators_: List[RegressorMixin] = [] |
553 | | - if self.cv == "prefit": |
554 | | - estimator = cast(List, self.estimator) |
555 | | - alpha = self._check_alpha(self.alpha) |
556 | | - self._check_prefit_params(estimator) |
557 | | - X_calib, y_calib = indexable(X, y) |
| 550 | + self.init_fit() |
558 | 551 |
|
559 | | - self.n_calib_samples = _num_samples(y_calib) |
560 | | - y_calib_preds = np.full( |
561 | | - shape=(3, self.n_calib_samples), |
562 | | - fill_value=np.nan |
563 | | - ) |
564 | | - for i, est in enumerate(estimator): |
565 | | - self.estimators_.append(est) |
566 | | - y_calib_preds[i] = est.predict(X_calib).ravel() |
567 | | - self.single_estimator_ = self.estimators_[2] |
| 552 | + if self.cv == "prefit": |
| 553 | + X_calib, y_calib = self.prefit_estimators(X, y) |
568 | 554 | else: |
569 | | - # Checks |
570 | | - self._check_parameters() |
571 | | - checked_estimator = self._check_estimator(self.estimator) |
572 | | - alpha = self._check_alpha(self.alpha) |
573 | | - X, y = indexable(X, y) |
574 | | - random_state = check_random_state(random_state) |
575 | | - results = self._check_calib_set( |
576 | | - X, |
577 | | - y, |
578 | | - sample_weight, |
579 | | - X_calib, |
580 | | - y_calib, |
581 | | - calib_size, |
582 | | - random_state, |
583 | | - shuffle, |
584 | | - stratify, |
| 555 | + X_calib, y_calib = self.fit_estimators( |
| 556 | + X=X, |
| 557 | + y=y, |
| 558 | + sample_weight=sample_weight, |
| 559 | + groups=groups, |
| 560 | + X_calib=X_calib, |
| 561 | + y_calib=y_calib, |
| 562 | + calib_size=calib_size, |
| 563 | + random_state=random_state, |
| 564 | + shuffle=shuffle, |
| 565 | + stratify=stratify, |
| 566 | + **fit_params, |
585 | 567 | ) |
586 | | - X_train, y_train, X_calib, y_calib, sample_weight_train = results |
587 | | - X_train, y_train = indexable(X_train, y_train) |
588 | | - X_calib, y_calib = indexable(X_calib, y_calib) |
589 | | - y_train, y_calib = _check_y(y_train), _check_y(y_calib) |
590 | | - self.n_calib_samples = _num_samples(y_calib) |
591 | | - check_alpha_and_n_samples(self.alpha, self.n_calib_samples) |
592 | | - sample_weight_train, X_train, y_train = check_null_weight( |
593 | | - sample_weight_train, |
| 568 | + |
| 569 | + self.conformalize(X_calib, y_calib) |
| 570 | + |
| 571 | + return self |
| 572 | + |
| 573 | + def init_fit(self) -> None: |
| 574 | + |
| 575 | + self.cv = self._check_cv(cast(str, self.cv)) |
| 576 | + self.alpha_np = self._check_alpha(self.alpha) |
| 577 | + self.estimators_: List[RegressorMixin] = [] |
| 578 | + |
| 579 | + def prefit_estimators( |
| 580 | + self, |
| 581 | + X: ArrayLike, |
| 582 | + y: ArrayLike |
| 583 | + ) -> Tuple[ArrayLike, ArrayLike]: |
| 584 | + |
| 585 | + estimator = cast(List, self.estimator) |
| 586 | + self._check_prefit_params(estimator) |
| 587 | + self.estimators_ = list(estimator) |
| 588 | + self.single_estimator_ = self.estimators_[2] |
| 589 | + |
| 590 | + X_calib, y_calib = indexable(X, y) |
| 591 | + return X_calib, y_calib |
| 592 | + |
| 593 | + def fit_estimators( |
| 594 | + self, |
| 595 | + X: ArrayLike, |
| 596 | + y: ArrayLike, |
| 597 | + sample_weight: Optional[ArrayLike] = None, |
| 598 | + groups: Optional[ArrayLike] = None, |
| 599 | + X_calib: Optional[ArrayLike] = None, |
| 600 | + y_calib: Optional[ArrayLike] = None, |
| 601 | + calib_size: Optional[float] = 0.3, |
| 602 | + random_state: Optional[Union[int, np.random.RandomState]] = None, |
| 603 | + shuffle: Optional[bool] = True, |
| 604 | + stratify: Optional[ArrayLike] = None, |
| 605 | + **fit_params, |
| 606 | + ) -> Tuple[ArrayLike, ArrayLike]: |
| 607 | + |
| 608 | + self._check_parameters() |
| 609 | + checked_estimator = self._check_estimator(self.estimator) |
| 610 | + random_state = check_random_state(random_state) |
| 611 | + X, y = indexable(X, y) |
| 612 | + |
| 613 | + results = self._check_calib_set( |
| 614 | + X, |
| 615 | + y, |
| 616 | + sample_weight, |
| 617 | + X_calib, |
| 618 | + y_calib, |
| 619 | + calib_size, |
| 620 | + random_state, |
| 621 | + shuffle, |
| 622 | + stratify, |
| 623 | + ) |
| 624 | + |
| 625 | + X_train, y_train, X_calib, y_calib, sample_weight_train = results |
| 626 | + X_train, y_train = indexable(X_train, y_train) |
| 627 | + X_calib, y_calib = indexable(X_calib, y_calib) |
| 628 | + y_train, y_calib = _check_y(y_train), _check_y(y_calib) |
| 629 | + self.n_calib_samples = _num_samples(y_calib) |
| 630 | + check_alpha_and_n_samples(self.alpha, self.n_calib_samples) |
| 631 | + sample_weight_train, X_train, y_train = check_null_weight( |
| 632 | + sample_weight_train, |
| 633 | + X_train, |
| 634 | + y_train |
| 635 | + ) |
| 636 | + y_train = cast(NDArray, y_train) |
| 637 | + |
| 638 | + if isinstance(checked_estimator, Pipeline): |
| 639 | + estimator = checked_estimator[-1] |
| 640 | + else: |
| 641 | + estimator = checked_estimator |
| 642 | + name_estimator = estimator.__class__.__name__ |
| 643 | + alpha_name = self.quantile_estimator_params[ |
| 644 | + name_estimator |
| 645 | + ]["alpha_name"] |
| 646 | + for alpha_ in self.alpha_np: |
| 647 | + cloned_estimator_ = clone(checked_estimator) |
| 648 | + params = {alpha_name: alpha_} |
| 649 | + if isinstance(checked_estimator, Pipeline): |
| 650 | + cloned_estimator_[-1].set_params(**params) |
| 651 | + else: |
| 652 | + cloned_estimator_.set_params(**params) |
| 653 | + self.estimators_.append(fit_estimator( |
| 654 | + cloned_estimator_, |
594 | 655 | X_train, |
595 | | - y_train |
| 656 | + y_train, |
| 657 | + sample_weight_train, |
| 658 | + **fit_params, |
| 659 | + ) |
596 | 660 | ) |
597 | | - y_train = cast(NDArray, y_train) |
| 661 | + self.single_estimator_ = self.estimators_[2] |
598 | 662 |
|
599 | | - y_calib_preds = np.full( |
| 663 | + X_calib = cast(ArrayLike, X_calib) |
| 664 | + y_calib = cast(ArrayLike, y_calib) |
| 665 | + |
| 666 | + return X_calib, y_calib |
| 667 | + |
| 668 | + def conformalize( |
| 669 | + self, |
| 670 | + X_conf: ArrayLike, |
| 671 | + y_conf: ArrayLike, |
| 672 | + sample_weight: Optional[ArrayLike] = None, |
| 673 | + predict_params: Dict = {}, |
| 674 | + ): |
| 675 | + |
| 676 | + self.n_calib_samples = _num_samples(y_conf) |
| 677 | + |
| 678 | + y_calib_preds = np.full( |
600 | 679 | shape=(3, self.n_calib_samples), |
601 | 680 | fill_value=np.nan |
602 | 681 | ) |
603 | 682 |
|
604 | | - if isinstance(checked_estimator, Pipeline): |
605 | | - estimator = checked_estimator[-1] |
606 | | - else: |
607 | | - estimator = checked_estimator |
608 | | - name_estimator = estimator.__class__.__name__ |
609 | | - alpha_name = self.quantile_estimator_params[ |
610 | | - name_estimator |
611 | | - ]["alpha_name"] |
612 | | - for i, alpha_ in enumerate(alpha): |
613 | | - cloned_estimator_ = clone(checked_estimator) |
614 | | - params = {alpha_name: alpha_} |
615 | | - if isinstance(checked_estimator, Pipeline): |
616 | | - cloned_estimator_[-1].set_params(**params) |
617 | | - else: |
618 | | - cloned_estimator_.set_params(**params) |
619 | | - self.estimators_.append(fit_estimator( |
620 | | - cloned_estimator_, |
621 | | - X_train, |
622 | | - y_train, |
623 | | - sample_weight_train, |
624 | | - **fit_params, |
625 | | - ) |
626 | | - ) |
627 | | - y_calib_preds[i] = self.estimators_[-1].predict(X_calib) |
628 | | - self.single_estimator_ = self.estimators_[2] |
| 683 | + for i, est in enumerate(self.estimators_): |
| 684 | + y_calib_preds[i] = est.predict(X_conf, **predict_params).ravel() |
629 | 685 |
|
630 | 686 | self.conformity_scores_ = np.full( |
631 | 687 | shape=(3, self.n_calib_samples), |
632 | 688 | fill_value=np.nan |
633 | 689 | ) |
634 | | - self.conformity_scores_[0] = y_calib_preds[0] - y_calib |
635 | | - self.conformity_scores_[1] = y_calib - y_calib_preds[1] |
| 690 | + |
| 691 | + self.conformity_scores_[0] = y_calib_preds[0] - y_conf |
| 692 | + self.conformity_scores_[1] = y_conf - y_calib_preds[1] |
636 | 693 | self.conformity_scores_[2] = np.max( |
637 | 694 | [ |
638 | 695 | self.conformity_scores_[0], |
|
0 commit comments