
Commit 5084217

Merge pull request #136 from scikit-learn-contrib/fix_issue_128
Fix issue 128
2 parents 6b1f863 + 057b22e commit 5084217

9 files changed: +150 −74 lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ Please describe the tests that you ran to verify your changes. Provide instructi
 - [ ] Test A
 - [ ] Test B

-# Checklist:
+# Checklist

 - [ ] I have read the [contributing guidelines](https://github.com/simai-ml/MAPIE/blob/master/CONTRIBUTING.rst)
 - [ ] I have updated the [HISTORY.rst](https://github.com/simai-ml/MAPIE/blob/master/HISTORY.rst) and [AUTHORS.rst](https://github.com/simai-ml/MAPIE/blob/master/AUTHORS.rst) files

HISTORY.rst

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ History
 "predict" in regression.py
 * Add replication of the Chen Xu's tutorial testing Jackknife+aB vs Jackknife+
 * Add Jackknife+-after-Bootstrap documentation
+* Improve scikit-learn pipelines compatibility

 0.3.1 (2021-11-19)
 ------------------

environment.dev.yml

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ channels:
 dependencies:
 - bump2version=1.0.1
 - flake8=4.0.1
-- mypy=0.920
+- mypy=0.910
 - numpydoc=1.1.0
 - pandas=1.3.5
 - pytest=6.2.5

examples/classification/1-quickstart/plot_comp_methods_on_2d_dataset.py

Lines changed: 1 addition & 1 deletion
@@ -168,7 +168,7 @@ def plot_scores(
 fig, axs = plt.subplots(1, 2, figsize=(10, 5))
 for i, method in enumerate(methods):
     conformity_scores = mapie[method].conformity_scores_
-    n = mapie[method].n_samples_val_
+    n = mapie[method].n_samples_
     quantiles = mapie[method].quantiles_
     plot_scores(alpha, conformity_scores, quantiles, method, axs[i])
 plt.show()
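
The snippet above reads mapie[method].n_samples_, so `mapie` has to be a mapping from method name to an already fitted MapieClassifier, and the attribute only exists after fit. A minimal sketch of how such a mapping could be built; the toy data, the prefit LogisticRegression and the "score"/"cumulated_score" method names are assumptions for illustration, not copied from the example script:

# Hypothetical setup mirroring the example's mapie[method] pattern:
# one MapieClassifier per conformity-score method, wrapped around a prefit model.
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from mapie.classification import MapieClassifier

X, y = make_blobs(n_samples=500, centers=3, random_state=42)
clf = LogisticRegression().fit(X, y)

methods = ["score", "cumulated_score"]
mapie = {}
for method in methods:
    mapie[method] = MapieClassifier(estimator=clf, method=method, cv="prefit")
    mapie[method].fit(X, y)

print(mapie["score"].n_samples_)                 # number of samples seen by fit
print(mapie["score"].conformity_scores_.shape)   # one conformity score per sample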

mapie/classification.py

Lines changed: 23 additions & 26 deletions
@@ -7,10 +7,15 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import BaseCrossValidator
 from sklearn.pipeline import Pipeline
-from sklearn.utils import check_X_y, check_array, check_random_state
-from sklearn.utils.multiclass import type_of_target
-from sklearn.utils.validation import check_is_fitted
 from sklearn.preprocessing import label_binarize
+from sklearn.utils import check_random_state, _safe_indexing
+from sklearn.utils.multiclass import type_of_target
+from sklearn.utils.validation import (
+    indexable,
+    check_is_fitted,
+    _num_samples,
+    _check_y,
+)

 from ._typing import ArrayLike
 from ._machine_precision import EPSILON

@@ -122,7 +127,7 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): # type: ignore
     n_features_in_: int
         Number of features passed to the fit method.

-    n_samples_val_: Union[int, List[int]]
+    n_samples_: Union[int, List[int]]
         Number of samples passed to the fit method.

     conformity_scores_ : ArrayLike of shape (n_samples_train)

@@ -173,7 +178,7 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): # type: ignore
         "single_estimator_",
         "estimators_",
         "n_features_in_",
-        "n_samples_val_",
+        "n_samples_",
         "conformity_scores_"
     ]

@@ -601,16 +606,18 @@ def _fit_and_predict_oof_model(
             of shape (n_samples_val,).

         """
-        X_train, y_train, X_val, y_val = (
-            X[train_index], y[train_index], X[val_index], y[val_index]
-        )
+        X_train = _safe_indexing(X, train_index)
+        y_train = _safe_indexing(y, train_index)
+        X_val = _safe_indexing(X, val_index)
+        y_val = _safe_indexing(y, val_index)
+
         if sample_weight is None:
             estimator = fit_estimator(estimator, X_train, y_train)
         else:
             estimator = fit_estimator(
                 estimator, X_train, y_train, sample_weight[train_index]
             )
-        if X_val.shape[0] > 0:
+        if _num_samples(X_val) > 0:
             y_pred_proba = self._predict_oof_model(
                 estimator, X_val,
             )

@@ -663,13 +670,10 @@ def fit(
         self._check_parameters()
         cv = check_cv(self.cv)
         estimator = self._check_estimator(X, y, self.estimator)
-
         if self.image_input:
             check_input_is_image(X)
-        X, y = check_X_y(
-            X, y, force_all_finite=False, ensure_2d=self.image_input,
-            allow_nd=self.image_input, dtype=["float64", "int", "object"]
-        )
+        X, y = indexable(X, y)
+        y = _check_y(y)
         assert type_of_target(y) == "multiclass"
         self.n_classes_ = len(set(y))
         self.n_features_in_ = check_n_features_in(X, cv, estimator)

@@ -678,7 +682,7 @@ def fit(
         # Initialization
         self.estimators_: List[ClassifierMixin] = []
         self.k_ = np.empty_like(y, dtype=int)
-        self.n_samples_val_ = X.shape[0]
+        self.n_samples_ = _num_samples(X)

         # Work
         if cv == "prefit":

@@ -716,7 +720,7 @@ def fit(
             self.conformity_scores_ = np.empty(y_pred_proba.shape)
         elif self.method == "score":
             self.conformity_scores_ = np.take_along_axis(
-                1 - y_pred_proba, y.reshape(-1, 1), axis=1
+                1 - y_pred_proba, np.ravel(y).reshape(-1, 1), axis=1
             )
         elif self.method == "cumulated_score":
             y_true = label_binarize(y=y, classes=estimator.classes_)

@@ -731,7 +735,7 @@ def fit(
                 y_pred_proba_sorted_cumsum, cutoff.reshape(-1, 1), axis=1
             )
             y_proba_true = np.take_along_axis(
-                y_pred_proba, y.reshape(-1, 1), axis=1
+                y_pred_proba, np.ravel(y).reshape(-1, 1), axis=1
             )
             random_state = check_random_state(self.random_state)
             u = random_state.uniform(size=len(y_pred_proba)).reshape(-1, 1)

@@ -744,7 +748,7 @@ def fit(
             )
             self.conformity_scores_ = np.take_along_axis(
                 index,
-                y.reshape(-1, 1),
+                np.ravel(y).reshape(-1, 1),
                 axis=1
             )

@@ -829,13 +833,6 @@ def predict(
         check_is_fitted(self, self.fit_attributes)
         if self.image_input:
             check_input_is_image(X)
-        X = check_array(
-            X,
-            force_all_finite=False,
-            ensure_2d=self.image_input,
-            allow_nd=self.image_input,
-            dtype=["float64", "object"]
-        )

         # Estimate probabilities from estimator(s)
         y_pred = self.single_estimator_.predict(X)

@@ -865,7 +862,7 @@ def predict(
             raise ValueError("Invalid 'agg_scores' argument.")

         # Estimate prediction sets
-        n = self.n_samples_val_
+        n = self.n_samples_
         if alpha_ is None:
             return np.array(y_pred)
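
Replacing check_X_y/check_array and positional NumPy indexing with indexable, _check_y, _safe_indexing and _num_samples keeps X in its original container (NumPy array, pandas DataFrame, sparse matrix) while the cross-validation folds are built. A minimal sketch of the behaviour these scikit-learn helpers provide; the toy data below is an assumption for illustration, not part of the repository:

# Why _safe_indexing rather than X[train_index]: integer-array indexing on a
# DataFrame selects *columns* (and raises KeyError here), _safe_indexing selects rows.
import numpy as np
import pandas as pd
from sklearn.utils import _safe_indexing
from sklearn.utils.validation import _num_samples

X = pd.DataFrame({"age": [21, 35, 48, 52], "city": ["a", "b", "a", "c"]})
train_index = np.array([0, 2])

X_train = _safe_indexing(X, train_index)   # rows 0 and 2, still a DataFrame
print(_num_samples(X_train))               # 2, no conversion to a NumPy array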

mapie/regression.py

Lines changed: 29 additions & 25 deletions
@@ -9,8 +9,13 @@
 from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import BaseCrossValidator
 from sklearn.pipeline import Pipeline
-from sklearn.utils import check_array, check_X_y
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils import _safe_indexing
+from sklearn.utils.validation import (
+    indexable,
+    check_is_fitted,
+    _num_samples,
+    _check_y,
+)

 from ._typing import ArrayLike
 from .aggregation_functions import aggregate_all, phi2D

@@ -149,7 +154,7 @@ class MapieRegressor(BaseEstimator, RegressorMixin): # type: ignore
     n_features_in_: int
         Number of features passed to the fit method.

-    n_samples_val_: List[int]
+    n_samples_: List[int]
         Number of samples passed to the fit method.

     References

@@ -190,7 +195,7 @@ class MapieRegressor(BaseEstimator, RegressorMixin): # type: ignore
         "k_",
         "residuals_",
         "n_features_in_",
-        "n_samples_val_",
+        "n_samples_",
     ]

     def __init__(

@@ -341,7 +346,7 @@ def _fit_and_predict_oof_model(
         val_index: ArrayLike,
         k: int,
         sample_weight: Optional[ArrayLike] = None,
-    ) -> Tuple[RegressorMixin, ArrayLike, ArrayLike, ArrayLike]:
+    ) -> Tuple[RegressorMixin, ArrayLike, ArrayLike]:
         """
         Fit a single out-of-fold model on a given training set and
         perform predictions on a test set.

@@ -372,30 +377,30 @@ def _fit_and_predict_oof_model(

         Returns
         -------
-        Tuple[RegressorMixin, ArrayLike, ArrayLike, ArrayLike]
+        Tuple[RegressorMixin, ArrayLike, ArrayLike]

         - [0]: Fitted estimator
         - [1]: Estimator predictions on the validation fold,
           of shape (n_samples_val,)
-        - [2]: Identification number of the validation fold,
-          of shape (n_samples_val,)
         - [3]: Validation data indices,
           of shape (n_samples_val,).

         """
-        X_train, y_train, X_val = X[train_index], y[train_index], X[val_index]
+        X_train = _safe_indexing(X, train_index)
+        y_train = _safe_indexing(y, train_index)
+        X_val = _safe_indexing(X, val_index)
         if sample_weight is None:
             estimator = fit_estimator(estimator, X_train, y_train)
         else:
+            sample_weight_train = _safe_indexing(sample_weight, train_index)
             estimator = fit_estimator(
-                estimator, X_train, y_train, sample_weight[train_index]
+                estimator, X_train, y_train, sample_weight_train
             )
-        if X_val.shape[0] > 0:
+        if _num_samples(X_val) > 0:
             y_pred = estimator.predict(X_val)
         else:
             y_pred = np.array([])
-        val_id = np.full_like(y_pred, k, dtype=int)
-        return estimator, y_pred, val_id, val_index
+        return estimator, y_pred, val_index

     def aggregate_with_mask(self, x: ArrayLike, k: ArrayLike) -> ArrayLike:
         """

@@ -479,9 +484,8 @@ def fit(
         cv = check_cv(self.cv)
         estimator = self._check_estimator(self.estimator)
         agg_function = self._check_agg_function(self.agg_function)
-        X, y = check_X_y(
-            X, y, force_all_finite=False, dtype=["float64", "int", "object"]
-        )
+        X, y = indexable(X, y)
+        y = _check_y(y)
         self.n_features_in_ = check_n_features_in(X, cv, estimator)
         sample_weight, X, y = check_null_weight(sample_weight, X, y)

@@ -492,7 +496,7 @@ def fit(
         if cv == "prefit":
             self.single_estimator_ = estimator
             y_pred = self.single_estimator_.predict(X)
-            self.n_samples_val_ = [X.shape[0]]
+            self.n_samples_ = [_num_samples(X)]
             self.k_ = np.full(
                 shape=(len(y), 1), fill_value=np.nan, dtype=float
             )

@@ -514,7 +518,7 @@ def fit(
             )
             if self.method == "naive":
                 y_pred = self.single_estimator_.predict(X)
-                self.n_samples_val_ = [X.shape[0]]
+                self.n_samples_ = [_num_samples(X)]
             else:
                 outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
                     delayed(self._fit_and_predict_oof_model)(

@@ -528,22 +532,22 @@ def fit(
                     )
                     for k, (train_index, val_index) in enumerate(cv.split(X))
                 )
-                self.estimators_, predictions, val_ids, val_indices = map(
+                self.estimators_, predictions, val_indices = map(
                     list, zip(*outputs)
                 )

-                self.n_samples_val_ = [
+                self.n_samples_ = [
                     np.array(pred).shape[0] for pred in predictions
                 ]

                 for i, val_ind in enumerate(val_indices):
-                    pred_matrix[val_ind, i] = predictions[i]
+                    pred_matrix[val_ind, i] = np.array(predictions[i]).ravel()
                     self.k_[val_ind, i] = 1
                 check_nan_in_aposteriori_prediction(pred_matrix)

                 y_pred = aggregate_all(agg_function, pred_matrix)

-        self.residuals_ = np.abs(y - y_pred)
+        self.residuals_ = np.abs(np.ravel(y) - y_pred)
         return self

     def predict(

@@ -605,7 +609,6 @@ def predict(
         check_is_fitted(self, self.fit_attributes)
         self._check_ensemble(ensemble)
         alpha_ = check_alpha(alpha)
-        X = check_array(X, force_all_finite=False, dtype=["float64", "object"])
         y_pred = self.single_estimator_.predict(X)

         if alpha is None:

@@ -658,7 +661,7 @@ def predict(
                     )
                     for _alpha in alpha_
                 ]
-            )
+            ).data
             y_pred_up = np.column_stack(
                 [
                     np.quantile(

@@ -669,7 +672,8 @@ def predict(
                     )
                     for _alpha in alpha_
                 ]
-            )
+            ).data
             if ensemble:
                 y_pred = aggregate_all(self.agg_function, y_pred_multi)
+            np.stack([y_pred_low, y_pred_up], axis=1)
             return y_pred, np.stack([y_pred_low, y_pred_up], axis=1)
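
Since fit and predict now validate inputs through indexable and _check_y instead of check_X_y and check_array, MapieRegressor can wrap a complete scikit-learn Pipeline fed with a raw DataFrame and let the pipeline handle non-numeric columns itself. A minimal sketch of that use case; the dataset and hyperparameters are illustrative assumptions, not taken from the repository:

# Hedged sketch: MapieRegressor wrapping a Pipeline that receives a raw DataFrame.
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from mapie.regression import MapieRegressor

X = pd.DataFrame({
    "size": [30, 45, 60, 80, 100, 120],
    "city": ["lyon", "paris", "lyon", "nice", "paris", "nice"],
})
y = pd.Series([110, 320, 150, 240, 390, 290])

preprocess = ColumnTransformer(
    [("onehot", OneHotEncoder(handle_unknown="ignore"), ["city"])],
    remainder="passthrough",
)
pipe = make_pipeline(preprocess, LinearRegression())

# The "city" column reaches OneHotEncoder untouched because the folds are built
# with indexable/_safe_indexing rather than by coercing X to a float array.
mapie = MapieRegressor(pipe, method="plus", cv=3)
mapie.fit(X, y)
y_pred, y_pis = mapie.predict(X, alpha=0.1)   # point predictions and intervals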
