@@ -44,46 +44,28 @@ def pointwise_global_reconstruction_error(
4444 X use input shape (samples, features). For sample reconstruction of Y using X
4545 use input shape (features, samples).
4646 train_idx : numpy.ndarray, dtype=int, default=None
47- array of indices used for training, if None,
48- If None, the complement of the ``test_idx`` is used. If ``train_size`` is
49- also None, 2-fold split is taken.
47+ Array of indices used for training. If None, the complement of the ``test_idx``
48+ is used. If ``train_size`` is also None, 2-fold split is taken.
5049 test_idx : numpy.ndarray, dtype=int, default=None
51- array of indices used for training, if None,
52- If None, the complement of the ``train_idx`` is used. If ``test_size`` is
53- also None, 2-fold split is taken.
54- scaler : object implementing fit/transfom
55- Scales the X and Y before computing the reconstruction measure.
56- The default value scales the features such that the reconstruction
57- measure on the training set is upper bounded to 1.
58- estimator : object implementing fit/predict, default=None
59- Sklearn estimator used to reconstruct features/samples.
50+ Array of indices used for testing. If None, the complement of the ``train_idx``
51+ is used. If ``test_size`` is also None, 2-fold split is taken.
52+ scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
53+ Scales X and Y before computing the reconstruction measure. The default value
54+ scales the features such that the reconstruction measure on the training set is
55+ upper bounded to 1.
56+ estimator : object implementing fit/predict, default=``Ridge2FoldCV``
57+ Sklearn estimator used to reconstruct test features/samples.
6058
6159 Returns
6260 -------
6361 pointwise_global_reconstruction_error : numpy.ndarray
64- The global reconstruction error for each sample/point
62+ The global reconstruction error for each test sample/point.
6563 """
66- (
67- train_idx ,
68- test_idx ,
69- scaler ,
70- estimator ,
71- ) = check_global_reconstruction_measures_input (
64+ train_idx , test_idx , scaler , estimator = check_global_reconstruction_measures_input (
7265 X , Y , train_idx , test_idx , scaler , estimator
7366 )
74- X_train , X_test , Y_train , Y_test = (
75- X [train_idx ],
76- X [test_idx ],
77- Y [train_idx ],
78- Y [test_idx ],
79- )
8067
81- scaler .fit (X_train )
82- X_train = scaler .transform (X_train )
83- X_test = scaler .transform (X_test )
84- scaler .fit (Y_train )
85- Y_train = scaler .transform (Y_train )
86- Y_test = scaler .transform (Y_test )
68+ X_train , X_test , Y_train , Y_test = _prepare_data (X , Y , train_idx , test_idx , scaler )
8769
8870 estimator .fit (X_train , Y_train )
8971
@@ -120,27 +102,25 @@ def global_reconstruction_error(
120102 Parameters
121103 ----------
122104 X : numpy.ndarray of shape (n_samples, X_n_features)
123- Source data which reconstructs target Y.
124- For feature reconstruction of Y using X use input shape (samples, features).
125- For sample reconstruction of Y using X use input shape (features, samples).
105+ Source data which reconstructs target Y. For feature reconstruction of Y using X
106+ use input shape (samples, features). For sample reconstruction of Y using X use
107+ input shape (features, samples).
126108 Y : numpy.ndarray of shape (n_samples, Y_n_targets)
127- Target data which is reconstructed with X.
128- For feature reconstruction of Y using X use input shape (samples, features).
129- For sample reconstruction of Y using X use input shape (features, samples).
109+ Target data which is reconstructed with X. For feature reconstruction of Y using
110+ X use input shape (samples, features). For sample reconstruction of Y using X
111+ use input shape (features, samples).
130112 train_idx : numpy.ndarray, dtype=int, default=None
131- array of indices used for training, if None,
132- If None, the complement of the ``test_idx`` is used. If ``train_size`` is
133- also None, 2-fold split is taken.
113+ Array of indices used for training. If None, the complement of the ``test_idx``
114+ is used. If ``train_size`` is also None, 2-fold split is taken.
134115 test_idx : numpy.ndarray, dtype=int, default=None
135- array of indices used for training, if None,
136- If None, the complement of the ``train_idx`` is used. If ``test_size`` is
137- also None, 2-fold split is taken.
138- scaler : object implementing fit/transfom
139- Scales the X and Y before computing the reconstruction measure.
140- The default value scales the features such that the reconstruction
141- measure on the training set is upper bounded to 1.
142- estimator : object implementing fit/predict, default=None
143- Sklearn estimator used to reconstruct features/samples.
116+ Array of indices used for testing. If None, the complement of the ``train_idx``
117+ is used. If ``test_size`` is also None, 2-fold split is taken.
118+ scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
119+ Scales X and Y before computing the reconstruction measure. The default value
120+ scales the features such that the reconstruction measure on the training set is
121+ upper bounded to 1.
122+ estimator : object implementing fit/predict, default=``Ridge2FoldCV``
123+ Sklearn estimator used to reconstruct test features/samples.
144124
145125 Returns
146126 -------
@@ -201,46 +181,28 @@ def pointwise_global_reconstruction_distortion(
201181 For feature reconstruction of Y using X use input shape (samples, features).
202182 For sample reconstruction of Y using X use input shape (features, samples).
203183 train_idx : numpy.ndarray, dtype=int, default=None
204- array of indices used for training, if None,
205- If None, the complement of the ``test_idx`` is used. If ``train_size`` is
206- also None, 2-fold split is taken.
184+ Array of indices used for training. If None, the complement of the ``test_idx``
185+ is used. If ``train_size`` is also None, 2-fold split is taken.
207186 test_idx : numpy.ndarray, dtype=int, default=None
208- array of indices used for training, if None,
209- If None, the complement of the ``train_idx`` is used. If ``test_size`` is
210- also None, 2-fold split is taken.
211- scaler : object implementing fit/transfom
212- Scales the X and Y before computing the reconstruction measure.
213- The default value scales the features such that the reconstruction
214- measure on the training set is upper bounded to 1.
215- estimator : object implementing fit/predict, default=None
216- Sklearn estimator used to reconstruct features/samples.
187+ Array of indices used for testing. If None, the complement of the ``train_idx``
188+ is used. If ``test_size`` is also None, 2-fold split is taken.
189+ scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
190+ Scales X and Y before computing the reconstruction measure. The default value
191+ scales the features such that the reconstruction measure on the training set is
192+ upper bounded to 1.
193+ estimator : object implementing fit/predict, default=``Ridge2FoldCV``
194+ Sklearn estimator used to reconstruct test features/samples.
217195
218196 Returns
219197 -------
220198 pointwise_global_reconstruction_distortion : ndarray
221199 The global reconstruction distortion for each sample/point
222200 """
223- (
224- train_idx ,
225- test_idx ,
226- scaler ,
227- estimator ,
228- ) = check_global_reconstruction_measures_input (
201+ train_idx , test_idx , scaler , estimator = check_global_reconstruction_measures_input (
229202 X , Y , train_idx , test_idx , scaler , estimator
230203 )
231- X_train , X_test , Y_train , Y_test = (
232- X [train_idx ],
233- X [test_idx ],
234- Y [train_idx ],
235- Y [test_idx ],
236- )
237204
238- scaler .fit (X_train )
239- X_train = scaler .transform (X_train )
240- X_test = scaler .transform (X_test )
241- scaler .fit (Y_train )
242- Y_train = scaler .transform (Y_train )
243- Y_test = scaler .transform (Y_test )
205+ X_train , X_test , Y_train , _Y_test = _prepare_data (X , Y , train_idx , test_idx , scaler )
244206
245207 predictions_Y_test = estimator .fit (X_train , Y_train ).predict (X_test )
246208 orthogonal_predictions_Y_test = (
@@ -291,19 +253,17 @@ def global_reconstruction_distortion(
291253 For feature reconstruction of Y using X use input shape (samples, features).
292254 For sample reconstruction of Y using X use input shape (features, samples).
293255 train_idx : numpy.ndarray, dtype=int, default=None
294- array of indices used for training, if None,
295- If None, the complement of the ``test_idx`` is used. If ``train_size`` is
296- also None, 2-fold split is taken.
256+ Array of indices used for training. If None, the complement of the ``test_idx``
257+ is used. If ``train_size`` is also None, 2-fold split is taken.
297258 test_idx : numpy.ndarray, dtype=int, default=None
298- array of indices used for training, if None,
299- If None, the complement of the ``train_idx`` is used. If ``test_size`` is
300- also None, 2-fold split is taken.
301- scaler : object implementing fit/transfom
302- Scales the X and Y before computing the reconstruction measure.
303- The default value scales the features such that the reconstruction
304- measure on the training set is upper bounded to 1.
305- estimator : object implementing fit/predict, default=None
306- Sklearn estimator used to reconstruct features/samples.
259+ Array of indices used for testing. If None, the complement of the ``train_idx``
260+ is used. If ``test_size`` is also None, 2-fold split is taken.
261+ scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
262+ Scales X and Y before computing the reconstruction measure. The default value
263+ scales the features such that the reconstruction measure on the training set is
264+ upper bounded to 1.
265+ estimator : object implementing fit/predict, default=``Ridge2FoldCV``
266+ Sklearn estimator used to reconstruct test features/samples.
307267
308268 Returns
309269 -------
@@ -373,47 +333,35 @@ def pointwise_local_reconstruction_error(
373333 Number of neighbour points used to compute the local reconstruction weight for
374334 each sample/point.
375335 train_idx : numpy.ndarray, dtype=int, default=None
376- array of indices used for training, if None,
377- If None, the complement of the ``test_idx`` is used. If ``train_size`` is
378- also None, 2-fold split is taken.
336+ Array of indices used for training. If None, the complement of the ``test_idx``
337+ is used. If ``train_size`` is also None, 2-fold split is taken.
379338 test_idx : numpy.ndarray, dtype=int, default=None
380- array of indices used for training, if None,
381- If None, the complement of the ``train_idx`` is used. If ``test_size`` is
382- also None, 2-fold split is taken.
383- scaler : object implementing fit/transfom
384- Scales the X and Y before computing the reconstruction measure.
385- The default value scales the features such that the reconstruction
386- measure on the training set is upper bounded to 1.
387- estimator : object implementing fit/predict, default=None
388- Sklearn estimator used to reconstruct features/samples.
339+ Array of indices used for testing. If None, the complement of the ``train_idx``
340+ is used. If ``test_size`` is also None, 2-fold split is taken.
341+ scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
342+ Scales X and Y before computing the reconstruction measure. The default value
343+ scales the features such that the reconstruction measure on the training set is
344+ upper bounded to 1.
345+ estimator : object implementing fit/predict, default=``Ridge2FoldCV``
346+ Sklearn estimator used to reconstruct test features/samples.
347+ n_jobs : int, default=None
348+ The number of CPUs to use to do the computation.
349+ :obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context.
350+ ``-1`` means using all processors. See
351+ `n_jobs glossary from sklearn (external link) <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
352+ for more details.
389353
390354 Returns
391355 -------
392356 pointwise_local_reconstruction_error : numpy.ndarray
393357 The local reconstruction error for each sample/point
394358
395359 """
396- (
397- train_idx ,
398- test_idx ,
399- scaler ,
400- estimator ,
401- ) = check_local_reconstruction_measures_input (
360+ train_idx , test_idx , scaler , estimator = check_local_reconstruction_measures_input (
402361 X , Y , n_local_points , train_idx , test_idx , scaler , estimator
403362 )
404- X_train , X_test , Y_train , Y_test = (
405- X [train_idx ],
406- X [test_idx ],
407- Y [train_idx ],
408- Y [test_idx ],
409- )
410363
411- scaler .fit (X_train )
412- X_train = scaler .transform (X_train )
413- X_test = scaler .transform (X_test ).astype (X_train .dtype )
414- scaler .fit (Y_train )
415- Y_train = scaler .transform (Y_train )
416- Y_test = scaler .transform (Y_test )
364+ X_train , X_test , Y_train , Y_test = _prepare_data (X , Y , train_idx , test_idx , scaler )
417365
418366 squared_dist = (
419367 np .sum (X_train ** 2 , axis = 1 )
@@ -496,19 +444,23 @@ def local_reconstruction_error(
496444 Number of neighbour points used to compute the local reconstruction weight for
497445 each sample/point.
498446 train_idx : numpy.ndarray, dtype=int, default=None
499- array of indices used for training, if None,
500- If None, the complement of the ``test_idx`` is used. If ``train_size`` is
501- also None, 2-fold split is taken.
447+ Array of indices used for training. If None, the complement of the ``test_idx``
448+ is used. If ``train_size`` is also None, 2-fold split is taken.
502449 test_idx : numpy.ndarray, dtype=int, default=None
503- array of indices used for training, if None,
504- If None, the complement of the ``train_idx`` is used. If ``test_size`` is
505- also None, 2-fold split is taken.
506- scaler : object implementing fit/transfom
507- Scales the X and Y before computing the reconstruction measure.
508- The default value scales the features such that the reconstruction
509- measure on the training set is upper bounded to 1.
510- estimator : object implementing fit/predict, default=None
511- Sklearn estimator used to reconstruct features/samples.
450+ Array of indices used for testing. If None, the complement of the ``train_idx``
451+ is used. If ``test_size`` is also None, 2-fold split is taken.
452+ scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
453+ Scales X and Y before computing the reconstruction measure. The default value
454+ scales the features such that the reconstruction measure on the training set is
455+ upper bounded to 1.
456+ estimator : object implementing fit/predict, default=``Ridge2FoldCV``
457+ Sklearn estimator used to reconstruct test features/samples.
458+ n_jobs : int, default=None
459+ The number of CPUs to use to do the computation.
460+ :obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context.
461+ ``-1`` means using all processors. See
462+ `n_jobs glossary from sklearn (external link) <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
463+ for more details.
512464
513465 Returns
514466 -------
@@ -534,7 +486,11 @@ def check_global_reconstruction_measures_input(
534486 X , Y , train_idx , test_idx , scaler , estimator
535487):
536488 """Returns default reconstruction measure inputs for all None parameters"""
537- assert len (X ) == len (Y )
489+ if X .shape [0 ] != Y .shape [0 ]:
490+ raise ValueError (
491+ f"First dimension of X ({ X .shape [0 ]} ) and Y ({ Y .shape [0 ]} ) must match"
492+ )
493+
538494 if (train_idx is None ) and (test_idx is None ):
539495 train_idx , test_idx = train_test_split (
540496 np .arange (len (X )),
@@ -562,6 +518,7 @@ def check_global_reconstruction_measures_input(
562518 scoring = "neg_root_mean_squared_error" ,
563519 n_jobs = 1 ,
564520 )
521+
565522 return train_idx , test_idx , scaler , estimator
566523
567524
@@ -570,7 +527,39 @@ def check_local_reconstruction_measures_input(
570527):
571528 """Returns default reconstruction measure inputs for all None parameters"""
572529 # only needs to check one extra parameter
573- assert len (X ) >= n_local_points
530+ if len (X ) < n_local_points :
531+ raise ValueError (
532+ f"X has { len (X )} samples but n_local_points={ n_local_points } . "
533+ "Must have at least n_local_points samples"
534+ )
535+
574536 return check_global_reconstruction_measures_input (
575537 X , Y , train_idx , test_idx , scaler , estimator
576538 )
539+
540+
541+ def _prepare_data (X , Y , train_idx , test_idx , scaler ):
542+ """
543+ Split X and Y by index and return scaled (X_train, X_test, Y_train, Y_test)
544+
545+ Parameters
546+ ----------
547+ X, Y : array-like
548+ Input data
549+ train_idx, test_idx : array-like
550+ Indices for train/test split
551+ scaler : object implementing fit/transform
552+ Refitted here on the X training rows, then on the Y training rows
553+ """
554+ X_train , X_test = X [train_idx ], X [test_idx ]
555+ Y_train , Y_test = Y [train_idx ], Y [test_idx ]
556+
557+ scaler .fit (X_train )
558+ X_train_scaled = scaler .transform (X_train )
559+ X_test_scaled = scaler .transform (X_test )
560+
561+ scaler .fit (Y_train )
562+ Y_train_scaled = scaler .transform (Y_train )
563+ Y_test_scaled = scaler .transform (Y_test )
564+
565+ return X_train_scaled , X_test_scaled , Y_train_scaled , Y_test_scaled
0 commit comments