src/skmatter/metrics/_reconstruction_measures.py: 248 changes (113 additions, 135 deletions)
@@ -44,46 +44,28 @@ def pointwise_global_reconstruction_error(
X use input shape (samples, features). For sample reconstruction of Y using X
use input shape (features, samples).
train_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``test_idx`` is used. If ``train_size`` is
also None, 2-fold split is taken.
Array of indices used for training. If ``None``, the complement of the
``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken.
test_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``train_idx`` is used. If ``test_size`` is
also None, 2-fold split is taken.
scaler : object implementing fit/transfom
Scales the X and Y before computing the reconstruction measure.
The default value scales the features such that the reconstruction
measure on the training set is upper bounded to 1.
estimator : object implementing fit/predict, default=None
Sklearn estimator used to reconstruct features/samples.
Array of indices used for testing. If ``None``, the complement of the
``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken.
scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
Scales X and Y before computing the reconstruction measure. The default value
scales the features such that the reconstruction measure on the training set is
upper bounded to 1.
estimator : object implementing fit/predict, default=``Ridge2FoldCV``
Sklearn estimator used to reconstruct test features/samples.

Returns
-------
pointwise_global_reconstruction_error : numpy.ndarray
The global reconstruction error for each sample/point
The global reconstruction error for each test sample/point.
"""
(
train_idx,
test_idx,
scaler,
estimator,
) = check_global_reconstruction_measures_input(
train_idx, test_idx, scaler, estimator = check_global_reconstruction_measures_input(
X, Y, train_idx, test_idx, scaler, estimator
)
X_train, X_test, Y_train, Y_test = (
X[train_idx],
X[test_idx],
Y[train_idx],
Y[test_idx],
)

scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
scaler.fit(Y_train)
Y_train = scaler.transform(Y_train)
Y_test = scaler.transform(Y_test)
X_train, X_test, Y_train, Y_test = _prepare_data(X, Y, train_idx, test_idx, scaler)

estimator.fit(X_train, Y_train)
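
For context, the flow this hunk condenses (split by index, fit the scalers on the training partition only, then fit the estimator) can be sketched with plain scikit-learn pieces. This is a minimal stand-in, not the skmatter implementation: ``StandardScaler`` and ``Ridge`` replace the package's ``StandardFlexibleScaler`` and ``Ridge2FoldCV`` defaults, and the per-sample residual norm only approximates the pointwise error (skmatter's exact normalization may differ).

```python
# Illustrative sketch only -- scikit-learn stand-ins for the skmatter defaults
# (StandardScaler ~ StandardFlexibleScaler, Ridge ~ Ridge2FoldCV); the exact
# normalization of the pointwise error in skmatter may differ.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 10))      # source features
Y = X @ rng.normal(size=(10, 3))    # targets to reconstruct from X

# 2-fold style split by index, as in the default behaviour described above
train_idx, test_idx = train_test_split(
    np.arange(len(X)), train_size=0.5, random_state=0
)

# scalers are fit on the training partition only, then applied to the test split
x_scaler, y_scaler = StandardScaler(), StandardScaler()
X_train, X_test = x_scaler.fit_transform(X[train_idx]), x_scaler.transform(X[test_idx])
Y_train, Y_test = y_scaler.fit_transform(Y[train_idx]), y_scaler.transform(Y[test_idx])

# linear reconstruction of Y from X, evaluated per test sample
ridge = Ridge(alpha=1e-6).fit(X_train, Y_train)
pointwise_error = np.linalg.norm(Y_test - ridge.predict(X_test), axis=1)
```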

@@ -120,27 +102,25 @@ def global_reconstruction_error(
Parameters
----------
X : numpy.ndarray of shape (n_samples, X_n_features)
Source data which reconstructs target Y.
For feature reconstruction of Y using X use input shape (samples, features).
For sample reconstruction of Y using X use input shape (features, samples).
Source data which reconstructs target Y. For feature reconstruction of Y using X
use input shape (samples, features). For sample reconstruction of Y using X use
input shape (features, samples).
Y : numpy.ndarray of shape (n_samples, Y_n_targets)
Target data which is reconstructed with X.
For feature reconstruction of Y using X use input shape (samples, features).
For sample reconstruction of Y using X use input shape (features, samples).
Target data which is reconstructed with X. For feature reconstruction of Y using
X use input shape (samples, features). For sample reconstruction of Y using X
use input shape (features, samples).
train_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``test_idx`` is used. If ``train_size`` is
also None, 2-fold split is taken.
Array of indices used for training. If ``None``, the complement of the
``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken.
test_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``train_idx`` is used. If ``test_size`` is
also None, 2-fold split is taken.
scaler : object implementing fit/transfom
Scales the X and Y before computing the reconstruction measure.
The default value scales the features such that the reconstruction
measure on the training set is upper bounded to 1.
estimator : object implementing fit/predict, default=None
Sklearn estimator used to reconstruct features/samples.
Array of indices used for testing. If ``None``, the complement of the
``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken.
scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
Scales X and Y before computing the reconstruction measure. The default value
scales the features such that the reconstruction measure on the training set is
upper bounded to 1.
estimator : object implementing fit/predict, default=``Ridge2FoldCV``
Sklearn estimator used to reconstruct test features/samples.

Returns
-------
@@ -201,46 +181,28 @@ def pointwise_global_reconstruction_distortion(
For feature reconstruction of Y using X use input shape (samples, features).
For sample reconstruction of Y using X use input shape (features, samples).
train_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``test_idx`` is used. If ``train_size`` is
also None, 2-fold split is taken.
Array of indices used for training. If ``None``, the complement of the
``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken.
test_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``train_idx`` is used. If ``test_size`` is
also None, 2-fold split is taken.
scaler : object implementing fit/transfom
Scales the X and Y before computing the reconstruction measure.
The default value scales the features such that the reconstruction
measure on the training set is upper bounded to 1.
estimator : object implementing fit/predict, default=None
Sklearn estimator used to reconstruct features/samples.
Array of indices used for testing. If ``None``, the complement of the
``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken.
scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
Scales X and Y before computing the reconstruction measure. The default value
scales the features such that the reconstruction measure on the training set is
upper bounded to 1.
estimator : object implementing fit/predict, default=``Ridge2FoldCV``
Sklearn estimator used to reconstruct test features/samples.

Returns
-------
pointwise_global_reconstruction_distortion : ndarray
The global reconstruction distortion for each sample/point
"""
(
train_idx,
test_idx,
scaler,
estimator,
) = check_global_reconstruction_measures_input(
train_idx, test_idx, scaler, estimator = check_global_reconstruction_measures_input(
X, Y, train_idx, test_idx, scaler, estimator
)
X_train, X_test, Y_train, Y_test = (
X[train_idx],
X[test_idx],
Y[train_idx],
Y[test_idx],
)

scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
scaler.fit(Y_train)
Y_train = scaler.transform(Y_train)
Y_test = scaler.transform(Y_test)
X_train, X_test, Y_train, _Y_test = _prepare_data(X, Y, train_idx, test_idx, scaler)

predictions_Y_test = estimator.fit(X_train, Y_train).predict(X_test)
orthogonal_predictions_Y_test = (
@@ -291,19 +253,17 @@ def global_reconstruction_distortion(
For feature reconstruction of Y using X use input shape (samples, features).
For sample reconstruction of Y using X use input shape (features, samples).
train_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``test_idx`` is used. If ``train_size`` is
also None, 2-fold split is taken.
Array of indices used for training. If ``None``, the complement of the
``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken.
test_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``train_idx`` is used. If ``test_size`` is
also None, 2-fold split is taken.
scaler : object implementing fit/transfom
Scales the X and Y before computing the reconstruction measure.
The default value scales the features such that the reconstruction
measure on the training set is upper bounded to 1.
estimator : object implementing fit/predict, default=None
Sklearn estimator used to reconstruct features/samples.
Array of indices used for testing. If ``None``, the complement of the
``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken.
scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
Scales X and Y before computing the reconstruction measure. The default value
scales the features such that the reconstruction measure on the training set is
upper bounded to 1.
estimator : object implementing fit/predict, default=``Ridge2FoldCV``
Sklearn estimator used to reconstruct test features/samples.

Returns
-------
@@ -373,47 +333,35 @@ def pointwise_local_reconstruction_error(
Number of neighbour points used to compute the local reconstruction weight for
each sample/point.
train_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``test_idx`` is used. If ``train_size`` is
also None, 2-fold split is taken.
Array of indices used for training. If ``None``, the complement of the
``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken.
test_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``train_idx`` is used. If ``test_size`` is
also None, 2-fold split is taken.
scaler : object implementing fit/transfom
Scales the X and Y before computing the reconstruction measure.
The default value scales the features such that the reconstruction
measure on the training set is upper bounded to 1.
estimator : object implementing fit/predict, default=None
Sklearn estimator used to reconstruct features/samples.
Array of indices used for testing. If ``None``, the complement of the
``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken.
scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
Scales X and Y before computing the reconstruction measure. The default value
scales the features such that the reconstruction measure on the training set is
upper bounded to 1.
estimator : object implementing fit/predict, default=``Ridge2FoldCV``
Sklearn estimator used to reconstruct test features/samples.
n_jobs : int, default=None
The number of CPUs to use to do the computation.
:obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See
`n_jobs glossary from sklearn (external link) <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
for more details.

Returns
-------
pointwise_local_reconstruction_error : numpy.ndarray
The local reconstruction error for each sample/point

"""
(
train_idx,
test_idx,
scaler,
estimator,
) = check_local_reconstruction_measures_input(
train_idx, test_idx, scaler, estimator = check_local_reconstruction_measures_input(
X, Y, n_local_points, train_idx, test_idx, scaler, estimator
)
X_train, X_test, Y_train, Y_test = (
X[train_idx],
X[test_idx],
Y[train_idx],
Y[test_idx],
)

scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test).astype(X_train.dtype)
scaler.fit(Y_train)
Y_train = scaler.transform(Y_train)
Y_test = scaler.transform(Y_test)
X_train, X_test, Y_train, Y_test = _prepare_data(X, Y, train_idx, test_idx, scaler)

squared_dist = (
np.sum(X_train**2, axis=1)
@@ -496,19 +444,23 @@ def local_reconstruction_error(
Number of neighbour points used to compute the local reconstruction weight for
each sample/point.
train_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``test_idx`` is used. If ``train_size`` is
also None, 2-fold split is taken.
Array of indices used for training. If ``None``, the complement of the
``test_idx`` is used. If ``train_size`` is also ``None``, 2-fold split is taken.
test_idx : numpy.ndarray, dtype=int, default=None
array of indices used for training, if None,
If None, the complement of the ``train_idx`` is used. If ``test_size`` is
also None, 2-fold split is taken.
scaler : object implementing fit/transfom
Scales the X and Y before computing the reconstruction measure.
The default value scales the features such that the reconstruction
measure on the training set is upper bounded to 1.
estimator : object implementing fit/predict, default=None
Sklearn estimator used to reconstruct features/samples.
Array of indices used for testing. If ``None``, the complement of the
``train_idx`` is used. If ``test_size`` is also ``None``, 2-fold split is taken.
scaler : object implementing fit/transform, default=``StandardFlexibleScaler``
Scales X and Y before computing the reconstruction measure. The default value
scales the features such that the reconstruction measure on the training set is
upper bounded to 1.
estimator : object implementing fit/predict, default=``Ridge2FoldCV``
Sklearn estimator used to reconstruct test features/samples.
n_jobs : int, default=None
The number of CPUs to use to do the computation.
:obj:`None` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See
`n_jobs glossary from sklearn (external link) <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
for more details.

Returns
-------
@@ -534,7 +486,11 @@ def check_global_reconstruction_measures_input(
X, Y, train_idx, test_idx, scaler, estimator
):
"""Returns default reconstruction measure inputs for all None parameters"""
assert len(X) == len(Y)
if X.shape[0] != Y.shape[0]:
raise ValueError(
f"First dimension of X ({X.shape[0]}) and Y ({Y.shape[0]}) must match"
)

if (train_idx is None) and (test_idx is None):
train_idx, test_idx = train_test_split(
np.arange(len(X)),
Expand Down Expand Up @@ -562,6 +518,7 @@ def check_global_reconstruction_measures_input(
scoring="neg_root_mean_squared_error",
n_jobs=1,
)

return train_idx, test_idx, scaler, estimator
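
The defaults filled in here (a ``StandardFlexibleScaler`` and a ``Ridge2FoldCV`` scored with negative RMSE) can be overridden through the ``scaler`` and ``estimator`` arguments of the public measures. A minimal sketch, assuming the measures are exposed from ``skmatter.metrics`` as in the released package, with plain scikit-learn replacements:

```python
# Sketch of overriding the default scaler/estimator; assumes the public
# import path skmatter.metrics and that any fit/transform scaler and
# fit/predict estimator are accepted, as the docstrings above state.
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler

from skmatter.metrics import global_reconstruction_error

rng = np.random.default_rng(0)
X = rng.normal(size=(80, 8))
Y = X[:, :4] + 0.1 * rng.normal(size=(80, 4))  # Y nearly linear in X

gre = global_reconstruction_error(
    X,
    Y,
    scaler=StandardScaler(),
    estimator=RidgeCV(alphas=np.logspace(-8, 2, 11)),
)
print(gre)  # close to 0 for a near-linear relationship
```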


@@ -570,7 +527,28 @@ def check_local_reconstruction_measures_input(
):
"""Returns default reconstruction measure inputs for all None parameters"""
# only needs to check one extra parameter
assert len(X) >= n_local_points
if len(X) < n_local_points:
raise ValueError(
f"X has {len(X)} samples but n_local_points={n_local_points}. "
"Must have at least n_local_points samples"
)

return check_global_reconstruction_measures_input(
X, Y, train_idx, test_idx, scaler, estimator
)


def _prepare_data(X, Y, train_idx, test_idx, scaler):
"""Split and scale data for reconstruction measures"""
X_train, X_test = X[train_idx], X[test_idx]
Y_train, Y_test = Y[train_idx], Y[test_idx]

scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

scaler.fit(Y_train)
Y_train_scaled = scaler.transform(Y_train)
Y_test_scaled = scaler.transform(Y_test)

return X_train_scaled, X_test_scaled, Y_train_scaled, Y_test_scaled
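
The assert-to-ValueError changes make the input checks testable with ``pytest.raises``. The following is a hypothetical test module, not part of this PR, exercising the two new messages; it assumes the check functions remain importable from ``skmatter.metrics._reconstruction_measures``:

```python
# Hypothetical tests, not part of this PR; they only illustrate the new
# ValueError behaviour of the check_*_reconstruction_measures_input functions.
import numpy as np
import pytest

from skmatter.metrics._reconstruction_measures import (
    check_global_reconstruction_measures_input,
    check_local_reconstruction_measures_input,
)


def test_mismatched_first_dimension_raises():
    X, Y = np.zeros((10, 3)), np.zeros((8, 2))
    with pytest.raises(ValueError, match="must match"):
        check_global_reconstruction_measures_input(X, Y, None, None, None, None)


def test_too_few_samples_for_local_points_raises():
    X, Y = np.zeros((5, 3)), np.zeros((5, 2))
    with pytest.raises(ValueError, match="n_local_points"):
        check_local_reconstruction_measures_input(X, Y, 20, None, None, None, None)
```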