diff --git a/onedal/decomposition/pca.cpp b/onedal/decomposition/pca.cpp index 49226f3265..11b48c9d8a 100644 --- a/onedal/decomposition/pca.cpp +++ b/onedal/decomposition/pca.cpp @@ -112,6 +112,10 @@ void init_train_result(py::module_& m) { .def_property_readonly("explained_variances_ratio", &result_t::get_explained_variances_ratio) #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION>=20240100 +#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250100 + .def_property_readonly("noise_variance", + &result_t::get_noise_variance) +#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION>=20250100 .def_property_readonly("means", &result_t::get_means) .def_property_readonly("variances", &result_t::get_variances); } diff --git a/onedal/decomposition/pca.py b/onedal/decomposition/pca.py index aa251489a8..d46ea9c651 100644 --- a/onedal/decomposition/pca.py +++ b/onedal/decomposition/pca.py @@ -104,22 +104,6 @@ def _resolve_n_components_for_result(self, shape_tuple): else: return self.n_components - def _compute_noise_variance(self, n_components, n_sf_min): - if n_components < n_sf_min: - if len(self.explained_variance_) == n_sf_min: - return self.explained_variance_[n_components:].mean() - elif len(self.explained_variance_) < n_sf_min: - # TODO Rename variances_ to var_ to align with sklearn/sklearnex IncrementalPCA - if hasattr(self, "variances_"): - resid_var = self.variances_.sum() - elif hasattr(self, "var_"): - resid_var = self.var_.sum() - - resid_var -= self.explained_variance_.sum() - return resid_var / (n_sf_min - n_components) - else: - return 0.0 - def _create_model(self): # Not supported with spmd policy so BasePCA must be specified m = BasePCA._get_backend(BasePCA, "decomposition", "dim_reduction", "model") @@ -185,6 +169,7 @@ def fit(self, X, y=None, queue=None): self.explained_variance_ratio_ = from_table( result.explained_variances_ratio ).ravel() + self.noise_variance_ = from_table(result.noise_variance).ravel() self.n_samples_ = n_samples self.n_features_ = 
n_features @@ -194,7 +179,6 @@ def fit(self, X, y=None, queue=None): n_components = self._resolve_n_components_for_result(X.shape) self.n_components_ = n_components - self.noise_variance_ = self._compute_noise_variance(n_components, n_sf_min) if n_components < params["n_components"]: self.explained_variance_ = self.explained_variance_[:n_components] diff --git a/onedal/decomposition/tests/test_incremental_pca.py b/onedal/decomposition/tests/test_incremental_pca.py index 94a0b1acc6..95701c610e 100644 --- a/onedal/decomposition/tests/test_incremental_pca.py +++ b/onedal/decomposition/tests/test_incremental_pca.py @@ -183,8 +183,8 @@ def test_on_random_data( if len(sorted_eigenvalues) > n_components else 0.0 ) - # TODO Fix noise variance computation (It is necessary to update C++ side) - # assert np.abs(incpca.noise_variance_ - expected_noise_variance) < tol + + assert np.abs(incpca.noise_variance_ - expected_noise_variance) < tol expected_transformed_data = centered_data @ components.T if whiten: diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py b/sklearnex/preview/decomposition/tests/test_incremental_pca.py index c4c47c8adb..f34c6c93c2 100644 --- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py +++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py @@ -167,8 +167,8 @@ def check_pca(incpca, dtype, whiten, data, transformed_data): if len(sorted_eigenvalues) > n_components else 0.0 ) - # TODO Fix noise variance computation (It is necessary to update C++ side) - # assert np.abs(incpca.noise_variance_ - expected_noise_variance) < tol + + assert np.abs(incpca.noise_variance_ - expected_noise_variance) < tol expected_transformed_data = centered_data @ components.T if whiten: