From d9951f684c93b2fc85def817c66dee1ef5f63454 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 5 Dec 2025 17:28:36 +0100 Subject: [PATCH 1/4] clarification about sparse handling in tsne --- daal4py/sklearn/manifold/_t_sne.py | 6 ------ doc/sources/algorithms.rst | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/daal4py/sklearn/manifold/_t_sne.py b/daal4py/sklearn/manifold/_t_sne.py index 88d964e1fa..d50827b57a 100755 --- a/daal4py/sklearn/manifold/_t_sne.py +++ b/daal4py/sklearn/manifold/_t_sne.py @@ -147,12 +147,6 @@ def _fit(self, X, skip_num_points=0): daal_check_version((2021, "P", 600)), "oneDAL version is lower than 2021.6.", ), - ( - not ( - isinstance(self.init, str) and self.init == "pca" and issparse(X) - ), - "PCA initialization is not supported with sparse input matrices.", - ), # Note: these conditions below should result in errors, but stock scikit-learn # does not check for errors at this exact point. Hence, this offloads the erroring # out to the base class, wherever in the process they might be encountered. diff --git a/doc/sources/algorithms.rst b/doc/sources/algorithms.rst index d287ec56f8..ea3b33718e 100755 --- a/doc/sources/algorithms.rst +++ b/doc/sources/algorithms.rst @@ -196,7 +196,7 @@ Dimensionality Reduction - ``method`` != ``"barnes_hut"`` Refer to :ref:`TSNE acceleration details ` to learn more. - - Sparse data with ``init`` = ``"pca"`` is not supported + - Sparse data is not supported for the initialization and distance calculation stages. 
Nearest Neighbors ***************** From c7c27f24a7537d2a49afe3dbd98b204865601666 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Mon, 8 Dec 2025 10:13:02 +0100 Subject: [PATCH 2/4] check for older versions --- daal4py/sklearn/manifold/_t_sne.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/daal4py/sklearn/manifold/_t_sne.py b/daal4py/sklearn/manifold/_t_sne.py index d50827b57a..379d432a5d 100755 --- a/daal4py/sklearn/manifold/_t_sne.py +++ b/daal4py/sklearn/manifold/_t_sne.py @@ -147,6 +147,16 @@ def _fit(self, X, skip_num_points=0): daal_check_version((2021, "P", 600)), "oneDAL version is lower than 2021.6.", ), + # Scikit-learn didn't support sparse PCA initialization before 1.8. + # This nevertheless offloads it to sklearn because it produces a different + # error message than what would be thrown by simply passing the input to PCA. + ( + not sklearn_check_version("1.8") + and isinstance(self.init, str) + and self.init == "pca" + and issparse(X), + "PCA initialization is not supported with sparse input matrices.", + ), # Note: these conditions below should result in errors, but stock scikit-learn # does not check for errors at this exact point. Hence, this offloads the erroring # out to the base class, wherever in the process they might be encountered. From efd30f79cb60d8d54163357949707989d5b4bc69 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Mon, 8 Dec 2025 11:29:38 +0100 Subject: [PATCH 3/4] fix --- daal4py/sklearn/manifold/_t_sne.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/daal4py/sklearn/manifold/_t_sne.py b/daal4py/sklearn/manifold/_t_sne.py index 379d432a5d..c479ae4ca7 100755 --- a/daal4py/sklearn/manifold/_t_sne.py +++ b/daal4py/sklearn/manifold/_t_sne.py @@ -151,10 +151,10 @@ def _fit(self, X, skip_num_points=0): # This nevertheless offloads it to sklearn because it produces a different # error message than what would be thrown by simply passing the input to PCA. 
( - not sklearn_check_version("1.8") - and isinstance(self.init, str) - and self.init == "pca" - and issparse(X), + sklearn_check_version("1.8") + or not ( + isinstance(self.init, str) and self.init == "pca" and issparse(X) + ), "PCA initialization is not supported with sparse input matrices.", ), # Note: these conditions below should result in errors, but stock scikit-learn From 18a1634f3854f1c42ebfae6d70dde59c0d7f9397 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Mon, 8 Dec 2025 11:30:18 +0100 Subject: [PATCH 4/4] better message --- daal4py/sklearn/manifold/_t_sne.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daal4py/sklearn/manifold/_t_sne.py b/daal4py/sklearn/manifold/_t_sne.py index c479ae4ca7..1971f01fc9 100755 --- a/daal4py/sklearn/manifold/_t_sne.py +++ b/daal4py/sklearn/manifold/_t_sne.py @@ -155,7 +155,7 @@ def _fit(self, X, skip_num_points=0): or not ( isinstance(self.init, str) and self.init == "pca" and issparse(X) ), - "PCA initialization is not supported with sparse input matrices.", + "PCA initialization is not supported with sparse input matrices before scikit-learn 1.8.", ), # Note: these conditions below should result in errors, but stock scikit-learn # does not check for errors at this exact point. Hence, this offloads the erroring