Skip to content

Commit 2405630

Browse files
jmhatre-work, jui.mhatre, Alexsandruss, samir-nasibli
authored
PCA SPMD python interfaces (#1211)
* Initial pca spmd changes * pca spmd example * flake8 fixes * simplifying examples * flake8 fixes * flake8 changes in examples * fixing pca call from sklearnex than onedal * removing y from fit params in pca * y remove from examples * flake8 changes to examples * flake8 changes after pull * Fixes for PR comments on examples and class names * Update runexamples with pca_spmd * Update onedal/__init__.py Co-authored-by: Samir Nasibli <[email protected]> * Update setup.py Co-authored-by: Samir Nasibli <[email protected]> * Add features and samples in example --------- Co-authored-by: jui.mhatre <[email protected]> Co-authored-by: Alexander Andreev <[email protected]> Co-authored-by: Samir Nasibli <[email protected]>
1 parent 5bb4c31 commit 2405630

File tree

14 files changed

+157
-16
lines changed

14 files changed

+157
-16
lines changed

examples/sklearnex/pca_spmd.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# ===============================================================================
2+
# Copyright 2023 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# ===============================================================================
16+
17+
import numpy as np
18+
from mpi4py import MPI
19+
import dpctl
20+
from sklearnex.spmd.decomposition import PCA
21+
22+
23+
def get_data(data_seed, ns=300, nf=30):
    """Generate a reproducible block of uniform random data.

    Parameters
    ----------
    data_seed : int
        Seed passed to ``numpy.random.default_rng``; the same seed always
        produces the same array.
    ns : int, default=300
        Number of samples (rows) to generate.
    nf : int, default=30
        Number of features (columns) to generate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(ns, nf)`` with floats drawn from ``[0, 1)``.
    """
    drng = np.random.default_rng(data_seed)
    return drng.random(size=(ns, nf))
28+
29+
30+
# Device queue the computation is submitted to, and this process's MPI rank.
q = dpctl.SyclQueue("gpu")
comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# The rank is used as the seed, so every process generates a different block.
X = get_data(rank)

# Pass the queue by keyword: the call then does not depend on the positional
# layout of ``fit`` (scikit-learn style signatures put ``y`` second).
pca = PCA(n_components=2).fit(X, queue=q)

print(f"Singular values on rank {rank}:\n", pca.singular_values_)
print(f"Explained variance Ratio on rank {rank}:\n", pca.explained_variance_ratio_)

onedal/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,4 @@
4949
__all__ += ['basic_statistics', 'linear_model']
5050

5151
if _is_dpc_backend:
52-
__all__ += ['spmd.basic_statistics', 'spmd.linear_model']
52+
__all__ += ['spmd.basic_statistics', 'spmd.decomposition', 'spmd.linear_model',]

onedal/decomposition/pca.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,12 @@ ONEDAL_PY_INIT_MODULE(decomposition) {
136136

137137
using task_list = types<task::dim_reduction>;
138138
auto sub = m.def_submodule("decomposition");
139-
140-
ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list);
139+
#ifdef ONEDAL_DATA_PARALLEL_SPMD
140+
ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list_spmd, task_list);
141+
#else
142+
ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list);
143+
#endif
141144
ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list);
142-
143145
ONEDAL_PY_INSTANTIATE(init_model, sub, task_list);
144146
ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list);
145147
ONEDAL_PY_INSTANTIATE(init_infer_result, sub, task_list);

onedal/decomposition/pca.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818

1919
from onedal import _backend
2020
from ..common._policy import _get_policy
21-
from ..datatypes._data_conversion import from_table, to_table, _convert_to_supported
21+
from ..datatypes._data_conversion import from_table, to_table
22+
from ..datatypes import _convert_to_supported
2223
from daal4py.sklearn._utils import sklearn_check_version
2324

2425

@@ -43,17 +44,20 @@ def get_onedal_params(self, data):
4344
'is_deterministic': self.is_deterministic
4445
}
4546

46-
def fit(self, X, y, queue):
47+
def _get_policy(self, queue, *data):
48+
return _get_policy(queue, *data)
49+
50+
def fit(self, X, queue):
4751
n_samples, n_features = X.shape
4852
n_sf_min = min(n_samples, n_features)
4953

50-
policy = _get_policy(queue, X, y)
51-
54+
policy = self._get_policy(queue, X)
5255
# TODO: investigate why np.ndarray with OWNDATA=FALSE flag
5356
# fails to be converted to oneDAL table
5457
if isinstance(X, np.ndarray) and not X.flags['OWNDATA']:
5558
X = X.copy()
56-
X, y = _convert_to_supported(policy, X, y)
59+
X = _convert_to_supported(policy, X)
60+
5761
params = self.get_onedal_params(X)
5862
cov_result = _backend.covariance.compute(
5963
policy,
@@ -99,10 +103,11 @@ def fit(self, X, y, queue):
99103
def _create_model(self):
100104
m = _backend.decomposition.dim_reduction.model()
101105
m.eigenvectors = to_table(self.components_)
106+
self._onedal_model = m
102107
return m
103108

104109
def predict(self, X, queue):
105-
policy = _get_policy(queue, X)
110+
policy = self._get_policy(queue, X)
106111
model = self._create_model()
107112

108113
X = _convert_to_supported(policy, X)

onedal/primitives/covariance.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,11 @@ ONEDAL_PY_INIT_MODULE(covariance) {
7878
using namespace dal::covariance;
7979

8080
auto sub = m.def_submodule("covariance");
81-
ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task::compute);
81+
#ifdef ONEDAL_DATA_PARALLEL_SPMD
82+
ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list_spmd, task::compute);
83+
#else
84+
ONEDAL_PY_INSTANTIATE(init_compute_ops, sub, policy_list, task::compute);
85+
#endif
8286
ONEDAL_PY_INSTANTIATE(init_compute_result, sub, task::compute);
8387
}
8488

onedal/spmd/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@
1414
# limitations under the License.
1515
#===============================================================================
1616

17-
__all__ = ['linear_model', 'basic_statistics']
17+
__all__ = ['basic_statistics', 'decomposition', 'linear_model']
onedal/spmd/decomposition/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#===============================================================================
2+
# Copyright 2023 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#===============================================================================
16+
17+
from .pca import PCA
18+
19+
__all__ = ['PCA']

onedal/spmd/decomposition/pca.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#===============================================================================
2+
# Copyright 2023 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#===============================================================================
16+
17+
18+
from ...common._spmd_policy import _get_spmd_policy
19+
from onedal.decomposition.pca import PCA as PCABatch
20+
21+
22+
class BasePCASPMD:
    """Mixin that makes an estimator obtain its policy from the SPMD helper."""

    def _get_policy(self, queue, *data):
        """Return the SPMD policy for ``queue``.

        ``data`` is accepted but not used; only ``queue`` is forwarded to
        ``_get_spmd_policy``.
        """
        spmd_policy = _get_spmd_policy(queue)
        return spmd_policy
25+
26+
27+
class PCA(BasePCASPMD, PCABatch):
    """Batch PCA combined with the SPMD policy mixin.

    ``BasePCASPMD`` is listed first, so its ``_get_policy`` takes precedence
    over the one defined on ``PCABatch``; everything else is inherited
    unchanged.
    """

setup.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,8 +475,9 @@ def run(self):
475475
] if ONEDAL_VERSION >= 20230100 else []
476476
) + (
477477
['onedal.spmd',
478-
'onedal.spmd.linear_model',
479-
'onedal.spmd.basic_statistics'
478+
'onedal.spmd.basic_statistics',
479+
'onedal.spmd.decomposition',
480+
'onedal.spmd.linear_model'
480481
] if build_distribute else [])),
481482
package_data={
482483
'daal4py.oneapi': [

sklearnex/preview/decomposition/pca.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ def _onedal_fit(self, X, y=None, queue=None):
214214
'method': "precomputed",
215215
}
216216
self._onedal_estimator = onedal_PCA(**onedal_params)
217-
self._onedal_estimator.fit(X, y, queue=queue)
217+
self._onedal_estimator.fit(X, queue=queue)
218218
self._save_attributes()
219219

220220
U = None

0 commit comments

Comments
 (0)