Skip to content

Commit a72a6f8

Browse files
support usm_ndarray in onedal.spmd (#1216)
support usm_ndarray in onedal
1 parent 93577ee commit a72a6f8

File tree

13 files changed

+175
-21
lines changed

13 files changed

+175
-21
lines changed

examples/sklearnex/basic_statistics_spmd.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from mpi4py import MPI
1919

2020
from dpctl import SyclQueue
21+
import dpctl.tensor as dpt
2122
from sklearnex.spmd.basic_statistics import BasicStatistics as BasicStatisticsSpmd
2223

2324

@@ -51,11 +52,14 @@ def generate_data(par, size, seed=777):
5152
data, weights = generate_data(params_spmd, size)
5253
weighted_data = np.diag(weights) @ data
5354

55+
dpt_data = dpt.asarray(data, usm_type="device", sycl_queue=q)
56+
dpt_weights = dpt.asarray(weights, usm_type="device", sycl_queue=q)
57+
5458
gtr_mean = np.mean(weighted_data, axis=0)
5559
gtr_std = np.std(weighted_data, axis=0)
5660

5761
bss = BasicStatisticsSpmd(["mean", "standard_deviation"])
58-
res = bss.compute(data, weights, queue=q)
62+
res = bss.compute(dpt_data, dpt_weights)
5963

6064
print(f"Computed mean on rank {rank}:\n", res["mean"])
6165
print(f"Computed std on rank {rank}:\n", res["standard_deviation"])

examples/sklearnex/knn_bf_classification_spmd.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from warnings import warn
2020
from mpi4py import MPI
2121
import dpctl
22+
import dpctl.tensor as dpt
2223
from sklearnex.spmd.neighbors import KNeighborsClassifier
2324

2425

@@ -48,18 +49,23 @@ def generate_X_y(par, seed):
4849
X_train, y_train = generate_X_y(params_train, rank)
4950
X_test, y_test = generate_X_y(params_test, rank + 99)
5051

52+
dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q)
53+
dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q)
54+
dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q)
55+
dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q)
56+
5157
model_spmd = KNeighborsClassifier(algorithm='brute',
5258
n_neighbors=20,
5359
weights='uniform',
5460
p=2,
5561
metric='minkowski')
56-
model_spmd.fit(X_train, y_train, queue=q)
62+
model_spmd.fit(dpt_X_train, dpt_y_train)
5763

58-
y_predict = model_spmd.predict(X_test, queue=q)
64+
y_predict = model_spmd.predict(dpt_X_test)
5965

6066
print("Brute Force Distributed kNN classification results:")
6167
print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5]))
6268
print("Classification results (first 5 observations on rank {}):\n{}"
63-
.format(rank, y_predict[:5]))
69+
.format(rank, dpt.to_numpy(y_predict)[:5]))
6470
print("Accuracy for entire rank {} (256 classes): {}\n"
65-
.format(rank, accuracy_score(y_test, y_predict)))
71+
.format(rank, accuracy_score(y_test, dpt.to_numpy(y_predict))))

examples/sklearnex/knn_bf_regression_spmd.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from warnings import warn
2020
from mpi4py import MPI
2121
import dpctl
22+
import dpctl.tensor as dpt
2223
from numpy.testing import assert_allclose
2324
from sklearnex.spmd.neighbors import KNeighborsRegressor
2425

@@ -52,20 +53,25 @@ def generate_X_y(par, coef_seed, data_seed):
5253
X_train, y_train, coef_train = generate_X_y(params_train, 10, rank)
5354
X_test, y_test, coef_test = generate_X_y(params_test, 10, rank + 99)
5455

56+
dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q)
57+
dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q)
58+
dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q)
59+
# dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q)
60+
5561
assert_allclose(coef_train, coef_test)
5662

5763
model_spmd = KNeighborsRegressor(algorithm='brute',
5864
n_neighbors=5,
5965
weights='uniform',
6066
p=2,
6167
metric='minkowski')
62-
model_spmd.fit(X_train, y_train, queue=q)
68+
model_spmd.fit(dpt_X_train, dpt_y_train)
6369

64-
y_predict = model_spmd.predict(X_test, queue=q)
70+
y_predict = model_spmd.predict(dpt_X_test)
6571

6672
print("Brute Force Distributed kNN regression results:")
6773
print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5]))
6874
print("Regression results (first 5 observations on rank {}):\n{}"
69-
.format(rank, y_predict[:5]))
75+
.format(rank, dpt.to_numpy(y_predict)[:5]))
7076
print("RMSE for entire rank {}: {}\n"
71-
.format(rank, mean_squared_error(y_test, y_predict, squared=False)))
77+
.format(rank, mean_squared_error(y_test, dpt.to_numpy(y_predict), squared=False)))

examples/sklearnex/linear_regression_spmd.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from mpi4py import MPI
2121
from dpctl import SyclQueue
22+
import dpctl.tensor as dpt
2223
from sklearnex.spmd.linear_model import LinearRegression
2324

2425

@@ -56,13 +57,17 @@ def get_test_data(rank):
5657

5758
queue = SyclQueue("gpu")
5859

59-
model = LinearRegression().fit(X, y, queue)
60+
dpt_X = dpt.asarray(X, usm_type="device", sycl_queue=queue)
61+
dpt_y = dpt.asarray(y, usm_type="device", sycl_queue=queue)
62+
63+
model = LinearRegression().fit(dpt_X, dpt_y)
6064

6165
print(f"Coefficients on rank {rank}:\n", model.coef_)
6266
print(f"Intercept on rank {rank}:\n", model.intercept_)
6367

6468
X_test, _ = get_test_data(rank)
69+
dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=queue)
6570

66-
result = model.predict(X_test, queue)
71+
result = model.predict(dpt_X_test)
6772

68-
print(f"Result on rank {rank}:\n", result)
73+
print(f"Result on rank {rank}:\n", dpt.to_numpy(result))

examples/sklearnex/pca_spmd.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import numpy as np
1818
from mpi4py import MPI
1919
import dpctl
20+
import dpctl.tensor as dpt
2021
from sklearnex.spmd.decomposition import PCA
2122

2223

@@ -33,8 +34,9 @@ def get_data(data_seed):
3334
size = comm.Get_size()
3435

3536
X = get_data(rank)
37+
dpt_X = dpt.asarray(X, usm_type="device", sycl_queue=q)
3638

37-
pca = PCA(n_components=2).fit(X, q)
39+
pca = PCA(n_components=2).fit(dpt_X)
3840

3941
print(f"Singular values on rank {rank}:\n", pca.singular_values_)
4042
print(f"Explained variance Ratio on rank {rank}:\n", pca.explained_variance_ratio_)

examples/sklearnex/random_forest_classifier_spmd.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ def generate_X_y(par, seed):
5353
dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q)
5454
dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q)
5555
dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q)
56-
dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q)
5756

5857
rf = RandomForestClassifier(max_depth=2, random_state=0).fit(dpt_X_train, dpt_y_train)
5958

examples/sklearnex/random_forest_regressor_spmd.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ def generate_X_y(par, coef_seed, data_seed):
5959
dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q)
6060
dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q)
6161
dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q)
62-
# dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q)
6362

6463
rf = RandomForestRegressor(max_depth=2, random_state=0).fit(dpt_X_train, dpt_y_train)
6564

onedal/_device_offload.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#===============================================================================
2+
# Copyright 2023 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#===============================================================================
16+
17+
from functools import wraps
18+
19+
try:
20+
from sklearnex._device_offload import (_get_global_queue,
21+
_transfer_to_host,
22+
_copy_to_usm)
23+
_sklearnex_available = True
24+
except ImportError:
25+
import logging
26+
logging.warning('Device support requires '
27+
'Intel(R) Extension for Scikit-learn*.')
28+
_sklearnex_available = False
29+
30+
31+
def _get_host_inputs(*args, **kwargs):
    """Pass all positional and keyword inputs through ``_transfer_to_host``.

    The queue obtained from ``_get_global_queue`` is handed to
    ``_transfer_to_host`` together with the positional arguments; the queue
    that call returns is then reused for the keyword values.

    Returns
    -------
    tuple
        ``(queue, hostargs, hostkwargs)`` where ``queue`` is whatever the
        last ``_transfer_to_host`` call returned, ``hostargs`` is its output
        for the positional inputs and ``hostkwargs`` maps each original
        keyword name to the corresponding transferred value.
    """
    queue = _get_global_queue()
    queue, host_positional = _transfer_to_host(queue, *args)
    queue, host_values = _transfer_to_host(queue, *kwargs.values())
    # Re-attach the original keyword names; dict order matches values().
    host_keywords = {name: value for name, value in zip(kwargs, host_values)}
    return queue, host_positional, host_keywords
37+
38+
39+
def _extract_usm_iface(*args, **kwargs):
    """Return ``__sycl_usm_array_interface__`` of the first input, if any.

    Positional arguments are considered before keyword values. Only the very
    first input is inspected; ``None`` is returned when there are no inputs
    or when that first input does not expose the attribute.
    """
    candidates = [*args, *kwargs.values()]
    if not candidates:
        return None
    first = candidates[0]
    return getattr(first, '__sycl_usm_array_interface__', None)
46+
47+
48+
def _run_on_device(func, obj=None, *args, **kwargs):
    """Call ``func``, prepending ``obj`` to the arguments unless it is None.

    This lets one code path serve both methods (``obj`` is the instance) and
    free functions (``obj`` is ``None`` and is not forwarded).
    """
    call_args = args if obj is None else (obj, *args)
    return func(*call_args, **kwargs)
52+
53+
54+
def support_usm_ndarray(freefunc=False):
    """Build a decorator that routes a callable's inputs via host transfer.

    Parameters
    ----------
    freefunc : bool, default False
        When true the decorated callable is treated as a free function (no
        instance is forwarded); otherwise its first argument is taken as the
        instance and forwarded unchanged.

    Returns
    -------
    callable
        A decorator. The wrapper it produces behaves as follows:

        * if ``_sklearnex_available`` is false, the callable is invoked with
          the caller's arguments untouched;
        * otherwise the arguments go through ``_get_host_inputs``, the queue
          it returns is passed as the ``queue`` keyword, and when the first
          input exposed ``__sycl_usm_array_interface__`` and the result
          exposes ``__array_interface__`` the result is passed through
          ``_copy_to_usm`` with that queue before being returned.
    """
    def decorator(func):
        def dispatch(obj, *args, **kwargs):
            if not _sklearnex_available:
                return _run_on_device(func, obj, *args, **kwargs)

            # Inspect the inputs before they are transferred.
            usm_iface = _extract_usm_iface(*args, **kwargs)
            data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs)
            # NOTE: this replaces any 'queue' keyword supplied by the caller.
            hostkwargs['queue'] = data_queue
            result = _run_on_device(func, obj, *hostargs, **hostkwargs)

            wants_usm_result = (usm_iface is not None
                                and hasattr(result, '__array_interface__'))
            if wants_usm_result:
                return _copy_to_usm(data_queue, result)
            return result

        if not freefunc:
            @wraps(func)
            def method_wrapper(self, *args, **kwargs):
                return dispatch(self, *args, **kwargs)
            return method_wrapper

        @wraps(func)
        def function_wrapper(*args, **kwargs):
            return dispatch(None, *args, **kwargs)
        return function_wrapper
    return decorator

onedal/spmd/basic_statistics/basic_statistics.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from abc import ABC
1818
from ...common._spmd_policy import _get_spmd_policy
19+
from ..._device_offload import support_usm_ndarray
1920
from onedal.basic_statistics import BasicStatistics as BasicStatistics_Batch
2021

2122

@@ -25,4 +26,7 @@ def _get_policy(self, queue, *data):
2526

2627

2728
class BasicStatistics(BaseBasicStatisticsSPMD, BasicStatistics_Batch):
    """SPMD basic statistics whose ``compute`` goes through ``support_usm_ndarray``."""

    @support_usm_ndarray()
    def compute(self, data, weights=None, queue=None):
        """Delegate to the parent ``compute`` with the same three arguments.

        The ``support_usm_ndarray`` decorator processes ``data``, ``weights``
        and ``queue`` before this body runs.
        """
        return super().compute(data, weights, queue)

onedal/spmd/decomposition/pca.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717

1818
from ...common._spmd_policy import _get_spmd_policy
19+
from ..._device_offload import support_usm_ndarray
1920
from onedal.decomposition.pca import PCA as PCABatch
2021

2122

@@ -25,4 +26,11 @@ def _get_policy(self, queue, *data):
2526

2627

2728
class PCA(BasePCASPMD, PCABatch):
    """SPMD PCA whose ``fit``/``predict`` go through ``support_usm_ndarray``.

    ``queue`` defaults to ``None`` on both methods so that ``fit(X)`` and
    ``predict(X)`` are valid calls even when the decorator forwards the
    caller's arguments unchanged (i.e. does not inject a ``queue`` keyword
    itself). Passing ``queue`` positionally or by keyword keeps working.
    """

    @support_usm_ndarray()
    def fit(self, X, queue=None):
        """Delegate to the parent ``fit`` with ``X`` and ``queue``."""
        return super().fit(X, queue)

    @support_usm_ndarray()
    def predict(self, X, queue=None):
        """Delegate to the parent ``predict`` with ``X`` and ``queue``."""
        return super().predict(X, queue)

0 commit comments

Comments
 (0)