Skip to content

Commit e887d99

Browse files
SPMD random forest (#1214)
SPMD random forest
1 parent 94b9db8 commit e887d99

File tree

13 files changed

+397
-75
lines changed

13 files changed

+397
-75
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#===============================================================================
2+
# Copyright 2023 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#===============================================================================
16+
17+
# sklearnex RF example for distributed systems; SPMD mode
18+
# run like this:
19+
# mpirun -n 4 python ./random_forest_classifier_spmd.py
20+
21+
import dpctl
22+
import dpctl.tensor as dpt
23+
24+
import numpy as np
25+
26+
from mpi4py import MPI
27+
28+
from sklearnex.spmd.ensemble import RandomForestClassifier
29+
30+
31+
def generate_X_y(par, seed):
32+
ns, nf = par['ns'], par['nf']
33+
34+
drng = np.random.default_rng(seed)
35+
data = drng.uniform(-1, 1, size=(ns, nf))
36+
resp = (data > 0) @ (2 ** np.arange(nf))
37+
38+
return data, resp
39+
40+
41+
params_train = {'ns': 10000, 'nf': 8}
42+
params_test = {'ns': 100, 'nf': 8}
43+
44+
comm = MPI.COMM_WORLD
45+
mpi_size = comm.Get_size()
46+
mpi_rank = comm.Get_rank()
47+
48+
X_train, y_train = generate_X_y(params_train, mpi_rank)
49+
X_test, y_test = generate_X_y(params_test, mpi_rank + 777)
50+
51+
q = dpctl.SyclQueue("gpu") # GPU
52+
53+
dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q)
54+
dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q)
55+
dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q)
56+
dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q)
57+
58+
rf = RandomForestClassifier(max_depth=2, random_state=0).fit(dpt_X_train, dpt_y_train)
59+
60+
pred = rf.predict(dpt_X_test)
61+
62+
print("Random Forest classification results:")
63+
print("Ground truth (first 5 observations on rank {}):\n{}".format(mpi_rank, y_test[:5]))
64+
print("Classification results (first 5 observations on rank {}):\n{}"
65+
.format(mpi_rank, dpt.to_numpy(pred)[:5]))
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#===============================================================================
2+
# Copyright 2023 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#===============================================================================
16+
17+
# sklearnex RF example for distributed systems; SPMD mode
18+
# run like this:
19+
# mpirun -n 4 python ./random_forest_regressor_spmd.py
20+
21+
import numpy as np
22+
23+
import dpctl
24+
import dpctl.tensor as dpt
25+
26+
from mpi4py import MPI
27+
from sklearnex.spmd.ensemble import RandomForestRegressor
28+
29+
from numpy.testing import assert_allclose
30+
31+
32+
def generate_X_y(par, coef_seed, data_seed):
33+
ns, nf = par['ns'], par['nf']
34+
35+
crng = np.random.default_rng(coef_seed)
36+
coef = crng.uniform(-10, 10, size=(nf,))
37+
38+
drng = np.random.default_rng(data_seed)
39+
data = drng.uniform(-100, 100, size=(ns, nf))
40+
resp = data @ coef
41+
42+
return data, resp, coef
43+
44+
45+
comm = MPI.COMM_WORLD
46+
mpi_size = comm.Get_size()
47+
mpi_rank = comm.Get_rank()
48+
49+
params_train = {'ns': 10000, 'nf': 3}
50+
params_test = {'ns': 100, 'nf': 3}
51+
52+
X_train, y_train, coef_train = generate_X_y(params_train, 10, mpi_rank)
53+
X_test, y_test, coef_test = generate_X_y(params_test, 10, mpi_rank + 99)
54+
55+
assert_allclose(coef_train, coef_test)
56+
57+
q = dpctl.SyclQueue("gpu") # GPU
58+
59+
dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q)
60+
dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q)
61+
dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q)
62+
# dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q)
63+
64+
rf = RandomForestRegressor(max_depth=2, random_state=0).fit(dpt_X_train, dpt_y_train)
65+
66+
y_predict = rf.predict(dpt_X_test)
67+
68+
print("Ground truth (first 5 observations on rank {}):\n{}".format(mpi_rank, y_test[:5]))
69+
print("Regression results (first 5 observations on rank {}):\n{}"
70+
.format(mpi_rank, dpt.to_numpy(y_predict)[:5]))

onedal/ensemble/forest.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,13 @@ ONEDAL_PY_INIT_MODULE(ensemble) {
293293
using task_list = types<task::classification, task::regression>;
294294
auto sub = m.def_submodule("decision_forest");
295295

296+
#ifdef ONEDAL_DATA_PARALLEL_SPMD
297+
ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list_spmd, task_list);
298+
ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list_spmd, task_list);
299+
#else // ONEDAL_DATA_PARALLEL_SPMD
296300
ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list);
297301
ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list);
302+
#endif // ONEDAL_DATA_PARALLEL_SPMD
298303

299304
ONEDAL_PY_INSTANTIATE(init_model, sub, task_list);
300305
ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list);

onedal/spmd/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@
1414
# limitations under the License.
1515
#===============================================================================
1616

17-
__all__ = ['basic_statistics', 'decomposition', 'linear_model', 'neighbors']
17+
__all__ = ['basic_statistics', 'decomposition', 'ensemble', 'linear_model', 'neighbors']

onedal/spmd/ensemble/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#===============================================================================
2+
# Copyright 2023 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#===============================================================================
16+
17+
from .forest import RandomForestClassifier, RandomForestRegressor
18+
19+
__all__ = ['RandomForestClassifier', 'RandomForestRegressor']

onedal/spmd/ensemble/forest.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#===============================================================================
2+
# Copyright 2023 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#===============================================================================
16+
17+
from abc import ABC
18+
19+
from ...common._spmd_policy import _get_spmd_policy
20+
21+
from onedal.ensemble import RandomForestClassifier as RandomForestClassifier_Batch
22+
from onedal.ensemble import RandomForestRegressor as RandomForestRegressor_Batch
23+
24+
25+
class BaseForestSPMD(ABC):
26+
def _get_policy(self, queue, *data):
27+
return _get_spmd_policy(queue)
28+
29+
30+
class RandomForestClassifier(BaseForestSPMD, RandomForestClassifier_Batch):
31+
pass
32+
33+
34+
class RandomForestRegressor(BaseForestSPMD, RandomForestRegressor_Batch):
35+
pass

setup.py

Lines changed: 41 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,46 @@ def run(self):
401401
with open('README.md', 'r', encoding='utf8') as f:
402402
long_description = f.read()
403403

404+
packages_with_tests = [
405+
'daal4py',
406+
'daal4py.oneapi',
407+
'daal4py.sklearn',
408+
'daal4py.sklearn.cluster',
409+
'daal4py.sklearn.decomposition',
410+
'daal4py.sklearn.ensemble',
411+
'daal4py.sklearn.linear_model',
412+
'daal4py.sklearn.manifold',
413+
'daal4py.sklearn.metrics',
414+
'daal4py.sklearn.neighbors',
415+
'daal4py.sklearn.monkeypatch',
416+
'daal4py.sklearn.svm',
417+
'daal4py.sklearn.utils',
418+
'daal4py.sklearn.model_selection',
419+
'onedal',
420+
'onedal.common',
421+
'onedal.datatypes',
422+
'onedal.decomposition',
423+
'onedal.ensemble',
424+
'onedal.neighbors',
425+
'onedal.primitives',
426+
'onedal.svm']
427+
428+
if ONEDAL_VERSION >= 20230100:
429+
packages_with_tests += [
430+
'onedal.basic_statistics',
431+
'onedal.linear_model']
432+
433+
if build_distribute:
434+
packages_with_tests += [
435+
'onedal.spmd',
436+
'onedal.spmd.decomposition',
437+
'onedal.spmd.ensemble']
438+
if ONEDAL_VERSION >= 20230100:
439+
packages_with_tests += [
440+
'onedal.spmd.basic_statistics',
441+
'onedal.spmd.linear_model',
442+
'onedal.spmd.neighbors']
443+
404444
setup(
405445
name="daal4py",
406446
description="A convenient Python API to Intel(R) oneAPI Data Analytics Library",
@@ -447,40 +487,7 @@ def run(self):
447487
'data science',
448488
'data analytics'
449489
],
450-
packages=get_packages_with_tests([
451-
'daal4py',
452-
'daal4py.oneapi',
453-
'daal4py.sklearn',
454-
'daal4py.sklearn.cluster',
455-
'daal4py.sklearn.decomposition',
456-
'daal4py.sklearn.ensemble',
457-
'daal4py.sklearn.linear_model',
458-
'daal4py.sklearn.manifold',
459-
'daal4py.sklearn.metrics',
460-
'daal4py.sklearn.neighbors',
461-
'daal4py.sklearn.monkeypatch',
462-
'daal4py.sklearn.svm',
463-
'daal4py.sklearn.utils',
464-
'daal4py.sklearn.model_selection',
465-
'onedal',
466-
'onedal.ensemble',
467-
'onedal.decomposition',
468-
'onedal.svm',
469-
'onedal.neighbors',
470-
'onedal.primitives',
471-
'onedal.datatypes',
472-
'onedal.common'
473-
] + (['onedal.basic_statistics',
474-
'onedal.linear_model'
475-
] if ONEDAL_VERSION >= 20230100 else []
476-
) + (
477-
['onedal.spmd',
478-
'onedal.spmd.basic_statistics',
479-
'onedal.spmd.decomposition',
480-
'onedal.spmd.linear_model'
481-
] + (['onedal.spmd.neighbors']
482-
if ONEDAL_VERSION >= 20230100 else [])
483-
if build_distribute else [])),
490+
packages=get_packages_with_tests(packages_with_tests),
484491
package_data={
485492
'daal4py.oneapi': [
486493
'liboneapi_backend.so',

setup_sklearnex.py

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,30 @@
6868
with open("README.md", "r", encoding="utf8") as f:
6969
long_description = f.read()
7070

71+
packages_with_tests = [
72+
"sklearnex",
73+
'sklearnex.cluster',
74+
'sklearnex.decomposition',
75+
'sklearnex.ensemble',
76+
'sklearnex.glob',
77+
'sklearnex.linear_model',
78+
'sklearnex.manifold',
79+
'sklearnex.metrics',
80+
'sklearnex.model_selection',
81+
'sklearnex.neighbors',
82+
'sklearnex.preview',
83+
'sklearnex.preview.decomposition',
84+
'sklearnex.preview.ensemble',
85+
'sklearnex.preview.linear_model',
86+
'sklearnex.svm',
87+
'sklearnex.utils']
88+
89+
if build_distribute:
90+
packages_with_tests += [
91+
'sklearnex.spmd',
92+
'sklearnex.spmd.ensemble',
93+
'sklearnex.spmd.linear_model']
94+
7195
# sklearnex setup
7296
setup(name="scikit-learn-intelex",
7397
description="Intel(R) Extension for Scikit-learn is a "
@@ -112,22 +136,5 @@
112136
"data science",
113137
"data analytics",
114138
],
115-
packages=get_packages_with_tests([
116-
"sklearnex",
117-
'sklearnex.cluster',
118-
'sklearnex.decomposition',
119-
'sklearnex.ensemble',
120-
'sklearnex.glob',
121-
'sklearnex.linear_model',
122-
'sklearnex.manifold',
123-
'sklearnex.metrics',
124-
'sklearnex.model_selection',
125-
'sklearnex.neighbors',
126-
'sklearnex.preview',
127-
'sklearnex.preview.ensemble',
128-
'sklearnex.preview.decomposition',
129-
'sklearnex.preview.linear_model',
130-
'sklearnex.svm',
131-
'sklearnex.utils'
132-
] + (['sklearnex.spmd'] if build_distribute else [])),
139+
packages=get_packages_with_tests(packages_with_tests),
133140
)

0 commit comments

Comments
 (0)