uxlfoundation
diff --git a/examples/sklearnex/random_forest_classifier_spmd.py b/examples/sklearnex/random_forest_classifier_spmd.py
Lines changed: 65 additions & 0 deletions
diff --git a/examples/sklearnex/random_forest_regressor_spmd.py b/examples/sklearnex/random_forest_regressor_spmd.py
Lines changed: 70 additions & 0 deletions
diff --git a/onedal/ensemble/forest.cpp b/onedal/ensemble/forest.cpp
Lines changed: 5 additions & 0 deletions
diff --git a/onedal/spmd/__init__.py b/onedal/spmd/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/onedal/spmd/ensemble/__init__.py b/onedal/spmd/ensemble/__init__.py
Lines changed: 19 additions & 0 deletions
diff --git a/onedal/spmd/ensemble/forest.py b/onedal/spmd/ensemble/forest.py
Lines changed: 35 additions & 0 deletions
diff --git a/setup.py b/setup.py
Lines changed: 41 additions & 34 deletions
diff --git a/setup_sklearnex.py b/setup_sklearnex.py
Lines changed: 25 additions & 18 deletions
@@ -0,0 +1,65 @@
+#===============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# sklearnex RF example for distributed systems; SPMD mode
+# run like this:
+#    mpirun -n 4 python ./random_forest_classifier_spmd.py
+
+import dpctl
+import dpctl.tensor as dpt
+
+import numpy as np
+
+from mpi4py import MPI
+
+from sklearnex.spmd.ensemble import RandomForestClassifier
+
+
+def generate_X_y(par, seed):
+    """Draw par['ns'] x par['nf'] uniform features and sign-pattern labels."""
+    n_samples, n_features = par['ns'], par['nf']
+    rng = np.random.default_rng(seed)
+    features = rng.uniform(-1, 1, size=(n_samples, n_features))
+    # Weight column j by 2**j so each row's positive-sign mask sums to one integer.
+    labels = (features > 0) @ (2 ** np.arange(n_features))
+    return features, labels
+
+
+params_train = {'ns': 10000, 'nf': 8}
+params_test = {'ns': 100, 'nf': 8}
+
+comm = MPI.COMM_WORLD
+mpi_size = comm.Get_size()
+mpi_rank = comm.Get_rank()
+
+# Seeding with the rank makes every process generate its own data.
+X_train, y_train = generate_X_y(params_train, mpi_rank)
+X_test, y_test = generate_X_y(params_test, mpi_rank + 777)
+
+q = dpctl.SyclQueue("gpu")  # GPU
+
+# y_test is only printed below, so it is kept as a host NumPy array.
+dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q)
+dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q)
+dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q)
+
+rf = RandomForestClassifier(max_depth=2, random_state=0).fit(dpt_X_train, dpt_y_train)
+pred = rf.predict(dpt_X_test)
+
+print("Random Forest classification results:")
+print("Ground truth (first 5 observations on rank {}):\n{}".format(mpi_rank, y_test[:5]))
+print("Classification results (first 5 observations on rank {}):\n{}"
+      .format(mpi_rank, dpt.to_numpy(pred)[:5]))
@@ -0,0 +1,70 @@
+#===============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# sklearnex RF example for distributed systems; SPMD mode
+# run like this:
+#    mpirun -n 4 python ./random_forest_regressor_spmd.py
+
+import numpy as np
+
+import dpctl
+import dpctl.tensor as dpt
+
+from mpi4py import MPI
+from sklearnex.spmd.ensemble import RandomForestRegressor
+
+from numpy.testing import assert_allclose
+
+
+def generate_X_y(par, coef_seed, data_seed):
+    """Generate uniform features with a noiseless linear response.
+
+    The coefficients depend only on coef_seed and the features only on data_seed.
+    """
+    n_samples, n_features = par['ns'], par['nf']
+    weights = np.random.default_rng(coef_seed).uniform(-10, 10, size=(n_features,))
+    features = np.random.default_rng(data_seed).uniform(
+        -100, 100, size=(n_samples, n_features))
+    targets = features @ weights
+    return features, targets, weights
+
+
+comm = MPI.COMM_WORLD
+mpi_size = comm.Get_size()
+mpi_rank = comm.Get_rank()
+
+params_train = {'ns': 10000, 'nf': 3}
+params_test = {'ns': 100, 'nf': 3}
+
+X_train, y_train, coef_train = generate_X_y(params_train, 10, mpi_rank)
+X_test, y_test, coef_test = generate_X_y(params_test, 10, mpi_rank + 99)
+
+assert_allclose(coef_train, coef_test)  # both splits were built with coef_seed=10
+
+q = dpctl.SyclQueue("gpu")  # GPU
+
+dpt_X_train = dpt.asarray(X_train, usm_type="device", sycl_queue=q)
+dpt_y_train = dpt.asarray(y_train, usm_type="device", sycl_queue=q)
+dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q)
+# y_test is only printed below, so it is kept as a host NumPy array.
+
+rf = RandomForestRegressor(max_depth=2, random_state=0).fit(dpt_X_train, dpt_y_train)
+
+y_predict = rf.predict(dpt_X_test)
+
+print("Ground truth (first 5 observations on rank {}):\n{}".format(mpi_rank, y_test[:5]))
+print("Regression results (first 5 observations on rank {}):\n{}"
+      .format(mpi_rank, dpt.to_numpy(y_predict)[:5]))
@@ -293,8 +293,13 @@ ONEDAL_PY_INIT_MODULE(ensemble) {
     using task_list = types<task::classification, task::regression>;
     auto sub = m.def_submodule("decision_forest");
 
+#ifdef ONEDAL_DATA_PARALLEL_SPMD
+    ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list_spmd, task_list);
+    ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list_spmd, task_list);
+#else // ONEDAL_DATA_PARALLEL_SPMD
     ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list);
     ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list);
+#endif // ONEDAL_DATA_PARALLEL_SPMD
 
     ONEDAL_PY_INSTANTIATE(init_model, sub, task_list);
     ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list);
 
@@ -14,4 +14,4 @@
 # limitations under the License.
 #===============================================================================
 
-__all__ = ['basic_statistics', 'decomposition', 'linear_model', 'neighbors']
+__all__ = ['basic_statistics', 'decomposition', 'ensemble', 'linear_model', 'neighbors']
@@ -0,0 +1,19 @@
+#===============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+from .forest import RandomForestClassifier, RandomForestRegressor
+
+__all__ = ['RandomForestClassifier', 'RandomForestRegressor']
@@ -0,0 +1,35 @@
+#===============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+from abc import ABC
+
+from ...common._spmd_policy import _get_spmd_policy
+
+from onedal.ensemble import RandomForestClassifier as RandomForestClassifier_Batch
+from onedal.ensemble import RandomForestRegressor as RandomForestRegressor_Batch
+
+
+class BaseForestSPMD(ABC):  # mixin: makes _get_policy return the SPMD policy
+    def _get_policy(self, queue, *data):  # *data is accepted but not used
+        return _get_spmd_policy(queue)
+
+
+class RandomForestClassifier(BaseForestSPMD, RandomForestClassifier_Batch):
+    """Batch classifier that resolves `_get_policy` from BaseForestSPMD first."""
+
+
+class RandomForestRegressor(BaseForestSPMD, RandomForestRegressor_Batch):
+    """Batch regressor that resolves `_get_policy` from BaseForestSPMD first."""
@@ -401,6 +401,46 @@ def run(self):
 with open('README.md', 'r', encoding='utf8') as f:
     long_description = f.read()
 
+packages_with_tests = [  # base list; optional subpackages are appended below
+    'daal4py',
+    'daal4py.oneapi',
+    'daal4py.sklearn',
+    'daal4py.sklearn.cluster',
+    'daal4py.sklearn.decomposition',
+    'daal4py.sklearn.ensemble',
+    'daal4py.sklearn.linear_model',
+    'daal4py.sklearn.manifold',
+    'daal4py.sklearn.metrics',
+    'daal4py.sklearn.neighbors',
+    'daal4py.sklearn.monkeypatch',
+    'daal4py.sklearn.svm',
+    'daal4py.sklearn.utils',
+    'daal4py.sklearn.model_selection',
+    'onedal',
+    'onedal.common',
+    'onedal.datatypes',
+    'onedal.decomposition',
+    'onedal.ensemble',
+    'onedal.neighbors',
+    'onedal.primitives',
+    'onedal.svm']
+
+if ONEDAL_VERSION >= 20230100:  # subpackages gated on ONEDAL_VERSION
+    packages_with_tests.extend([
+        'onedal.basic_statistics',
+        'onedal.linear_model'])
+
+if build_distribute:  # SPMD subpackages are added only when build_distribute is set
+    packages_with_tests.extend([
+        'onedal.spmd',
+        'onedal.spmd.decomposition',
+        'onedal.spmd.ensemble'])
+    if ONEDAL_VERSION >= 20230100:
+        packages_with_tests.extend([
+            'onedal.spmd.basic_statistics',
+            'onedal.spmd.linear_model',
+            'onedal.spmd.neighbors'])
+
 setup(
     name="daal4py",
     description="A convenient Python API to Intel(R) oneAPI Data Analytics Library",
@@ -447,40 +487,7 @@ def run(self):
         'data science',
         'data analytics'
     ],
-    packages=get_packages_with_tests([
-        'daal4py',
-        'daal4py.oneapi',
-        'daal4py.sklearn',
-        'daal4py.sklearn.cluster',
-        'daal4py.sklearn.decomposition',
-        'daal4py.sklearn.ensemble',
-        'daal4py.sklearn.linear_model',
-        'daal4py.sklearn.manifold',
-        'daal4py.sklearn.metrics',
-        'daal4py.sklearn.neighbors',
-        'daal4py.sklearn.monkeypatch',
-        'daal4py.sklearn.svm',
-        'daal4py.sklearn.utils',
-        'daal4py.sklearn.model_selection',
-        'onedal',
-        'onedal.ensemble',
-        'onedal.decomposition',
-        'onedal.svm',
-        'onedal.neighbors',
-        'onedal.primitives',
-        'onedal.datatypes',
-        'onedal.common'
-    ] + (['onedal.basic_statistics',
-          'onedal.linear_model'
-          ] if ONEDAL_VERSION >= 20230100 else []
-         ) + (
-        ['onedal.spmd',
-         'onedal.spmd.basic_statistics',
-         'onedal.spmd.decomposition',
-         'onedal.spmd.linear_model'
-         ] + (['onedal.spmd.neighbors']
-              if ONEDAL_VERSION >= 20230100 else [])
-        if build_distribute else [])),
+    packages=get_packages_with_tests(packages_with_tests),
     package_data={
         'daal4py.oneapi': [
             'liboneapi_backend.so',
 
@@ -68,6 +68,30 @@
 with open("README.md", "r", encoding="utf8") as f:
     long_description = f.read()
 
+packages_with_tests = [  # base list; SPMD subpackages are appended below
+    'sklearnex',
+    'sklearnex.cluster',
+    'sklearnex.decomposition',
+    'sklearnex.ensemble',
+    'sklearnex.glob',
+    'sklearnex.linear_model',
+    'sklearnex.manifold',
+    'sklearnex.metrics',
+    'sklearnex.model_selection',
+    'sklearnex.neighbors',
+    'sklearnex.preview',
+    'sklearnex.preview.decomposition',
+    'sklearnex.preview.ensemble',
+    'sklearnex.preview.linear_model',
+    'sklearnex.svm',
+    'sklearnex.utils']
+
+if build_distribute:  # SPMD subpackages are added only when build_distribute is set
+    packages_with_tests.extend([
+        'sklearnex.spmd',
+        'sklearnex.spmd.ensemble',
+        'sklearnex.spmd.linear_model'])
+
 # sklearnex setup
 setup(name="scikit-learn-intelex",
       description="Intel(R) Extension for Scikit-learn is a "
@@ -112,22 +136,5 @@
           "data science",
           "data analytics",
       ],
-      packages=get_packages_with_tests([
-          "sklearnex",
-          'sklearnex.cluster',
-          'sklearnex.decomposition',
-          'sklearnex.ensemble',
-          'sklearnex.glob',
-          'sklearnex.linear_model',
-          'sklearnex.manifold',
-          'sklearnex.metrics',
-          'sklearnex.model_selection',
-          'sklearnex.neighbors',
-          'sklearnex.preview',
-          'sklearnex.preview.ensemble',
-          'sklearnex.preview.decomposition',
-          'sklearnex.preview.linear_model',
-          'sklearnex.svm',
-          'sklearnex.utils'
-      ] + (['sklearnex.spmd'] if build_distribute else [])),
+      packages=get_packages_with_tests(packages_with_tests),
       )