Skip to content

Commit 130d068

Browse files
Add SPMD interfaces for LogisticRegression (#1673)
* Initial commit * Add requirements for logistic_regression_spmd, update version requirements * Substitute BaseLogisticRegressionSPMD with BaseEstimatorSPMD * Remove redundant import
1 parent 4e1a23e commit 130d068

File tree

7 files changed

+162
-2
lines changed

7 files changed

+162
-2
lines changed
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# ==============================================================================
2+
# Copyright 2024 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# ==============================================================================
16+
17+
from warnings import warn
18+
19+
import dpctl
20+
import dpctl.tensor as dpt
21+
import numpy as np
22+
from mpi4py import MPI
23+
from scipy.special import expit
24+
from sklearn.datasets import make_classification
25+
from sklearn.metrics import accuracy_score
26+
from sklearn.model_selection import train_test_split
27+
28+
from sklearnex.spmd.linear_model import LogisticRegression
29+
30+
31+
def generate_X_y(par, seed):
    """Generate a synthetic binary classification dataset.

    The ground-truth intercept and weights are always drawn from a generator
    seeded with 42, so every caller (every MPI rank) shares the same
    underlying model. The observations and noise are drawn from a generator
    seeded with ``seed``, so different seeds yield different samples of the
    same model.

    Parameters
    ----------
    par : dict
        Must contain ``"ns"`` (number of samples) and ``"nf"`` (number of
        features, greater than 2).
    seed : int
        Seed for the observation/noise generator.

    Returns
    -------
    X : numpy.ndarray of shape (ns, nf)
        Feature matrix.
    y : numpy.ndarray of shape (ns,), dtype int32
        Binary labels (0 or 1).
    """
    ns, nf = par["ns"], par["nf"]

    assert nf > 2
    # 2 last features will be redundant, weights are same for all ranks
    # Local RandomState objects yield the same streams as np.random.seed(...)
    # followed by np.random.normal(...), without clobbering the global RNG.
    model_rng = np.random.RandomState(42)
    intercept = model_rng.normal(0, 5)
    weights = np.hstack([model_rng.normal(0, 4, nf - 2), np.zeros(2)])

    data_rng = np.random.RandomState(seed)
    X = data_rng.normal(0, 3, (ns, nf))
    noise = data_rng.normal(0, 4, ns)
    # expit(z) >= 0.5 is the logistic decision rule applied to the noisy score.
    y = expit(X @ weights + noise + intercept) >= 0.5
    y = y.astype(np.int32)
    return X, y
47+
48+
49+
# MPI world communicator, plus this process's rank and the total rank count.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# `dpctl.has_gpu_devices` is a function; it has to be called. Testing the
# bare function object is always truthy, which made the error branch below
# unreachable and deferred the failure to SyclQueue construction.
if dpctl.has_gpu_devices():
    q = dpctl.SyclQueue("gpu")
else:
    raise RuntimeError(
        "GPU devices unavailable. Currently, "
        "SPMD execution mode is implemented only for this device type."
    )
60+
61+
# Per-rank problem size: "ns" samples with "nf" features.
params = {"ns": 100000, "nf": 8}

# The rank is used as the seed both for data generation and for the split.
X, y = generate_X_y(params, rank)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=rank
)

# Copy every host array into device USM memory on the selected queue.
dpt_X_train, dpt_y_train, dpt_X_test, dpt_y_test = (
    dpt.asarray(host_array, usm_type="device", sycl_queue=q)
    for host_array in (X_train, y_train, X_test, y_test)
)

model_spmd = LogisticRegression()
model_spmd.fit(dpt_X_train, dpt_y_train)

y_predict = model_spmd.predict(dpt_X_test)
77+
78+
# Copy the predictions to the host once and reuse the result below.
y_predict_host = dpt.to_numpy(y_predict)

print("Distributed LogisticRegression results:")
print("Coefficients on rank {}:\n{}".format(rank, model_spmd.coef_))
print("Intercept on rank {}:\n{}".format(rank, model_spmd.intercept_))
print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5]))
print(
    "Classification results (first 5 observations on rank {}):\n{}".format(
        rank, y_predict_host[:5]
    )
)
print(
    "Accuracy for entire rank {} (2 classes): {}\n".format(
        rank, accuracy_score(y_test, y_predict_host)
    )
)

onedal/linear_model/logistic_regression.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,13 @@ ONEDAL_PY_INIT_MODULE(logistic_regression) {
241241
auto sub = m.def_submodule("logistic_regression");
242242

243243

244+
#if defined(ONEDAL_DATA_PARALLEL_SPMD) && defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240100
245+
ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list_spmd, task_list);
246+
ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list_spmd, task_list);
247+
#else
244248
ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list);
245249
ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list);
250+
#endif // defined(ONEDAL_DATA_PARALLEL_SPMD) && defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240100
246251

247252
ONEDAL_PY_INSTANTIATE(init_model, sub, task_list);
248253
ONEDAL_PY_INSTANTIATE(init_train_result, sub, task_list);

onedal/spmd/linear_model/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@
1515
# ==============================================================================
1616

1717
from .linear_model import LinearRegression
18+
from .logistic_regression import LogisticRegression
1819

19-
__all__ = ["LinearRegression"]
20+
__all__ = ["LinearRegression", "LogisticRegression"]
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# ==============================================================================
2+
# Copyright 2024 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# ==============================================================================
16+
17+
from onedal.linear_model import LogisticRegression as LogisticRegression_Batch
18+
19+
from ..._device_offload import support_usm_ndarray
20+
from .._common import BaseEstimatorSPMD
21+
22+
23+
class LogisticRegression(BaseEstimatorSPMD, LogisticRegression_Batch):
    """SPMD flavour of the onedal ``LogisticRegression`` estimator.

    Every method simply forwards to the next implementation in the MRO; the
    only addition here is the ``support_usm_ndarray`` decorator on each entry
    point (presumably so USM ndarray inputs are accepted -- confirm against
    ``_device_offload``).
    """

    @support_usm_ndarray()
    def fit(self, X, y, queue=None):
        """Forward ``X``, ``y`` and ``queue`` to the inherited ``fit``."""
        return super().fit(X, y, queue)

    @support_usm_ndarray()
    def predict(self, X, queue=None):
        """Forward ``X`` and ``queue`` to the inherited ``predict``."""
        return super().predict(X, queue)

    @support_usm_ndarray()
    def predict_proba(self, X, queue=None):
        """Forward ``X`` and ``queue`` to the inherited ``predict_proba``."""
        return super().predict_proba(X, queue)

    @support_usm_ndarray()
    def predict_log_proba(self, X, queue=None):
        """Forward ``X`` and ``queue`` to the inherited ``predict_log_proba``."""
        return super().predict_log_proba(X, queue)

sklearnex/spmd/linear_model/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@
1515
# ==============================================================================
1616

1717
from .linear_model import LinearRegression
18+
from .logistic_regression import LogisticRegression
1819

19-
__all__ = ["LinearRegression"]
20+
__all__ = ["LinearRegression", "LogisticRegression"]
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# ==============================================================================
2+
# Copyright 2024 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# ==============================================================================
16+
17+
from onedal.spmd.linear_model import LogisticRegression
18+
19+
# TODO:
20+
# Currently it uses `onedal` module interface.
21+
# Add sklearnex dispatching.

tests/run_examples.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def check_library(rule):
144144
req_version["knn_bf_classification_spmd.py"] = (2023, "P", 100)
145145
req_version["knn_bf_regression_spmd.py"] = (2023, "P", 100)
146146
req_version["linear_regression_spmd.py"] = (2023, "P", 100)
147+
req_version["logistic_regression_spmd.py"] = (2024, "P", 100)
147148

148149
req_device = defaultdict(lambda: [])
149150
req_device["basic_statistics_spmd.py"] = ["gpu"]
@@ -153,6 +154,7 @@ def check_library(rule):
153154
req_device["knn_bf_classification_spmd.py"] = ["gpu"]
154155
req_device["knn_bf_regression_spmd.py"] = ["gpu"]
155156
req_device["linear_regression_spmd.py"] = ["gpu"]
157+
req_device["logistic_regression_spmd.py"] = ["gpu"]
156158
req_device["pca_spmd.py"] = ["gpu"]
157159
req_device["random_forest_classifier_dpctl.py"] = ["gpu"]
158160
req_device["random_forest_classifier_spmd.py"] = ["gpu"]
@@ -169,6 +171,7 @@ def check_library(rule):
169171
req_library["knn_bf_classification_spmd.py"] = ["dpctl", "mpi4py"]
170172
req_library["knn_bf_regression_spmd.py"] = ["dpctl", "mpi4py"]
171173
req_library["linear_regression_spmd.py"] = ["dpctl", "mpi4py"]
174+
req_library["logistic_regression_spmd.py"] = ["dpctl", "mpi4py"]
172175
req_library["pca_spmd.py"] = ["dpctl", "mpi4py"]
173176
req_library["random_forest_classifier_dpctl.py"] = ["dpctl"]
174177
req_library["random_forest_classifier_spmd.py"] = ["dpctl", "mpi4py"]

0 commit comments

Comments
 (0)