Skip to content

Commit 45fc83d

Browse files
authored
ENH: SPMD interface for IncrementalLinearRegression (#1972)
1 parent a8b7373 commit 45fc83d

File tree

9 files changed

+504
-39
lines changed

9 files changed

+504
-39
lines changed

onedal/linear_model/incremental_linear_model.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,13 @@ class IncrementalLinearRegression(BaseLinearRegression):
4343
"""
4444

4545
def __init__(self, fit_intercept=True, copy_X=False, algorithm="norm_eq"):
46-
module = self._get_backend("linear_model", "regression")
4746
super().__init__(fit_intercept=fit_intercept, copy_X=copy_X, algorithm=algorithm)
48-
self._partial_result = module.partial_train_result()
47+
self._reset()
4948

5049
def _reset(self):
51-
module = self._get_backend("linear_model", "regression")
52-
self._partial_result = module.partial_train_result()
50+
self._partial_result = self._get_backend(
51+
"linear_model", "regression", "partial_train_result"
52+
)
5353

5454
def partial_fit(self, X, y, queue=None):
5555
"""
@@ -74,26 +74,27 @@ def partial_fit(self, X, y, queue=None):
7474
"""
7575
module = self._get_backend("linear_model", "regression")
7676

77-
if not hasattr(self, "_policy"):
78-
self._policy = self._get_policy(queue, X)
77+
self._queue = queue
78+
policy = self._get_policy(queue, X)
7979

80-
X, y = _convert_to_supported(self._policy, X, y)
80+
X, y = _convert_to_supported(policy, X, y)
8181

8282
if not hasattr(self, "_dtype"):
8383
self._dtype = get_dtype(X)
8484
self._params = self._get_onedal_params(self._dtype)
8585

86-
y = np.asarray(y).astype(dtype=self._dtype)
87-
self._y_ndim_1 = y.ndim == 1
86+
y = np.asarray(y, dtype=self._dtype)
8887

89-
X, y = _check_X_y(X, y, dtype=[np.float64, np.float32], accept_2d_y=True)
88+
X, y = _check_X_y(
89+
X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False
90+
)
9091

9192
self.n_features_in_ = _num_features(X, fallback_1d=True)
9293
X_table, y_table = to_table(X, y)
9394
hparams = get_hyperparameters("linear_regression", "train")
9495
if hparams is not None and not hparams.is_default:
9596
self._partial_result = module.partial_train(
96-
self._policy,
97+
policy,
9798
self._params,
9899
hparams.backend,
99100
self._partial_result,
@@ -102,7 +103,7 @@ def partial_fit(self, X, y, queue=None):
102103
)
103104
else:
104105
self._partial_result = module.partial_train(
105-
self._policy, self._params, self._partial_result, X_table, y_table
106+
policy, self._params, self._partial_result, X_table, y_table
106107
)
107108

108109
def finalize_fit(self, queue=None):
@@ -113,36 +114,36 @@ def finalize_fit(self, queue=None):
113114
Parameters
114115
----------
115116
queue : dpctl.SyclQueue
116-
Not used here, added for API conformance
117+
If not None, use this queue for computations.
117118
118119
Returns
119120
-------
120121
self : object
121122
Returns the instance itself.
122123
"""
124+
125+
if queue is not None:
126+
policy = self._get_policy(queue)
127+
else:
128+
policy = self._get_policy(self._queue)
129+
123130
module = self._get_backend("linear_model", "regression")
124131
hparams = get_hyperparameters("linear_regression", "train")
125132
if hparams is not None and not hparams.is_default:
126133
result = module.finalize_train(
127-
self._policy, self._params, hparams.backend, self._partial_result
134+
policy, self._params, hparams.backend, self._partial_result
128135
)
129136
else:
130-
result = module.finalize_train(
131-
self._policy, self._params, self._partial_result
132-
)
137+
result = module.finalize_train(policy, self._params, self._partial_result)
133138

134139
self._onedal_model = result.model
135140

136141
packed_coefficients = from_table(result.model.packed_coefficients)
137142
self.coef_, self.intercept_ = (
138-
packed_coefficients[:, 1:],
139-
packed_coefficients[:, 0],
143+
packed_coefficients[:, 1:].squeeze(),
144+
packed_coefficients[:, 0].squeeze(),
140145
)
141146

142-
if self.coef_.shape[0] == 1 and self._y_ndim_1:
143-
self.coef_ = self.coef_.ravel()
144-
self.intercept_ = self.intercept_[0]
145-
146147
return self
147148

148149

@@ -203,8 +204,7 @@ def partial_fit(self, X, y, queue=None):
203204
"""
204205
module = self._get_backend("linear_model", "regression")
205206

206-
if not hasattr(self, "_queue"):
207-
self._queue = queue
207+
self._queue = queue
208208
policy = self._get_policy(queue, X)
209209

210210
X, y = _convert_to_supported(policy, X, y)
@@ -213,9 +213,11 @@ def partial_fit(self, X, y, queue=None):
213213
self._dtype = get_dtype(X)
214214
self._params = self._get_onedal_params(self._dtype)
215215

216-
y = np.asarray(y).astype(dtype=self._dtype)
216+
y = np.asarray(y, dtype=self._dtype)
217217

218-
X, y = _check_X_y(X, y, dtype=[np.float64, np.float32], accept_2d_y=True)
218+
X, y = _check_X_y(
219+
X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False
220+
)
219221

220222
self.n_features_in_ = _num_features(X, fallback_1d=True)
221223
X_table, y_table = to_table(X, y)

onedal/linear_model/linear_model.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ ONEDAL_PY_INIT_MODULE(linear_model) {
304304
#ifdef ONEDAL_DATA_PARALLEL_SPMD
305305
ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_spmd, task_list);
306306
ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_spmd, task_list);
307+
ONEDAL_PY_INSTANTIATE(init_finalize_train_ops, sub, policy_spmd, task_list);
307308
#else // ONEDAL_DATA_PARALLEL_SPMD
308309
ONEDAL_PY_INSTANTIATE(init_train_ops, sub, policy_list, task_list);
309310
ONEDAL_PY_INSTANTIATE(init_infer_ops, sub, policy_list, task_list);

onedal/spmd/linear_model/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
# limitations under the License.
1515
# ==============================================================================
1616

17+
from .incremental_linear_model import IncrementalLinearRegression
1718
from .linear_model import LinearRegression
1819
from .logistic_regression import LogisticRegression
1920

20-
__all__ = ["LinearRegression", "LogisticRegression"]
21+
__all__ = ["IncrementalLinearRegression", "LinearRegression", "LogisticRegression"]

onedal/spmd/linear_model/incremental_linear_model.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# ==============================================================================
2+
# Copyright 2024 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# ==============================================================================
16+
17+
import numpy as np
18+
19+
from daal4py.sklearn._utils import get_dtype
20+
21+
from ...common.hyperparameters import get_hyperparameters
22+
from ...datatypes import _convert_to_supported, to_table
23+
from ...linear_model import (
24+
IncrementalLinearRegression as base_IncrementalLinearRegression,
25+
)
26+
from ...utils import _check_X_y, _num_features
27+
from .._base import BaseEstimatorSPMD
28+
29+
30+
class IncrementalLinearRegression(BaseEstimatorSPMD, base_IncrementalLinearRegression):
31+
"""
32+
Distributed incremental Linear Regression oneDAL implementation.
33+
34+
API is the same as for `onedal.linear_model.IncrementalLinearRegression`.
35+
"""
36+
37+
def _reset(self):
38+
self._partial_result = super(base_IncrementalLinearRegression, self)._get_backend(
39+
"linear_model", "regression", "partial_train_result"
40+
)
41+
42+
def partial_fit(self, X, y, queue=None):
43+
"""
44+
Computes partial data for linear regression
45+
from data batch X and saves it to `_partial_result`.
46+
Parameters
47+
----------
48+
X : array-like of shape (n_samples, n_features)
49+
Training data batch, where `n_samples` is the number of samples
50+
in the batch, and `n_features` is the number of features.
51+
52+
y: array-like of shape (n_samples,) or (n_samples, n_targets) in
53+
case of multiple targets
54+
Responses for training data.
55+
56+
queue : dpctl.SyclQueue
57+
If not None, use this queue for computations.
58+
Returns
59+
-------
60+
self : object
61+
Returns the instance itself.
62+
"""
63+
module = super(base_IncrementalLinearRegression, self)._get_backend(
64+
"linear_model", "regression"
65+
)
66+
67+
self._queue = queue
68+
policy = super(base_IncrementalLinearRegression, self)._get_policy(queue, X)
69+
70+
X, y = _convert_to_supported(policy, X, y)
71+
72+
if not hasattr(self, "_dtype"):
73+
self._dtype = get_dtype(X)
74+
self._params = self._get_onedal_params(self._dtype)
75+
76+
y = np.asarray(y, dtype=self._dtype)
77+
78+
X, y = _check_X_y(
79+
X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False
80+
)
81+
82+
self.n_features_in_ = _num_features(X, fallback_1d=True)
83+
X_table, y_table = to_table(X, y)
84+
hparams = get_hyperparameters("linear_regression", "train")
85+
if hparams is not None and not hparams.is_default:
86+
self._partial_result = module.partial_train(
87+
policy,
88+
self._params,
89+
hparams.backend,
90+
self._partial_result,
91+
X_table,
92+
y_table,
93+
)
94+
else:
95+
self._partial_result = module.partial_train(
96+
policy, self._params, self._partial_result, X_table, y_table
97+
)

sklearnex/linear_model/incremental_linear.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def _onedal_predict(self, X, queue=None):
147147
assert hasattr(self, "_onedal_estimator")
148148
if self._need_to_finalize:
149149
self._onedal_finalize_fit()
150-
return self._onedal_estimator.predict(X, queue)
150+
return self._onedal_estimator.predict(X, queue=queue)
151151

152152
def _onedal_score(self, X, y, sample_weight=None, queue=None):
153153
return r2_score(
@@ -194,17 +194,17 @@ def _onedal_partial_fit(self, X, y, check_input=True, queue=None):
194194
onedal_params = {"fit_intercept": self.fit_intercept, "copy_X": self.copy_X}
195195
if not hasattr(self, "_onedal_estimator"):
196196
self._onedal_estimator = self._onedal_incremental_linear(**onedal_params)
197-
self._onedal_estimator.partial_fit(X, y, queue)
197+
self._onedal_estimator.partial_fit(X, y, queue=queue)
198198
self._need_to_finalize = True
199199

200-
def _onedal_finalize_fit(self):
200+
def _onedal_finalize_fit(self, queue=None):
201201
assert hasattr(self, "_onedal_estimator")
202202
is_underdetermined = self.n_samples_seen_ < self.n_features_in_ + int(
203203
self.fit_intercept
204204
)
205205
if is_underdetermined:
206206
raise ValueError("Not enough samples to finalize")
207-
self._onedal_estimator.finalize_fit()
207+
self._onedal_estimator.finalize_fit(queue=queue)
208208
self._need_to_finalize = False
209209

210210
def _onedal_fit(self, X, y, queue=None):
@@ -263,8 +263,7 @@ def _onedal_fit(self, X, y, queue=None):
263263
"Only one sample available. You may want to reshape your data array"
264264
)
265265

266-
self._onedal_finalize_fit()
267-
266+
self._onedal_finalize_fit(queue=queue)
268267
return self
269268

270269
def get_intercept_(self):

sklearnex/linear_model/tests/test_incremental_linear.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block
3434
X = np.array([[1], [2]])
3535
X = X.astype(dtype=dtype)
3636
X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
37-
y = np.array([1, 2])
37+
y = np.array([[1], [2]])
3838
y = y.astype(dtype=dtype)
3939
y_df = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
4040

@@ -185,16 +185,16 @@ def test_sklearnex_partial_fit_on_random_data(
185185
inclin.partial_fit(X_split_df, y_split_df)
186186

187187
tol = 1e-4 if inclin.coef_.dtype == np.float32 else 1e-7
188-
assert_allclose(coef, inclin.coef_.T, atol=tol)
188+
assert_allclose(coef.T.squeeze(), inclin.coef_, atol=tol)
189189

190190
if fit_intercept:
191191
assert_allclose(intercept, inclin.intercept_, atol=tol)
192192

193193
X_test = gen.random(size=(num_samples, num_features), dtype=dtype)
194194
if fit_intercept:
195-
expected_y_pred = X_test @ coef + intercept[np.newaxis, :]
195+
expected_y_pred = (X_test @ coef + intercept[np.newaxis, :]).squeeze()
196196
else:
197-
expected_y_pred = X_test @ coef
197+
expected_y_pred = (X_test @ coef).squeeze()
198198

199199
X_test_df = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
200200

sklearnex/spmd/linear_model/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
# limitations under the License.
1515
# ==============================================================================
1616

17+
from .incremental_linear_model import IncrementalLinearRegression
1718
from .linear_model import LinearRegression
1819
from .logistic_regression import LogisticRegression
1920

20-
__all__ = ["LinearRegression", "LogisticRegression"]
21+
__all__ = ["IncrementalLinearRegression", "LinearRegression", "LogisticRegression"]

sklearnex/spmd/linear_model/incremental_linear_model.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# ==============================================================================
2+
# Copyright 2024 Intel Corporation
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# ==============================================================================
16+
17+
18+
from onedal.spmd.linear_model import (
19+
IncrementalLinearRegression as onedalSPMD_IncrementalLinearRegression,
20+
)
21+
22+
from ...linear_model import (
23+
IncrementalLinearRegression as base_IncrementalLinearRegression,
24+
)
25+
26+
27+
class IncrementalLinearRegression(base_IncrementalLinearRegression):
28+
"""
29+
Distributed incremental estimator for linear regression.
30+
Allows for distributed training of linear regression if data is split into batches.
31+
32+
API is the same as for `sklearnex.linear_model.IncrementalLinearRegression`.
33+
"""
34+
35+
_onedal_incremental_linear = staticmethod(onedalSPMD_IncrementalLinearRegression)

0 commit comments

Comments
 (0)