Skip to content

Commit c43c1ab

Browse files
authored
sklearn tests subset in CI (#1199)
* Subset of sklearn tests in CI * New deselected tests * Filtering of deselected tests based on sys.platform * Pinning of scipy version in CI to prevent sklearn test failures related to scipy API and implementation changes * Parameter validation from sklearn 1.1 integrated into sklearnex DBSCAN, SVM and linear models.
1 parent 7ae0171 commit c43c1ab

File tree

11 files changed

+310
-23
lines changed

11 files changed

+310
-23
lines changed

.ci/pipeline/ci.yml

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ jobs:
101101
- script: |
102102
conda update -y -q conda
103103
if [ $(echo $(PYTHON_VERSION) | grep '3.11') ]; then export DPCTL_PACKAGE=; else export DPCTL_PACKAGE=; fi
104-
conda create -q -y -n CB -c conda-forge -c intel python=$(PYTHON_VERSION) dal-devel mpich $DPCTL_PACKAGE
104+
conda create -q -y -n CB -c conda-forge -c intel python=$(PYTHON_VERSION) dal-devel mpich pyyaml $DPCTL_PACKAGE
105105
displayName: 'Conda create'
106106
- script: |
107107
. /usr/share/miniconda/etc/profile.d/conda.sh
@@ -128,16 +128,21 @@ jobs:
128128
conda activate CB
129129
sed -i -e "s/scikit-learn==1.2.0/scikit-learn==$(SKLEARN_VERSION).*/" requirements-test.txt
130130
sed -i -e "s/scikit-learn==1.0.2/scikit-learn==$(SKLEARN_VERSION).*/" requirements-test.txt
131-
pip install --upgrade -r requirements-test.txt
132-
pip install -r requirements-test-optional.txt
131+
pip install --upgrade -r requirements-test.txt -r requirements-test-optional.txt
132+
pip install $(python .ci/scripts/get_compatible_scipy_version.py)
133133
pip list
134134
displayName: 'Install testing requirements'
135135
- script: |
136136
. /usr/share/miniconda/etc/profile.d/conda.sh
137137
conda activate CB
138138
cd ..
139139
./s/conda-recipe/run_test.sh
140-
displayName: 'Testing'
140+
displayName: 'Sklearnex testing'
141+
- script: |
142+
. /usr/share/miniconda/etc/profile.d/conda.sh
143+
conda activate CB
144+
bash .ci/scripts/run_sklearn_tests.sh
145+
displayName: 'Sklearn testing'
141146
- script: |
142147
. /usr/share/miniconda/etc/profile.d/conda.sh
143148
conda activate CB
@@ -170,7 +175,7 @@ jobs:
170175
sudo chown -R $USER $CONDA
171176
conda config --set always_yes yes --set changeps1 no
172177
conda update -q conda
173-
conda create -n CB -c conda-forge python=$(PYTHON_VERSION) dal dal-include mpich clang-format
178+
conda create -n CB -c conda-forge python=$(PYTHON_VERSION) dal dal-include mpich clang-format pyyaml
174179
displayName: Create Anaconda environment
175180
- script: |
176181
source activate CB
@@ -192,15 +197,19 @@ jobs:
192197
source activate CB
193198
sed -i.bak -e "s/scikit-learn==1.2.0/scikit-learn==$(SKLEARN_VERSION).*/" requirements-test.txt
194199
sed -i.bak -e "s/scikit-learn==1.0.2/scikit-learn==$(SKLEARN_VERSION).*/" requirements-test.txt
195-
pip install --upgrade -r requirements-test.txt
196-
pip install -r requirements-test-optional.txt
200+
pip install --upgrade -r requirements-test.txt -r requirements-test-optional.txt
201+
pip install $(python .ci/scripts/get_compatible_scipy_version.py)
197202
pip list
198203
displayName: 'Install testing requirements'
199204
- script: |
200205
source activate CB
201206
cd ..
202207
./s/conda-recipe/run_test.sh
203-
displayName: Testing
208+
displayName: 'Sklearnex testing'
209+
- script: |
210+
source activate CB
211+
bash .ci/scripts/run_sklearn_tests.sh
212+
displayName: 'Sklearn testing'
204213
- script: |
205214
source activate CB
206215
cd ..
@@ -229,7 +238,7 @@ jobs:
229238
steps:
230239
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
231240
displayName: Add conda to PATH
232-
- script: conda create -q -y -n CB -c conda-forge -c intel python=$(PYTHON_VERSION) dal-devel impi-devel clang-format
241+
- script: conda create -q -y -n CB -c conda-forge -c intel python=$(PYTHON_VERSION) dal-devel impi-devel clang-format pyyaml
233242
displayName: 'Create Anaconda environment'
234243
- script: |
235244
call activate CB
@@ -257,15 +266,21 @@ jobs:
257266
call activate CB
258267
sed -i -e "s/scikit-learn==1.2.0/scikit-learn==$(SKLEARN_VERSION).*/" requirements-test.txt
259268
sed -i -e "s/scikit-learn==1.0.2/scikit-learn==$(SKLEARN_VERSION).*/" requirements-test.txt
260-
pip install --upgrade -r requirements-test.txt
261-
pip install -r requirements-test-optional.txt
269+
pip install --upgrade -r requirements-test.txt -r requirements-test-optional.txt
270+
cd ..
271+
for /f "delims=" %%c in ('python s\.ci\scripts\get_compatible_scipy_version.py') do set SCIPY_VERSION=%%c
272+
pip install %SCIPY_VERSION%
262273
pip list
263274
displayName: 'Install testing requirements'
264275
- script: |
265276
call activate CB
266277
cd ..
267278
call s\conda-recipe\run_test.bat s
268-
displayName: daal4py/sklearnex testing
279+
displayName: 'Sklearnex testing'
280+
- script: |
281+
call activate CB
282+
bash .ci/scripts/run_sklearn_tests.sh
283+
displayName: 'Sklearn testing'
269284
- script: |
270285
call activate CB
271286
cd ..
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#! /usr/bin/env python
2+
#===============================================================================
3+
# Copyright 2023 Intel Corporation
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#===============================================================================
17+
from daal4py.sklearn._utils import sklearn_check_version
18+
19+
20+
# Print the pip requirement for scipy that goes with the installed
# scikit-learn; the CI pipeline feeds this output straight to `pip install`.
# Entries are ordered newest first and the first matching one wins.
# NOTE(review): sklearn_check_version presumably reports "installed version is
# at least the given one" - confirm in daal4py.sklearn._utils.
for sklearn_min_version, scipy_requirement in (
    ('1.2', 'scipy==1.9'),
    ('1.1', 'scipy==1.8'),
    ('1.0', 'scipy==1.7'),
    ('0.24', 'scipy==1.6'),
):
    if sklearn_check_version(sklearn_min_version):
        print(scipy_requirement)
        break
else:
    # No known pairing matched: leave scipy unpinned
    print('scipy')

.ci/scripts/run_sklearn_tests.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
2+
#===============================================================================
3+
# Copyright 2023 Intel Corporation
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#===============================================================================
17+
18+
# Runs the selected subset of stock scikit-learn tests through sklearnex.
#
# Stop at the first failing command: without this, a crashing helper script
# would leave its variable empty and pytest would be started with an
# unintended (empty) test selection.
set -e

# Directory that contains the "scripts" folder (i.e. the .ci directory);
# quoted so that checkout paths with spaces keep working.
ci_dir="$( dirname "$( dirname "${BASH_SOURCE[0]}" )" )"
cd "$ci_dir"

# Assign first and export afterwards: `export VAR=$(cmd)` returns the status
# of `export` and therefore hides a failure of `cmd`.
SELECTED_TESTS="$(python scripts/select_sklearn_tests.py)"
DESELECTED_TESTS="$(python ../.circleci/deselect_tests.py ../deselected_tests.yaml)"
export SELECTED_TESTS DESELECTED_TESTS

# Run from inside the installed sklearn package so the relative test paths resolve
sklearn_dir="$(python -c "import sklearn, os; print(os.path.dirname(sklearn.__file__))")"
cd "$sklearn_dir"
export SKLEARNEX_VERBOSE=DEBUG
# Both variables hold space-separated pytest arguments and are intentionally
# left unquoted so the shell splits them into separate words.
python -m sklearnex -m pytest --verbose --pyargs --durations=100 --durations-min=0.01 $DESELECTED_TESTS $SELECTED_TESTS
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#! /usr/bin/env python
2+
#===============================================================================
3+
# Copyright 2023 Intel Corporation
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#===============================================================================
17+
import argparse
18+
import sys
19+
20+
21+
def parse_tests_tree(entry, prefix=''):
    """Flatten a (possibly nested) tests map into '/'-joined path strings.

    Every leaf string found in ``entry`` is appended to the module-level
    ``tests_list``; nothing is returned.

    Parameters
    ----------
    entry : dict, list or str
        A dict contributes each key as one more path component, a list
        shares the current prefix between its items, a str is a leaf.
    prefix : str
        Path accumulated so far; an empty string means "no prefix".

    Raises
    ------
    ValueError
        If ``entry`` is neither a dict, a list nor a str.
    """
    # The result list lives at module level and must exist before the call
    global tests_list

    def _with_prefix(name):
        # Avoid a leading '/' when there is no prefix yet
        return f'{prefix}/{name}' if prefix != '' else name

    if isinstance(entry, str):
        tests_list.append(_with_prefix(entry))
        return
    if isinstance(entry, list):
        for item in entry:
            parse_tests_tree(item, prefix)
        return
    if isinstance(entry, dict):
        for name, subtree in entry.items():
            parse_tests_tree(subtree, _with_prefix(name))
        return
    raise ValueError(f'Unknown type {type(entry)} in tests map')
34+
35+
36+
# Test modules picked from scikit-learn's own test suite.
# Keys are test directories, values are either a single module name or a
# list of module names located in that directory.
tests_map = {
    'cluster/tests': ['test_dbscan.py', 'test_k_means.py'],
    'decomposition/tests': 'test_pca.py',
    'ensemble/tests': 'test_forest.py',
    'linear_model/tests': [
        'test_base.py', 'test_coordinate_descent.py', 'test_ridge.py',
    ],
    'manifold/tests': 'test_t_sne.py',
    'model_selection/tests': ['test_split.py', 'test_validation.py'],
    'neighbors/tests': [
        'test_lof.py', 'test_neighbors.py', 'test_neighbors_pipeline.py',
    ],
    'svm/tests': ['test_sparse.py', 'test_svm.py'],
}
63+
64+
65+
if __name__ == '__main__':
    # Command line: an optional --base-dir is prepended to every test path
    cli = argparse.ArgumentParser()
    cli.add_argument('--base-dir', type=str, default='')
    options = cli.parse_args()

    # parse_tests_tree() appends into this module-level list
    tests_list = []
    parse_tests_tree(tests_map, options.base_dir)

    # Single space-separated line, ready to be passed on as arguments
    result = ' '.join(tests_list)
    # correct paths for non-Unix envs
    if sys.platform in ('win32', 'cygwin'):
        result = result.replace('/', '\\')
    print(result)

.circleci/deselect_tests.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
# coding: utf-8
1919
import argparse
20+
import sys
2021
import os.path
2122
from yaml import FullLoader, load as yaml_load
2223
try:
@@ -48,15 +49,19 @@ def evaluate_cond(cond, v):
4849
return False
4950

5051

51-
def filter_by_version(entry, sk_ver):
52+
def filter_by_version_and_platform(entry, sk_ver):
5253
if not entry:
5354
return None
5455
t = entry.split(' ')
5556
if len(t) == 1:
5657
return entry
57-
if len(t) != 2:
58+
elif len(t) == 2:
59+
t.append(None)
60+
if len(t) != 3:
61+
return None
62+
test_name, cond, platform = t
63+
if platform is not None and platform != sys.platform:
5864
return None
59-
test_name, cond = t
6065
conds = cond.split(',')
6166
if all([evaluate_cond(cond, sk_ver) for cond in conds]):
6267
return test_name
@@ -79,19 +84,19 @@ def create_pytest_switches(filename, absolute, reduced, public, gpu, base_dir=No
7984
base_dir += '/'
8085

8186
filtered_deselection = [
82-
filter_by_version(test_name, sklearn_version)
87+
filter_by_version_and_platform(test_name, sklearn_version)
8388
for test_name in dt.get('deselected_tests', [])]
8489
if reduced:
8590
filtered_deselection.extend(
86-
[filter_by_version(test_name, sklearn_version)
91+
[filter_by_version_and_platform(test_name, sklearn_version)
8792
for test_name in dt.get('reduced_tests', [])])
8893
if public:
8994
filtered_deselection.extend(
90-
[filter_by_version(test_name, sklearn_version)
95+
[filter_by_version_and_platform(test_name, sklearn_version)
9196
for test_name in dt.get('public', [])])
9297
if gpu:
9398
filtered_deselection.extend(
94-
[filter_by_version(test_name, sklearn_version)
99+
[filter_by_version_and_platform(test_name, sklearn_version)
95100
for test_name in dt.get('gpu', [])])
96101
pytest_switches = []
97102
for test_name in filtered_deselection:

daal4py/sklearn/cluster/_dbscan.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import numpy as np
1818
from scipy import sparse as sp
19+
import numbers
1920

2021
from sklearn.utils import check_array
2122
from sklearn.utils.validation import _check_sample_weight
@@ -29,6 +30,9 @@
2930
from .._device_offload import support_usm_ndarray
3031
from .._utils import sklearn_check_version
3132

33+
if sklearn_check_version('1.1') and not sklearn_check_version('1.2'):
34+
from sklearn.utils import check_scalar
35+
3236

3337
def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
3438
ww = make2d(sample_weight) if sample_weight is not None else None
@@ -236,6 +240,38 @@ def fit(self, X, y=None, sample_weight=None):
236240
"""
237241
if sklearn_check_version("1.2"):
238242
self._validate_params()
243+
elif sklearn_check_version("1.1"):
244+
check_scalar(
245+
self.eps,
246+
"eps",
247+
target_type=numbers.Real,
248+
min_val=0.0,
249+
include_boundaries="neither",
250+
)
251+
check_scalar(
252+
self.min_samples,
253+
"min_samples",
254+
target_type=numbers.Integral,
255+
min_val=1,
256+
include_boundaries="left",
257+
)
258+
check_scalar(
259+
self.leaf_size,
260+
"leaf_size",
261+
target_type=numbers.Integral,
262+
min_val=1,
263+
include_boundaries="left",
264+
)
265+
if self.p is not None:
266+
check_scalar(
267+
self.p,
268+
"p",
269+
target_type=numbers.Real,
270+
min_val=0.0,
271+
include_boundaries="left",
272+
)
273+
if self.n_jobs is not None:
274+
check_scalar(self.n_jobs, "n_jobs", target_type=numbers.Integral)
239275
else:
240276
if self.eps <= 0.0:
241277
raise ValueError(f"eps == {self.eps}, must be > 0.0.")

daal4py/sklearn/linear_model/_coordinate_descent.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
make2d, getFPType, get_patch_message, sklearn_check_version, PatchingConditionsChain)
2626
if sklearn_check_version('1.0') and not sklearn_check_version('1.2'):
2727
from sklearn.linear_model._base import _deprecate_normalize
28+
if sklearn_check_version('1.1') and not sklearn_check_version('1.2'):
29+
from sklearn.utils import check_scalar
2830

2931
import logging
3032

@@ -394,6 +396,37 @@ def _fit(self, X, y, sample_weight=None, check_input=True):
394396
self._check_feature_names(X, reset=True)
395397
if sklearn_check_version("1.2"):
396398
self._validate_params()
399+
elif sklearn_check_version('1.1'):
400+
check_scalar(
401+
self.alpha,
402+
"alpha",
403+
target_type=numbers.Real,
404+
min_val=0.0,
405+
)
406+
if self.alpha == 0:
407+
warnings.warn(
408+
"With alpha=0, this algorithm does not converge "
409+
"well. You are advised to use the LinearRegression "
410+
"estimator",
411+
stacklevel=2,
412+
)
413+
if isinstance(self.precompute, str):
414+
raise ValueError(
415+
"precompute should be one of True, False or array-like. Got %r"
416+
% self.precompute
417+
)
418+
check_scalar(
419+
self.l1_ratio,
420+
"l1_ratio",
421+
target_type=numbers.Real,
422+
min_val=0.0,
423+
max_val=1.0,
424+
)
425+
if self.max_iter is not None:
426+
check_scalar(
427+
self.max_iter, "max_iter", target_type=numbers.Integral, min_val=1
428+
)
429+
check_scalar(self.tol, "tol", target_type=numbers.Real, min_val=0.0)
397430
# check X and y
398431
if check_input:
399432
X, y = check_X_y(

0 commit comments

Comments
 (0)