Commit 9b64bcb

sycl .py files are now linted with xgboost CI (#21)
Co-authored-by: Dmitry Razdoburdin <>
1 parent 89e14b3 commit 9b64bcb

File tree

5 files changed: +117 −88 lines

tests/ci_build/lint_python.py
tests/python-sycl/test_sycl_prediction.py
tests/python-sycl/test_sycl_training_continuation.py
tests/python-sycl/test_sycl_updaters.py
tests/python-sycl/test_sycl_with_sklearn.py

tests/ci_build/lint_python.py

Lines changed: 4 additions & 0 deletions

@@ -30,6 +30,10 @@ class LintersPaths:
         "tests/python-gpu/test_gpu_pickling.py",
         "tests/python-gpu/test_gpu_eval_metrics.py",
         "tests/python-gpu/test_gpu_with_sklearn.py",
+        "tests/python-sycl/test_sycl_prediction.py",
+        "tests/python-sycl/test_sycl_training_continuation.py",
+        "tests/python-sycl/test_sycl_updaters.py",
+        "tests/python-sycl/test_sycl_with_sklearn.py",
         "tests/test_distributed/test_with_spark/",
         "tests/test_distributed/test_gpu_with_spark/",
         # demo
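
The four new entries register the SYCL test files with the lint allowlist, so the CI formatting checks cover them like the existing GPU tests. A minimal sketch of how such a path allowlist is typically consumed (the SYCL_TESTS tuple and check_black helper below are illustrative, not the actual lint_python.py API, and assume black is installed):

import subprocess

SYCL_TESTS = (
    "tests/python-sycl/test_sycl_prediction.py",
    "tests/python-sycl/test_sycl_training_continuation.py",
    "tests/python-sycl/test_sycl_updaters.py",
    "tests/python-sycl/test_sycl_with_sklearn.py",
)


def check_black(paths):
    # `black --check` exits non-zero when any file would be reformatted,
    # which is how a lint step fails on unformatted files.
    return subprocess.run(["black", "--check", *paths]).returncode == 0


if __name__ == "__main__":
    print("black clean:", check_black(SYCL_TESTS))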

tests/python-sycl/test_sycl_prediction.py

Lines changed: 53 additions & 43 deletions

@@ -10,11 +10,13 @@
 rng = np.random.RandomState(1994)

-shap_parameter_strategy = strategies.fixed_dictionaries({
-    'max_depth': strategies.integers(1, 11),
-    'max_leaves': strategies.integers(0, 256),
-    'num_parallel_tree': strategies.sampled_from([1, 10]),
-}).filter(lambda x: x['max_depth'] > 0 or x['max_leaves'] > 0)
+shap_parameter_strategy = strategies.fixed_dictionaries(
+    {
+        "max_depth": strategies.integers(1, 11),
+        "max_leaves": strategies.integers(0, 256),
+        "num_parallel_tree": strategies.sampled_from([1, 10]),
+    }
+).filter(lambda x: x["max_depth"] > 0 or x["max_leaves"] > 0)


 class TestSYCLPredict(unittest.TestCase):
@@ -25,25 +27,32 @@ def test_predict(self):
         test_num_cols = [10, 50, 500]
         for num_rows in test_num_rows:
             for num_cols in test_num_cols:
-                dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols),
-                                     label=[0, 1] * int(num_rows / 2))
-                dval = xgb.DMatrix(np.random.randn(num_rows, num_cols),
-                                   label=[0, 1] * int(num_rows / 2))
-                dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols),
-                                    label=[0, 1] * int(num_rows / 2))
-                watchlist = [(dtrain, 'train'), (dval, 'validation')]
+                dtrain = xgb.DMatrix(
+                    np.random.randn(num_rows, num_cols),
+                    label=[0, 1] * int(num_rows / 2),
+                )
+                dval = xgb.DMatrix(
+                    np.random.randn(num_rows, num_cols),
+                    label=[0, 1] * int(num_rows / 2),
+                )
+                dtest = xgb.DMatrix(
+                    np.random.randn(num_rows, num_cols),
+                    label=[0, 1] * int(num_rows / 2),
+                )
+                watchlist = [(dtrain, "train"), (dval, "validation")]
                 res = {}
                 param = {
                     "objective": "binary:logistic",
-                    'eval_metric': 'logloss',
-                    'tree_method': 'hist',
-                    'device': 'cpu',
-                    'max_depth': 1,
-                    'verbosity': 0
+                    "eval_metric": "logloss",
+                    "tree_method": "hist",
+                    "device": "cpu",
+                    "max_depth": 1,
+                    "verbosity": 0,
                 }
-                bst = xgb.train(param, dtrain, iterations, evals=watchlist,
-                                evals_result=res)
-                assert self.non_increasing(res["train"]["logloss"])
+                bst = xgb.train(
+                    param, dtrain, iterations, evals=watchlist, evals_result=res
+                )
+                assert tm.non_increasing(res["train"]["logloss"])
                 cpu_pred_train = bst.predict(dtrain, output_margin=True)
                 cpu_pred_test = bst.predict(dtest, output_margin=True)
                 cpu_pred_val = bst.predict(dval, output_margin=True)
@@ -53,15 +62,9 @@ def test_predict(self):
                 sycl_pred_test = bst.predict(dtest, output_margin=True)
                 sycl_pred_val = bst.predict(dval, output_margin=True)

-                np.testing.assert_allclose(cpu_pred_train, sycl_pred_train,
-                                           rtol=1e-6)
-                np.testing.assert_allclose(cpu_pred_val, sycl_pred_val,
-                                           rtol=1e-6)
-                np.testing.assert_allclose(cpu_pred_test, sycl_pred_test,
-                                           rtol=1e-6)
-
-    def non_increasing(self, L):
-        return all((y - x) < 0.001 for x, y in zip(L, L[1:]))
+                np.testing.assert_allclose(cpu_pred_train, sycl_pred_train, rtol=1e-6)
+                np.testing.assert_allclose(cpu_pred_val, sycl_pred_val, rtol=1e-6)
+                np.testing.assert_allclose(cpu_pred_test, sycl_pred_test, rtol=1e-6)

     @pytest.mark.skipif(**tm.no_sklearn())
     def test_multi_predict(self):
@@ -70,8 +73,7 @@ def test_multi_predict(self):

         n = 1000
         X, y = make_regression(n, random_state=rng)
-        X_train, X_test, y_train, y_test = train_test_split(X, y,
-                                                            random_state=123)
+        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
         dtrain = xgb.DMatrix(X_train, label=y_train)
         dtest = xgb.DMatrix(X_test)

@@ -100,17 +102,19 @@ def test_sklearn(self):
         X_test, y_test = X[tr_size:, :], y[tr_size:]

         # First with cpu_predictor
-        params = {'tree_method': 'hist',
-                  'device': 'cpu',
-                  'n_jobs': -1,
-                  'verbosity' : 0,
-                  'seed': 123}
+        params = {
+            "tree_method": "hist",
+            "device": "cpu",
+            "n_jobs": -1,
+            "verbosity": 0,
+            "seed": 123,
+        }
         m = xgb.XGBRegressor(**params).fit(X_train, y_train)
         cpu_train_score = m.score(X_train, y_train)
         cpu_test_score = m.score(X_test, y_test)

         # Now with sycl_predictor
-        params['device'] = 'sycl'
+        params["device"] = "sycl"
         m.set_params(**params)

         # m = xgb.XGBRegressor(**params).fit(X_train, y_train)
@@ -121,8 +125,9 @@ def test_sklearn(self):
         assert np.allclose(cpu_train_score, sycl_train_score)
         assert np.allclose(cpu_test_score, sycl_test_score)

-    @given(strategies.integers(1, 10),
-           tm.make_dataset_strategy(), shap_parameter_strategy)
+    @given(
+        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
+    )
     @settings(deadline=None)
     def test_shap(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
@@ -138,8 +143,9 @@ def test_shap(self, num_rounds, dataset, param):
         assume(len(dataset.y) > 0)
         assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)

-    @given(strategies.integers(1, 10),
-           tm.make_dataset_strategy(), shap_parameter_strategy)
+    @given(
+        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
+    )
     @settings(deadline=None, max_examples=20)
     def test_shap_interactions(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
@@ -153,5 +159,9 @@ def test_shap_interactions(self, num_rounds, dataset, param):
         shap = bst.predict(test_dmat, pred_interactions=True)
         margin = bst.predict(test_dmat, output_margin=True)
         assume(len(dataset.y) > 0)
-        assert np.allclose(np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)), margin,
-                           1e-3, 1e-3)
+        assert np.allclose(
+            np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),
+            margin,
+            1e-3,
+            1e-3,
+        )
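
Besides the reformatting, this file drops its local non_increasing method in favor of tm.non_increasing from xgboost.testing. A standalone sketch equivalent to the removed helper (the library version may use a different tolerance):

def non_increasing(values, tolerance=1e-3):
    # Allow each successive metric value to rise by less than `tolerance`;
    # anything larger counts as an increase and fails the check.
    return all((y - x) < tolerance for x, y in zip(values, values[1:]))


# A strictly improving logloss history passes the check.
assert non_increasing([0.69, 0.52, 0.40, 0.33])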

tests/python-sycl/test_sycl_training_continuation.py

Lines changed: 11 additions & 8 deletions

@@ -12,15 +12,19 @@ def run_training_continuation(self, use_json):
         X = np.random.randn(kRows, kCols)
         y = np.random.randn(kRows)
         dtrain = xgb.DMatrix(X, y)
-        params = {'device': 'sycl', 'max_depth': '2',
-                  'gamma': '0.1', 'alpha': '0.01',
-                  'enable_experimental_json_serialization': use_json}
+        params = {
+            "device": "sycl",
+            "max_depth": "2",
+            "gamma": "0.1",
+            "alpha": "0.01",
+            "enable_experimental_json_serialization": use_json,
+        }
         bst_0 = xgb.train(params, dtrain, num_boost_round=64)
-        dump_0 = bst_0.get_dump(dump_format='json')
+        dump_0 = bst_0.get_dump(dump_format="json")

         bst_1 = xgb.train(params, dtrain, num_boost_round=32)
         bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
-        dump_1 = bst_1.get_dump(dump_format='json')
+        dump_1 = bst_1.get_dump(dump_format="json")

         def recursive_compare(obj_0, obj_1):
             if isinstance(obj_0, float):
@@ -36,9 +40,8 @@ def recursive_compare(obj_0, obj_1):
                 values_1 = list(obj_1.values())
                 for i in range(len(obj_0.items())):
                     assert keys_0[i] == keys_1[i]
-                    if list(obj_0.keys())[i] != 'missing':
-                        recursive_compare(values_0[i],
-                                          values_1[i])
+                    if list(obj_0.keys())[i] != "missing":
+                        recursive_compare(values_0[i], values_1[i])
             else:
                 for i in range(len(obj_0)):
                     recursive_compare(obj_0[i], obj_1[i])
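
The test trains one booster for 64 rounds and another for 32 + 32 rounds via xgb_model continuation, then compares the JSON dumps tree by tree with recursive_compare. A short sketch of the dump structure it walks; get_dump(dump_format="json") is the real Booster API, while the toy data is illustrative:

import json

import numpy as np
import xgboost as xgb

X = np.random.randn(64, 4)
y = np.random.randn(64)
bst = xgb.train({"max_depth": 2}, xgb.DMatrix(X, y), num_boost_round=4)

# get_dump(dump_format="json") yields one JSON string per tree; each parses
# into a nested dict of split conditions, children, and leaf values, which
# recursive_compare traverses key by key (skipping "missing").
trees = [json.loads(t) for t in bst.get_dump(dump_format="json")]
print(len(trees), sorted(trees[0].keys()))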

tests/python-sycl/test_sycl_updaters.py

Lines changed: 39 additions & 29 deletions

@@ -6,52 +6,62 @@
 import sys
 import os
+
 # sys.path.append("tests/python")
 # import testing as tm
 from xgboost import testing as tm

-parameter_strategy = strategies.fixed_dictionaries({
-    'max_depth': strategies.integers(0, 11),
-    'max_leaves': strategies.integers(0, 256),
-    'max_bin': strategies.integers(2, 1024),
-    'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
-    'single_precision_histogram': strategies.booleans(),
-    'min_child_weight': strategies.floats(0.5, 2.0),
-    'seed': strategies.integers(0, 10),
-    # We cannot enable subsampling as the training loss can increase
-    # 'subsample': strategies.floats(0.5, 1.0),
-    'colsample_bytree': strategies.floats(0.5, 1.0),
-    'colsample_bylevel': strategies.floats(0.5, 1.0),
-}).filter(lambda x: (x['max_depth'] > 0 or x['max_leaves'] > 0) and (
-    x['max_depth'] > 0 or x['grow_policy'] == 'lossguide'))
+parameter_strategy = strategies.fixed_dictionaries(
+    {
+        "max_depth": strategies.integers(0, 11),
+        "max_leaves": strategies.integers(0, 256),
+        "max_bin": strategies.integers(2, 1024),
+        "grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
+        "single_precision_histogram": strategies.booleans(),
+        "min_child_weight": strategies.floats(0.5, 2.0),
+        "seed": strategies.integers(0, 10),
+        # We cannot enable subsampling as the training loss can increase
+        # 'subsample': strategies.floats(0.5, 1.0),
+        "colsample_bytree": strategies.floats(0.5, 1.0),
+        "colsample_bylevel": strategies.floats(0.5, 1.0),
+    }
+).filter(
+    lambda x: (x["max_depth"] > 0 or x["max_leaves"] > 0)
+    and (x["max_depth"] > 0 or x["grow_policy"] == "lossguide")
+)


 def train_result(param, dmat, num_rounds):
     result = {}
-    xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
-              evals_result=result)
+    xgb.train(
+        param,
+        dmat,
+        num_rounds,
+        [(dmat, "train")],
+        verbose_eval=False,
+        evals_result=result,
+    )
     return result


 class TestSYCLUpdaters:
-    @given(parameter_strategy, strategies.integers(1, 5),
-           tm.make_dataset_strategy())
+    @given(parameter_strategy, strategies.integers(1, 5), tm.make_dataset_strategy())
     @settings(deadline=None)
     def test_sycl_hist(self, param, num_rounds, dataset):
-        param['tree_method'] = 'hist'
-        param['device'] = 'sycl'
-        param['verbosity'] = 0
+        param["tree_method"] = "hist"
+        param["device"] = "sycl"
+        param["verbosity"] = 0
         param = dataset.set_params(param)
         result = train_result(param, dataset.get_dmat(), num_rounds)
         note(result)
-        assert tm.non_increasing(result['train'][dataset.metric])
+        assert tm.non_increasing(result["train"][dataset.metric])

     @given(tm.make_dataset_strategy(), strategies.integers(0, 1))
     @settings(deadline=None)
     def test_specified_device_id_sycl_update(self, dataset, device_id):
         # Read the list of sycl-devicese
-        sycl_ls = os.popen('sycl-ls').read()
-        devices = sycl_ls.split('\n')
+        sycl_ls = os.popen("sycl-ls").read()
+        devices = sycl_ls.split("\n")

         # Test should launch only on gpu
         # Find gpus in the list of devices
@@ -60,11 +70,11 @@ def test_specified_device_id_sycl_update(self, dataset, device_id):
         found_devices = 0
         for idx in range(len(devices)):
             if len(devices[idx]) >= len(target_device_type):
-                if devices[idx][1:1+len(target_device_type)] == target_device_type:
-                    if (found_devices == device_id):
-                        param = {'device': f"sycl:gpu:{idx}"}
+                if devices[idx][1 : 1 + len(target_device_type)] == target_device_type:
+                    if found_devices == device_id:
+                        param = {"device": f"sycl:gpu:{idx}"}
                         param = dataset.set_params(param)
                         result = train_result(param, dataset.get_dmat(), 10)
-                        assert tm.non_increasing(result['train'][dataset.metric])
+                        assert tm.non_increasing(result["train"][dataset.metric])
                     else:
-                        found_devices += 1
+                        found_devices += 1
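
test_specified_device_id_sycl_update maps an ordinal GPU position onto XGBoost's "sycl:gpu:<idx>" device string by scanning sycl-ls output. A sketch of that enumeration, assuming sycl-ls prints lines such as "[opencl:gpu:0] Intel(R) ..." (the exact format can vary across DPC++ releases, and target_device_type is defined outside the hunk shown above):

import subprocess


def gpu_line_indices(target_device_type: str = "opencl:gpu") -> list[int]:
    # Mirror the test's slicing: skip the leading "[" and compare the
    # backend:device prefix of each sycl-ls line against the target.
    out = subprocess.run(["sycl-ls"], capture_output=True, text=True).stdout
    return [
        idx
        for idx, line in enumerate(out.split("\n"))
        if line[1 : 1 + len(target_device_type)] == target_device_type
    ]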

tests/python-sycl/test_sycl_with_sklearn.py

Lines changed: 10 additions & 8 deletions

@@ -4,6 +4,7 @@
 import numpy as np

 from xgboost import testing as tm
+
 sys.path.append("tests/python")
 import test_with_sklearn as twskl  # noqa

@@ -16,19 +17,20 @@ def test_sycl_binary_classification():
     from sklearn.datasets import load_digits
     from sklearn.model_selection import KFold

-    digits = load_digits(n_class = 2)
-    y = digits['target']
-    X = digits['data']
+    digits = load_digits(n_class=2)
+    y = digits["target"]
+    X = digits["data"]
     kf = KFold(n_splits=2, shuffle=True, random_state=rng)
     for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
         for train_index, test_index in kf.split(X, y):
-            xgb_model = cls(
-                random_state=42, device='sycl',
-                n_estimators=4).fit(X[train_index], y[train_index])
+            xgb_model = cls(random_state=42, device="sycl", n_estimators=4).fit(
+                X[train_index], y[train_index]
+            )
             preds = xgb_model.predict(X[test_index])
             labels = y[test_index]
-            err = sum(1 for i in range(len(preds))
-                      if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+            err = sum(
+                1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
+            ) / float(len(preds))
             print(preds)
             print(labels)
             print(err)
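
The reformatted loop computes an error rate by thresholding predictions at 0.5 and counting label mismatches. A behavior-equivalent vectorized sketch on toy arrays (illustrative data, not taken from the test):

import numpy as np

preds = np.array([0.1, 0.9, 0.8, 0.2])
labels = np.array([0, 1, 0, 0])

# Threshold at 0.5, then average the mismatches: err == 0.25 here.
err = float(np.mean((preds > 0.5).astype(int) != labels))
print(err)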
