Commit 9b64bcb

sycl .py files are now linted with xgboost CI (#21)
Co-authored-by: Dmitry Razdoburdin <>
1 parent 89e14b3 commit 9b64bcb

File tree

5 files changed: +117 −88 lines

tests/ci_build/lint_python.py
tests/python-sycl/test_sycl_prediction.py
tests/python-sycl/test_sycl_training_continuation.py
tests/python-sycl/test_sycl_updaters.py
tests/python-sycl/test_sycl_with_sklearn.py

tests/ci_build/lint_python.py

Lines changed: 4 additions & 0 deletions

@@ -30,6 +30,10 @@ class LintersPaths:
         "tests/python-gpu/test_gpu_pickling.py",
         "tests/python-gpu/test_gpu_eval_metrics.py",
         "tests/python-gpu/test_gpu_with_sklearn.py",
+        "tests/python-sycl/test_sycl_prediction.py",
+        "tests/python-sycl/test_sycl_training_continuation.py",
+        "tests/python-sycl/test_sycl_updaters.py",
+        "tests/python-sycl/test_sycl_with_sklearn.py",
         "tests/test_distributed/test_with_spark/",
         "tests/test_distributed/test_gpu_with_spark/",
         # demo
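
The four new entries register the SYCL test files with the lint allowlist, so the CI formatting checks cover them like the existing GPU tests. A minimal sketch of how such a path allowlist is typically consumed (the SYCL_TESTS tuple and check_black helper below are illustrative, not the actual lint_python.py API, and assume black is installed):

import subprocess

SYCL_TESTS = (
    "tests/python-sycl/test_sycl_prediction.py",
    "tests/python-sycl/test_sycl_training_continuation.py",
    "tests/python-sycl/test_sycl_updaters.py",
    "tests/python-sycl/test_sycl_with_sklearn.py",
)


def check_black(paths):
    # `black --check` exits non-zero when any file would be reformatted,
    # which is how a lint step fails on unformatted files.
    return subprocess.run(["black", "--check", *paths]).returncode == 0


if __name__ == "__main__":
    print("black clean:", check_black(SYCL_TESTS))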

tests/python-sycl/test_sycl_prediction.py

Lines changed: 53 additions & 43 deletions

@@ -10,11 +10,13 @@
 rng = np.random.RandomState(1994)

-shap_parameter_strategy = strategies.fixed_dictionaries({
-    'max_depth': strategies.integers(1, 11),
-    'max_leaves': strategies.integers(0, 256),
-    'num_parallel_tree': strategies.sampled_from([1, 10]),
-}).filter(lambda x: x['max_depth'] > 0 or x['max_leaves'] > 0)
+shap_parameter_strategy = strategies.fixed_dictionaries(
+    {
+        "max_depth": strategies.integers(1, 11),
+        "max_leaves": strategies.integers(0, 256),
+        "num_parallel_tree": strategies.sampled_from([1, 10]),
+    }
+).filter(lambda x: x["max_depth"] > 0 or x["max_leaves"] > 0)


 class TestSYCLPredict(unittest.TestCase):
@@ -25,25 +27,32 @@ def test_predict(self):
         test_num_cols = [10, 50, 500]
         for num_rows in test_num_rows:
             for num_cols in test_num_cols:
-                dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols),
-                                     label=[0, 1] * int(num_rows / 2))
-                dval = xgb.DMatrix(np.random.randn(num_rows, num_cols),
-                                   label=[0, 1] * int(num_rows / 2))
-                dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols),
-                                    label=[0, 1] * int(num_rows / 2))
-                watchlist = [(dtrain, 'train'), (dval, 'validation')]
+                dtrain = xgb.DMatrix(
+                    np.random.randn(num_rows, num_cols),
+                    label=[0, 1] * int(num_rows / 2),
+                )
+                dval = xgb.DMatrix(
+                    np.random.randn(num_rows, num_cols),
+                    label=[0, 1] * int(num_rows / 2),
+                )
+                dtest = xgb.DMatrix(
+                    np.random.randn(num_rows, num_cols),
+                    label=[0, 1] * int(num_rows / 2),
+                )
+                watchlist = [(dtrain, "train"), (dval, "validation")]
                 res = {}
                 param = {
                     "objective": "binary:logistic",
-                    'eval_metric': 'logloss',
-                    'tree_method': 'hist',
-                    'device': 'cpu',
-                    'max_depth': 1,
-                    'verbosity': 0
+                    "eval_metric": "logloss",
+                    "tree_method": "hist",
+                    "device": "cpu",
+                    "max_depth": 1,
+                    "verbosity": 0,
                 }
-                bst = xgb.train(param, dtrain, iterations, evals=watchlist,
-                                evals_result=res)
-                assert self.non_increasing(res["train"]["logloss"])
+                bst = xgb.train(
+                    param, dtrain, iterations, evals=watchlist, evals_result=res
+                )
+                assert tm.non_increasing(res["train"]["logloss"])
                 cpu_pred_train = bst.predict(dtrain, output_margin=True)
                 cpu_pred_test = bst.predict(dtest, output_margin=True)
                 cpu_pred_val = bst.predict(dval, output_margin=True)
@@ -53,15 +62,9 @@ def test_predict(self):
                 sycl_pred_test = bst.predict(dtest, output_margin=True)
                 sycl_pred_val = bst.predict(dval, output_margin=True)

-                np.testing.assert_allclose(cpu_pred_train, sycl_pred_train,
-                                           rtol=1e-6)
-                np.testing.assert_allclose(cpu_pred_val, sycl_pred_val,
-                                           rtol=1e-6)
-                np.testing.assert_allclose(cpu_pred_test, sycl_pred_test,
-                                           rtol=1e-6)
-
-    def non_increasing(self, L):
-        return all((y - x) < 0.001 for x, y in zip(L, L[1:]))
+                np.testing.assert_allclose(cpu_pred_train, sycl_pred_train, rtol=1e-6)
+                np.testing.assert_allclose(cpu_pred_val, sycl_pred_val, rtol=1e-6)
+                np.testing.assert_allclose(cpu_pred_test, sycl_pred_test, rtol=1e-6)

     @pytest.mark.skipif(**tm.no_sklearn())
     def test_multi_predict(self):
@@ -70,8 +73,7 @@ def test_multi_predict(self):

         n = 1000
         X, y = make_regression(n, random_state=rng)
-        X_train, X_test, y_train, y_test = train_test_split(X, y,
-                                                            random_state=123)
+        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
         dtrain = xgb.DMatrix(X_train, label=y_train)
         dtest = xgb.DMatrix(X_test)

@@ -100,17 +102,19 @@ def test_sklearn(self):
         X_test, y_test = X[tr_size:, :], y[tr_size:]

         # First with cpu_predictor
-        params = {'tree_method': 'hist',
-                  'device': 'cpu',
-                  'n_jobs': -1,
-                  'verbosity' : 0,
-                  'seed': 123}
+        params = {
+            "tree_method": "hist",
+            "device": "cpu",
+            "n_jobs": -1,
+            "verbosity": 0,
+            "seed": 123,
+        }
         m = xgb.XGBRegressor(**params).fit(X_train, y_train)
         cpu_train_score = m.score(X_train, y_train)
         cpu_test_score = m.score(X_test, y_test)

         # Now with sycl_predictor
-        params['device'] = 'sycl'
+        params["device"] = "sycl"
         m.set_params(**params)

         # m = xgb.XGBRegressor(**params).fit(X_train, y_train)
@@ -121,8 +125,9 @@ def test_sklearn(self):
         assert np.allclose(cpu_train_score, sycl_train_score)
         assert np.allclose(cpu_test_score, sycl_test_score)

-    @given(strategies.integers(1, 10),
-           tm.make_dataset_strategy(), shap_parameter_strategy)
+    @given(
+        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
+    )
     @settings(deadline=None)
     def test_shap(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
@@ -138,8 +143,9 @@ def test_shap(self, num_rounds, dataset, param):
         assume(len(dataset.y) > 0)
         assert np.allclose(np.sum(shap, axis=len(shap.shape) - 1), margin, 1e-3, 1e-3)

-    @given(strategies.integers(1, 10),
-           tm.make_dataset_strategy(), shap_parameter_strategy)
+    @given(
+        strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy
+    )
     @settings(deadline=None, max_examples=20)
     def test_shap_interactions(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
@@ -153,5 +159,9 @@ def test_shap_interactions(self, num_rounds, dataset, param):
         shap = bst.predict(test_dmat, pred_interactions=True)
         margin = bst.predict(test_dmat, output_margin=True)
         assume(len(dataset.y) > 0)
-        assert np.allclose(np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)), margin,
-                           1e-3, 1e-3)
+        assert np.allclose(
+            np.sum(shap, axis=(len(shap.shape) - 1, len(shap.shape) - 2)),
+            margin,
+            1e-3,
+            1e-3,
+        )
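
Besides the reformatting, this file drops its local non_increasing method in favor of tm.non_increasing from xgboost.testing. A standalone sketch equivalent to the removed helper (the library version may use a different tolerance):

def non_increasing(values, tolerance=1e-3):
    # Allow each successive metric value to rise by less than `tolerance`;
    # anything larger counts as an increase and fails the check.
    return all((y - x) < tolerance for x, y in zip(values, values[1:]))


# A strictly improving logloss history passes the check.
assert non_increasing([0.69, 0.52, 0.40, 0.33])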

tests/python-sycl/test_sycl_training_continuation.py

Lines changed: 11 additions & 8 deletions

@@ -12,15 +12,19 @@ def run_training_continuation(self, use_json):
         X = np.random.randn(kRows, kCols)
         y = np.random.randn(kRows)
         dtrain = xgb.DMatrix(X, y)
-        params = {'device': 'sycl', 'max_depth': '2',
-                  'gamma': '0.1', 'alpha': '0.01',
-                  'enable_experimental_json_serialization': use_json}
+        params = {
+            "device": "sycl",
+            "max_depth": "2",
+            "gamma": "0.1",
+            "alpha": "0.01",
+            "enable_experimental_json_serialization": use_json,
+        }
         bst_0 = xgb.train(params, dtrain, num_boost_round=64)
-        dump_0 = bst_0.get_dump(dump_format='json')
+        dump_0 = bst_0.get_dump(dump_format="json")

         bst_1 = xgb.train(params, dtrain, num_boost_round=32)
         bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
-        dump_1 = bst_1.get_dump(dump_format='json')
+        dump_1 = bst_1.get_dump(dump_format="json")

         def recursive_compare(obj_0, obj_1):
             if isinstance(obj_0, float):
@@ -36,9 +40,8 @@ def recursive_compare(obj_0, obj_1):
                 values_1 = list(obj_1.values())
                 for i in range(len(obj_0.items())):
                     assert keys_0[i] == keys_1[i]
-                    if list(obj_0.keys())[i] != 'missing':
-                        recursive_compare(values_0[i],
-                                          values_1[i])
+                    if list(obj_0.keys())[i] != "missing":
+                        recursive_compare(values_0[i], values_1[i])
             else:
                 for i in range(len(obj_0)):
                     recursive_compare(obj_0[i], obj_1[i])
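
The test trains one booster for 64 rounds and another for 32 + 32 rounds via xgb_model continuation, then compares the JSON dumps tree by tree with recursive_compare. A short sketch of the dump structure it walks; get_dump(dump_format="json") is the real Booster API, while the toy data is illustrative:

import json

import numpy as np
import xgboost as xgb

X = np.random.randn(64, 4)
y = np.random.randn(64)
bst = xgb.train({"max_depth": 2}, xgb.DMatrix(X, y), num_boost_round=4)

# get_dump(dump_format="json") yields one JSON string per tree; each parses
# into a nested dict of split conditions, children, and leaf values, which
# recursive_compare traverses key by key (skipping "missing").
trees = [json.loads(t) for t in bst.get_dump(dump_format="json")]
print(len(trees), sorted(trees[0].keys()))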

tests/python-sycl/test_sycl_updaters.py

Lines changed: 39 additions & 29 deletions

@@ -6,52 +6,62 @@
 import sys
 import os
+
 # sys.path.append("tests/python")
 # import testing as tm
 from xgboost import testing as tm

-parameter_strategy = strategies.fixed_dictionaries({
-    'max_depth': strategies.integers(0, 11),
-    'max_leaves': strategies.integers(0, 256),
-    'max_bin': strategies.integers(2, 1024),
-    'grow_policy': strategies.sampled_from(['lossguide', 'depthwise']),
-    'single_precision_histogram': strategies.booleans(),
-    'min_child_weight': strategies.floats(0.5, 2.0),
-    'seed': strategies.integers(0, 10),
-    # We cannot enable subsampling as the training loss can increase
-    # 'subsample': strategies.floats(0.5, 1.0),
-    'colsample_bytree': strategies.floats(0.5, 1.0),
-    'colsample_bylevel': strategies.floats(0.5, 1.0),
-}).filter(lambda x: (x['max_depth'] > 0 or x['max_leaves'] > 0) and (
-    x['max_depth'] > 0 or x['grow_policy'] == 'lossguide'))
+parameter_strategy = strategies.fixed_dictionaries(
+    {
+        "max_depth": strategies.integers(0, 11),
+        "max_leaves": strategies.integers(0, 256),
+        "max_bin": strategies.integers(2, 1024),
+        "grow_policy": strategies.sampled_from(["lossguide", "depthwise"]),
+        "single_precision_histogram": strategies.booleans(),
+        "min_child_weight": strategies.floats(0.5, 2.0),
+        "seed": strategies.integers(0, 10),
+        # We cannot enable subsampling as the training loss can increase
+        # 'subsample': strategies.floats(0.5, 1.0),
+        "colsample_bytree": strategies.floats(0.5, 1.0),
+        "colsample_bylevel": strategies.floats(0.5, 1.0),
+    }
+).filter(
+    lambda x: (x["max_depth"] > 0 or x["max_leaves"] > 0)
+    and (x["max_depth"] > 0 or x["grow_policy"] == "lossguide")
+)


 def train_result(param, dmat, num_rounds):
     result = {}
-    xgb.train(param, dmat, num_rounds, [(dmat, 'train')], verbose_eval=False,
-              evals_result=result)
+    xgb.train(
+        param,
+        dmat,
+        num_rounds,
+        [(dmat, "train")],
+        verbose_eval=False,
+        evals_result=result,
+    )
     return result


 class TestSYCLUpdaters:
-    @given(parameter_strategy, strategies.integers(1, 5),
-           tm.make_dataset_strategy())
+    @given(parameter_strategy, strategies.integers(1, 5), tm.make_dataset_strategy())
     @settings(deadline=None)
     def test_sycl_hist(self, param, num_rounds, dataset):
-        param['tree_method'] = 'hist'
-        param['device'] = 'sycl'
-        param['verbosity'] = 0
+        param["tree_method"] = "hist"
+        param["device"] = "sycl"
+        param["verbosity"] = 0
         param = dataset.set_params(param)
         result = train_result(param, dataset.get_dmat(), num_rounds)
         note(result)
-        assert tm.non_increasing(result['train'][dataset.metric])
+        assert tm.non_increasing(result["train"][dataset.metric])

     @given(tm.make_dataset_strategy(), strategies.integers(0, 1))
     @settings(deadline=None)
     def test_specified_device_id_sycl_update(self, dataset, device_id):
         # Read the list of sycl-devicese
-        sycl_ls = os.popen('sycl-ls').read()
-        devices = sycl_ls.split('\n')
+        sycl_ls = os.popen("sycl-ls").read()
+        devices = sycl_ls.split("\n")

         # Test should launch only on gpu
         # Find gpus in the list of devices
@@ -60,11 +70,11 @@ def test_specified_device_id_sycl_update(self, dataset, device_id):
         found_devices = 0
         for idx in range(len(devices)):
             if len(devices[idx]) >= len(target_device_type):
-                if devices[idx][1:1+len(target_device_type)] == target_device_type:
-                    if (found_devices == device_id):
-                        param = {'device': f"sycl:gpu:{idx}"}
+                if devices[idx][1 : 1 + len(target_device_type)] == target_device_type:
+                    if found_devices == device_id:
+                        param = {"device": f"sycl:gpu:{idx}"}
                         param = dataset.set_params(param)
                         result = train_result(param, dataset.get_dmat(), 10)
-                        assert tm.non_increasing(result['train'][dataset.metric])
+                        assert tm.non_increasing(result["train"][dataset.metric])
                     else:
-                        found_devices += 1
+                        found_devices += 1
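
test_specified_device_id_sycl_update maps an ordinal GPU position onto XGBoost's "sycl:gpu:<idx>" device string by scanning sycl-ls output. A sketch of that enumeration, assuming sycl-ls prints lines such as "[opencl:gpu:0] Intel(R) ..." (the exact format can vary across DPC++ releases, and target_device_type is defined outside the hunk shown above):

import subprocess


def gpu_line_indices(target_device_type: str = "opencl:gpu") -> list[int]:
    # Mirror the test's slicing: skip the leading "[" and compare the
    # backend:device prefix of each sycl-ls line against the target.
    out = subprocess.run(["sycl-ls"], capture_output=True, text=True).stdout
    return [
        idx
        for idx, line in enumerate(out.split("\n"))
        if line[1 : 1 + len(target_device_type)] == target_device_type
    ]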

tests/python-sycl/test_sycl_with_sklearn.py

Lines changed: 10 additions & 8 deletions

@@ -4,6 +4,7 @@
 import numpy as np

 from xgboost import testing as tm
+
 sys.path.append("tests/python")
 import test_with_sklearn as twskl  # noqa

@@ -16,19 +17,20 @@ def test_sycl_binary_classification():
     from sklearn.datasets import load_digits
     from sklearn.model_selection import KFold

-    digits = load_digits(n_class = 2)
-    y = digits['target']
-    X = digits['data']
+    digits = load_digits(n_class=2)
+    y = digits["target"]
+    X = digits["data"]
     kf = KFold(n_splits=2, shuffle=True, random_state=rng)
     for cls in (xgb.XGBClassifier, xgb.XGBRFClassifier):
         for train_index, test_index in kf.split(X, y):
-            xgb_model = cls(
-                random_state=42, device='sycl',
-                n_estimators=4).fit(X[train_index], y[train_index])
+            xgb_model = cls(random_state=42, device="sycl", n_estimators=4).fit(
+                X[train_index], y[train_index]
+            )
             preds = xgb_model.predict(X[test_index])
             labels = y[test_index]
-            err = sum(1 for i in range(len(preds))
-                      if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+            err = sum(
+                1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]
+            ) / float(len(preds))
             print(preds)
             print(labels)
             print(err)
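
The reformatted loop computes an error rate by thresholding predictions at 0.5 and counting label mismatches. A behavior-equivalent vectorized sketch on toy arrays (illustrative data, not taken from the test):

import numpy as np

preds = np.array([0.1, 0.9, 0.8, 0.2])
labels = np.array([0, 1, 0, 0])

# Threshold at 0.5, then average the mismatches: err == 0.25 here.
err = float(np.mean((preds > 0.5).astype(int) != labels))
print(err)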
