1
+ import sys
2
+ import unittest
3
+ import pytest
4
+
5
+ import numpy as np
6
+ import xgboost as xgb
7
+ from hypothesis import given , strategies , assume , settings , note
8
+
9
+ sys .path .append ("tests/python" )
10
+ import testing as tm
11
+
12
# Seeded RNG shared by the sklearn-backed tests in this module.
rng = np.random.RandomState(1994)

# Search space for booster hyper-parameters used by the SHAP tests.
_shap_param_space = {
    'max_depth': strategies.integers(1, 11),
    'max_leaves': strategies.integers(0, 256),
    'num_parallel_tree': strategies.sampled_from([1, 10]),
}


def _has_depth_or_leaves(params):
    """Accept a draw only when the tree can actually grow.

    NOTE(review): max_depth is drawn from [1, 11] above, so this predicate is
    always true here — presumably kept for parity with strategies elsewhere
    that allow max_depth == 0; confirm before removing.
    """
    return params['max_depth'] > 0 or params['max_leaves'] > 0


shap_parameter_strategy = strategies.fixed_dictionaries(
    _shap_param_space).filter(_has_depth_or_leaves)
19
+
20
+
21
class TestOneAPIPredict(unittest.TestCase):
    """Checks that ``oneapi_predictor`` agrees with ``cpu_predictor``.

    Covers raw margin predictions, repeated-predict determinism, the sklearn
    wrapper, and SHAP contribution/interaction consistency.
    """

    def non_increasing(self, L):
        """Return True when ``L`` never rises by more than 1e-3 between
        consecutive entries (tolerates small float jitter in the loss)."""
        return all((y - x) < 0.001 for x, y in zip(L, L[1:]))

    def test_predict(self):
        """Train with the oneapi updater and compare oneapi vs CPU margins
        on train/validation/test matrices across several data shapes."""
        iterations = 10
        np.random.seed(1)
        test_num_rows = [10, 1000, 5000]
        test_num_cols = [10, 50, 500]
        for num_rows in test_num_rows:
            for num_cols in test_num_cols:
                # Alternating 0/1 labels; every num_rows above is even.
                labels = [0, 1] * (num_rows // 2)
                dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols),
                                     label=labels)
                dval = xgb.DMatrix(np.random.randn(num_rows, num_cols),
                                   label=labels)
                dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols),
                                    label=labels)
                watchlist = [(dtrain, 'train'), (dval, 'validation')]
                res = {}
                param = {
                    "objective": "binary:logistic_oneapi",
                    "predictor": "oneapi_predictor",
                    'eval_metric': 'logloss',
                    'tree_method': 'hist',
                    'updater': 'grow_quantile_histmaker_oneapi',
                    'max_depth': 1,
                }
                bst = xgb.train(param, dtrain, iterations, evals=watchlist,
                                evals_result=res)
                # Training must actually make progress on the train loss.
                assert self.non_increasing(res["train"]["logloss"])
                oneapi_pred_train = bst.predict(dtrain, output_margin=True)
                oneapi_pred_test = bst.predict(dtest, output_margin=True)
                oneapi_pred_val = bst.predict(dval, output_margin=True)

                # Retrain with the CPU predictor as the reference.
                param["predictor"] = "cpu_predictor"
                bst_cpu = xgb.train(param, dtrain, iterations, evals=watchlist)
                cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
                cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
                cpu_pred_val = bst_cpu.predict(dval, output_margin=True)

                np.testing.assert_allclose(cpu_pred_train, oneapi_pred_train,
                                           rtol=1e-6)
                np.testing.assert_allclose(cpu_pred_val, oneapi_pred_val,
                                           rtol=1e-6)
                np.testing.assert_allclose(cpu_pred_test, oneapi_pred_test,
                                           rtol=1e-6)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_multi_predict(self):
        """Predicting twice from the same oneapi booster must be
        deterministic and must match the CPU predictor."""
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split

        n = 1000
        X, y = make_regression(n, random_state=rng)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=123)
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test)

        params = {}
        params["tree_method"] = "hist"
        params["updater"] = "grow_quantile_histmaker_oneapi"

        params['predictor'] = "oneapi_predictor"
        bst_oneapi_predict = xgb.train(params, dtrain)

        params['predictor'] = "cpu_predictor"
        bst_cpu_predict = xgb.train(params, dtrain)

        # Two predicts from the same booster check determinism.
        predict0 = bst_oneapi_predict.predict(dtest)
        predict1 = bst_oneapi_predict.predict(dtest)
        cpu_predict = bst_cpu_predict.predict(dtest)

        assert np.allclose(predict0, predict1)
        assert np.allclose(predict0, cpu_predict)

    @pytest.mark.skipif(**tm.no_sklearn())
    def test_sklearn(self):
        """R^2 scores from the sklearn wrapper must match between the CPU
        and oneapi predictors on a synthetic linear regression task."""
        m, n = 15000, 14
        tr_size = 2500
        X = np.random.rand(m, n)
        y = 200 * np.matmul(X, np.arange(-3, -3 + n))
        X_train, y_train = X[:tr_size, :], y[:tr_size]
        X_test, y_test = X[tr_size:, :], y[tr_size:]

        # First with cpu_predictor.  Use a distinct name for the regressor so
        # it does not shadow the row count ``m`` above.
        params = {'tree_method': 'hist',
                  'predictor': 'cpu_predictor',
                  'n_jobs': -1,
                  'seed': 123}
        model = xgb.XGBRegressor(**params).fit(X_train, y_train)
        cpu_train_score = model.score(X_train, y_train)
        cpu_test_score = model.score(X_test, y_test)

        # Now with oneapi_predictor.
        params['predictor'] = 'oneapi_predictor'

        # Fix: the original fitted a *second* oneapi model before computing
        # the test score; score the single fitted model on both splits,
        # mirroring the CPU path above.
        model = xgb.XGBRegressor(**params).fit(X_train, y_train)
        oneapi_train_score = model.score(X_train, y_train)
        oneapi_test_score = model.score(X_test, y_test)

        assert np.allclose(cpu_train_score, oneapi_train_score)
        assert np.allclose(cpu_test_score, oneapi_test_score)

    @given(strategies.integers(1, 10),
           tm.dataset_strategy.filter(lambda x: x.name != "empty"),
           shap_parameter_strategy)
    @settings(deadline=None)
    def test_shap(self, num_rounds, dataset, param):
        """SHAP contributions summed over the last axis must reproduce the
        raw margin prediction."""
        param.update({"predictor": "oneapi_predictor"})
        param = dataset.set_params(param)
        dmat = dataset.get_dmat()
        bst = xgb.train(param, dmat, num_rounds)
        test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w,
                                dataset.margin)
        shap = bst.predict(test_dmat, pred_contribs=True)
        margin = bst.predict(test_dmat, output_margin=True)
        assume(len(dataset.y) > 0)
        # Sum over the trailing (feature + bias) axis.
        assert np.allclose(np.sum(shap, axis=-1), margin,
                           rtol=1e-3, atol=1e-3)

    @given(strategies.integers(1, 10),
           tm.dataset_strategy.filter(lambda x: x.name != "empty"),
           shap_parameter_strategy)
    @settings(deadline=None, max_examples=20)
    def test_shap_interactions(self, num_rounds, dataset, param):
        """SHAP interaction values summed over the last two axes must
        reproduce the raw margin prediction."""
        param.update({"predictor": "oneapi_predictor"})
        param = dataset.set_params(param)
        dmat = dataset.get_dmat()
        bst = xgb.train(param, dmat, num_rounds)
        test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w,
                                dataset.margin)
        shap = bst.predict(test_dmat, pred_interactions=True)
        margin = bst.predict(test_dmat, output_margin=True)
        assume(len(dataset.y) > 0)
        # Sum over the two trailing (feature + bias) axes.
        assert np.allclose(np.sum(shap, axis=(-1, -2)), margin,
                           rtol=1e-3, atol=1e-3)
0 commit comments