59
59
60
60
import xgboost
61
61
from xgboost import XGBClassifier
62
- from xgboost .compat import is_cudf_available
62
+ from xgboost .compat import is_cudf_available , is_cupy_available
63
63
from xgboost .core import Booster , _check_distributed_params
64
64
from xgboost .sklearn import DEFAULT_N_ESTIMATORS , XGBModel , _can_use_qdm
65
65
from xgboost .training import train as worker_train
@@ -242,6 +242,13 @@ class _SparkXGBParams(
242
242
TypeConverters .toList ,
243
243
)
244
244
245
def set_device(self, value: str) -> "_SparkXGBParams":
    """Set the ``device`` param.

    Parameters
    ----------
    value :
        Name of the device, one of ``"cpu"``, ``"cuda"`` or ``"gpu"``.

    Returns
    -------
    This params object, so that setter calls can be chained.

    Raises
    ------
    ValueError
        If ``value`` is not one of the accepted device names. Anything raised by
        ``_check_distributed_params`` is propagated unchanged.
    """
    _check_distributed_params({"device": value})
    # Validate with an explicit raise instead of ``assert``: assertions are
    # removed when Python runs with ``-O``, which would let an unsupported
    # device name be stored silently.
    supported = ("cpu", "cuda", "gpu")
    if value not in supported:
        raise ValueError(
            f"Unsupported device: {value!r}, expected one of {supported}."
        )
    self.set(self.device, value)
    return self
251
+
245
252
@classmethod
246
253
def _xgb_cls (cls ) -> Type [XGBModel ]:
247
254
"""
@@ -1193,6 +1200,31 @@ def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
1193
1200
dataset = dataset .drop (pred_struct_col )
1194
1201
return dataset
1195
1202
1203
def _gpu_transform(self) -> bool:
    """Return True when the prediction should be carried out on the GPU.

    When ``_is_local`` reports a local Spark context, the answer depends only
    on the ``device`` param. Otherwise the Spark configuration must also
    define ``spark.task.resource.gpu.amount``; when it is missing the
    prediction stays on the CPUs, and a warning is logged if the ``device``
    param asked for CUDA.
    """
    if not _is_local(_get_spark_session().sparkContext):
        spark_conf = _get_spark_session().sparkContext.getConf()

        if spark_conf.get("spark.task.resource.gpu.amount") is None:
            # No GPU resources were requested for the tasks: stay on the
            # CPUs, and tell the user if that overrides the "device" param.
            if use_cuda(self.getOrDefault(self.device)):
                get_logger("XGBoost-PySpark").warning(
                    "Do the prediction on the CPUs since "
                    "no gpu configurations are set"
                )
            return False

    # Either the context is local, or GPU resources are configured for the
    # tasks: in both cases the "device" param decides.
    return use_cuda(self.getOrDefault(self.device))
1227
+
1196
1228
def _transform (self , dataset : DataFrame ) -> DataFrame :
1197
1229
# pylint: disable=too-many-statements, too-many-locals
1198
1230
# Save xgb_sklearn_model and predict_params to be local variable
@@ -1216,21 +1248,77 @@ def _transform(self, dataset: DataFrame) -> DataFrame:
1216
1248
1217
1249
_ , schema = self ._out_schema ()
1218
1250
1251
+ is_local = _is_local (_get_spark_session ().sparkContext )
1252
+ run_on_gpu = self ._gpu_transform ()
1253
+
1219
1254
@pandas_udf (schema ) # type: ignore
1220
1255
def predict_udf (iterator : Iterator [pd .DataFrame ]) -> Iterator [pd .Series ]:
1221
1256
assert xgb_sklearn_model is not None
1222
1257
model = xgb_sklearn_model
1258
+
1259
+ from pyspark import TaskContext
1260
+
1261
+ context = TaskContext .get ()
1262
+ assert context is not None
1263
+
1264
+ dev_ordinal = - 1
1265
+
1266
+ if is_cudf_available ():
1267
+ if is_local :
1268
+ if run_on_gpu and is_cupy_available ():
1269
+ import cupy as cp # pylint: disable=import-error
1270
+
1271
+ total_gpus = cp .cuda .runtime .getDeviceCount ()
1272
+ if total_gpus > 0 :
1273
+ partition_id = context .partitionId ()
1274
+ # For transform local mode, default the dev_ordinal to
1275
+ # (partition id) % gpus.
1276
+ dev_ordinal = partition_id % total_gpus
1277
+ elif run_on_gpu :
1278
+ dev_ordinal = _get_gpu_id (context )
1279
+
1280
+ if dev_ordinal >= 0 :
1281
+ device = "cuda:" + str (dev_ordinal )
1282
+ get_logger ("XGBoost-PySpark" ).info (
1283
+ "Do the inference with device: %s" , device
1284
+ )
1285
+ model .set_params (device = device )
1286
+ else :
1287
+ get_logger ("XGBoost-PySpark" ).info ("Do the inference on the CPUs" )
1288
+ else :
1289
+ msg = (
1290
+ "CUDF is unavailable, fallback the inference on the CPUs"
1291
+ if run_on_gpu
1292
+ else "Do the inference on the CPUs"
1293
+ )
1294
+ get_logger ("XGBoost-PySpark" ).info (msg )
1295
+
1296
def to_gpu_if_possible(data: ArrayLike) -> ArrayLike:
    """Return ``data`` as a cudf DataFrame when a GPU ordinal was selected.

    ``dev_ordinal`` comes from the enclosing scope; a negative value means no
    GPU was chosen and ``data`` is handed back untouched.
    """
    if dev_ordinal < 0:
        return data

    import cudf  # pylint: disable=import-error
    import cupy as cp  # pylint: disable=import-error

    # The device has to be selected after cudf is imported, because the import
    # resets the device id to 0.
    # See https://github.com/rapidsai/cudf/issues/11386
    cp.cuda.runtime.setDevice(dev_ordinal)  # pylint: disable=I1101
    gpu_frame = cudf.DataFrame(data)
    # Drop this function's reference to the input once the cudf frame exists.
    del data
    return gpu_frame
1309
+
1223
1310
for data in iterator :
1224
1311
if enable_sparse_data_optim :
1225
1312
X = _read_csr_matrix_from_unwrapped_spark_vec (data )
1226
1313
else :
1227
1314
if feature_col_names is not None :
1228
- X = data [feature_col_names ]
1315
+ tmp = data [feature_col_names ]
1229
1316
else :
1230
- X = stack_series (data [alias .data ])
1317
+ tmp = stack_series (data [alias .data ])
1318
+ X = to_gpu_if_possible (tmp )
1231
1319
1232
1320
if has_base_margin :
1233
- base_margin = data [alias .margin ]. to_numpy ( )
1321
+ base_margin = to_gpu_if_possible ( data [alias .margin ])
1234
1322
else :
1235
1323
base_margin = None
1236
1324
0 commit comments